MixSIAR,使用贝叶斯模型,依据指纹因子数据,估计物源对混合物的贡献比例,尤其是食物网中的食物来源问题。目前,MixSIAR在cran和github上的版本均为v3.1.12(2020-10-20发布),但是github上的,多了5个函数(函数及作用见案例5)。
本教程共包括若干水土保持学案例,以及11个官方案例(其中前6个来自github的tutorials,后5个补充自官方手册,但案例顺序略有改变)。
涉及资源包括MixSIAR包以及JAGS软件:
必要数据包括三个,混合物、物源、辨别因子
对于混合物:
load_mix_data
,共6个参数:
filename
:char(文件路径和文件名),指定混合物文件iso_names
:char(列名),指定指纹因子factors
:NULL(无)/char(列名),指定协变量,并保留输入顺序作为后续变量名识别用fac_random
:NULL(无随机、无固定)/T(随机)/F(固定),认为变量是对总体的随机影响,则用随机变量;如果更关心特定部分小总体,则应该选择固定效应fac_nested
:NULL(无嵌套)/T(嵌套于其他因子)/F(未嵌套于其他因子)cont_effects
:NULL(无)/char(列名),连续效应变量对于物源:
raw
和means
两类:
raw
:第一列名必须为”Sources”(注意结尾有s),变量名与mix中相同means
:第一列名必须为”Sources”,则全部指纹因子列名必须有前缀Mean和SD(如geese中的Meand15N、SDd15N),且必须有一列名为”n”标识出样本数量。load_source_data
,共5个参数(不需再定义指纹因子):
filename
:char(文件路径和文件名),指定物源文件source_factors
:NULL(无)/char(列名),效应因子,可以为mix和sources数据中共有的固定、随机、连续效应conc_dep
:T(有)/F(无),浓度依赖,数据如有,列名规则固定,只与指纹因子有关,因此这里只为T或Fdata_type
:means
(均值型)/raw
(原始型)mix
:list,已读取的混合物数据对于辨别因子数据
绘图:
plot_prior
函数的alpha.piror,向量形式下应与源地数量相同,否则将重定向为1plot_continuous_var
函数绘制,包括连续变量的自身、最大值、中位数、最小值的密度函数图output_JAGS
中的heidel
设置为默认的FALSE,若为TRUE运行时有可能报错模型:
sum(alpha)==length(alpha)
,mean(alpha)==1
,可以通过乘以长度、除以和的方式实现output_JAGS
结束后,不能顺利生成png文件(文件大小为0kb,需要重启rstudio),多数情况下不能顺利生成model_result_5_diagnostics.pdf。graphics.off()
关闭,则不用重启也能完成绘图run_model
后保存模型,再做后续分析时,通常结果会报错!结果:
output_diagnostics
,对应output_JAGS的result_5output_pairs
, 对应output_JAGS的result_3output_posteriors
, 对应output_JAGS的result_2output_stats
, 对应output_JAGS的result_1output_xy
, 对应output_JAGS的result_4其他
英文含义:
MCMC设置:
raw2means <- function(method = "clipboard", rawdata=NULL, fullfilename=NULL){
# 定义一个转换函数,将raw格式的sources,转换为means格式
# 四种方式clipboard/data/filechoose/filename:
# 1. method="clipboard"为默认,从剪切板读取,结果写入剪切板
# 2. method="data"为输入数据,结果输出为数据
# 3. method="filechoose"为从路径选择raw文件,将means结果写入同路径下的csv文件
# 4. method="filename"为输入raw的路径+文件名,将means结果写入同路径下的csv文件
# 注意输入的raw格式,要求第一列为"sources",其余列均为指纹因子
# library(data.table)
# library(magrittr)
# 定义一个专职转换的函数
r2m <- function(data.raw){
fin.name <- names(data.raw)[-1]
m.name <- paste0("Mean", fin.name)
s.name <- paste0("SD", fin.name)
n.name <- "n"
m <- data.raw[, lapply(.SD, mean), by="sources"] %>% set_colnames(c("sources", m.name))
s <- data.raw[, lapply(.SD, sd), by="sources"] %>% .[,-1] %>% set_colnames(s.name)
n <- data.raw[, lapply(.SD, length), by="sources"] %>% .[,2] %>% set_colnames(n.name)
data.means <- cbind(m,s,n)
return(data.means)
}
# 分情况
if(method == "clipboard"){
# 如果是剪切板,则读取、转换、输出到剪切板
data.raw <- read.table("clipboard", header = T) %>% setDT()
data.means <- r2m(data.raw)
write.table(data.means, "clipboard", row.names = F, sep = "\t")
message("Done!")
return(invisible())
} else if(method == "data"){
# 如果是数据输入,则转换、输出到R对象
if(!is.null(rawdata)) stop("Data does not exist!")
data.means <- r2m(data.raw)
return(data.means)
} else if(method == "filechoose"){
# 如果是文件选择,先记录下文件名
file.raw.name <- file.choose() %>% gsub("\\\\", "/", .)
} else if(method == "filename"){
# 如果是文件名,先记录下文件名
if(!file.exists(fullfilename)) stop("File does not exist!")
file.raw.name <- fullfilename %>% gsub("\\\\", "/", .)
} else{
# 其他情况,停止
stop("Method error! (Try clipboard/data/filechoose/filename)")
}
# 先进行数据转换
data.raw <- fread(file.raw.name)
data.means <- r2m(data.raw)
# 根据raw的文件名,生成储存means所需的文件名
file.raw.dir <- dirname(file.raw.name)
t.file.means.name <- gsub(paste0(file.raw.dir, "/"), "", file.raw.name)
# 生成menas的文件名,如果raw文件名有区分,则直接替换为means,若没有,则追加means
if(grepl("raw", t.file.means.name)){
file.means.name <- gsub("raw", "means", t.file.means.name)
} else{
file.means.name <- gsub(".csv", "_means.csv", t.file.means.name)
}
# 保存means
fwrite(data.means, paste0(file.raw.dir, "/", file.means.name))
message("Your means-type file (the path same with raw one): ")
message(paste0(" ", file.means.name))
return(invisible())
}
load_discr_zero <- function(mix, source){
# 读取指纹因子均值、指纹因子标准差、物源的名和数量
s.m.name <- mix$MU_names
s.m.number <- length(s.m.name)
s.sig.name <- mix$SIG_names
s.sig.number <- length(s.sig.name)
source.name <- source$source_names
source.number <- length(source.name)
# 生成均值0矩阵
s.m <- matrix(data=0, nrow = source.number, ncol = s.m.number)
colnames(s.m) <- s.m.name
rownames(s.m) <- source.name
# 生成标准差0矩阵
s.sig <- matrix(data=0, nrow = source.number, ncol = s.sig.number)
colnames(s.sig) <- s.sig.name
rownames(s.sig) <- source.name
# 输出结果
res <- list(mu = s.m, sig2 = s.sig)
return(res)
}
# 定义一个先验信息超参数正规化函数
standard.alpha <- function(a, zero = 0.01){
s.a <- a*length(a)/sum(a)
s.a[which(s.a == 0)] <- zero
return(s.a)
}
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 1
# 案例名称
enam <- "burnedarea"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIAR_WSexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.创建所需数据和模型的列表
mod.n <- 3
mix <- vector("list", mod.n)
source <- vector("list", mod.n)
discr <- vector("list", mod.n)
alpha <- vector("list", mod.n)
jags.mod <- vector("list", mod.n)
# 3.设置先验信息
# 定义一个先验信息超参数正规化函数
standard.alpha <- function(a, zero = 0.01){
s.a <- a*length(a)/sum(a)
s.a[which(s.a == 0)] <- zero
return(s.a)
}
# 指定每个模型的先验信息超参数
alpha[[1]] <- alpha[[2]] <- alpha[[3]] <- 1
# alpha[[1]] <- standard.alpha(c(0.3333, 0.3333, 0.3333))
# alpha[[2]] <- standard.alpha(c(0.4, 0.2, 0.4))
# alpha[[3]] <- standard.alpha(c(0.6, 0.2, 0.2))
# alpha[[1]] <- standard.alpha(c(0.45, 0.1, 0.45))
# alpha[[2]] <- standard.alpha(c(0.1, 0.8, 0.1))
# alpha[[3]] <- standard.alpha(c(0.1, 0.45, 0.45))
# 4.构建并运行模型
startTime <- Sys.time()
for(mod in 1:mod.n){
# 4.1加载混合物数据
mix[[mod]] <- load_mix_data(filename = paste0(edir, "/", enam, "-mixsiar_mix_", mod, ".csv"),
iso_names = c("cs137","pb210","xlf"),
factors = NULL,
fac_random = NULL,
fac_nested = NULL,
cont_effects = NULL)
# 4.2加载物源数据
source[[mod]] <- load_source_data(filename = paste0(edir, "/", enam, "-mixsiar_source_means.csv"),
source_factors = NULL,
conc_dep = F,
data_type = "means",
mix = mix[[mod]])
# 4.3加载鉴别因子数据
# 水保案例,鉴别因子都为0
discr[[mod]] <- load_discr_data(filename = paste0(edir, "/", enam, "-mixsiar_discrimination.csv"),
mix = mix[[mod]])
# 4.4新建文件夹以存储文件
mainDir <- getwd()
subDir <- paste0("model_real_inf_", mod)
# subDir <- paste0("model_wrong_inf_", mod)
dir.create(file.path(mainDir, subDir), showWarnings = FALSE)
setwd(file.path(mainDir, subDir))
tDir <- getwd()
# 4.5绘制数据图
plot_data(filename = paste0("model_", mod, "-plot_data_isospace"),
plot_save_pdf = F,
plot_save_png = T,
mix = mix[[mod]],
source = source[[mod]],
discr = discr[[mod]])
graphics.off()
# 4.6绘制先验分布(因为所有模型源都一样,所以没必要都画,这里只在运行前画了一个)
plot_prior(alpha.prior = alpha[[mod]],
source = source[[mod]],
plot_save_pdf = F,
plot_save_png = T,
filename = paste0("model_", mod, "-plot_source_prior"))
graphics.off()
# 4.7建立模型(有无先验信息并不影响)
model_filename <- paste0(tDir, "/model_",mod,"-MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix[[mod]],
source = source[[mod]])
# 4.8运行模型
jags.mod[[mod]] <- run_model(run = "long",
mix = mix[[mod]],
source = source[[mod]],
discr = discr[[mod]],
model_filename = model_filename,
alpha.prior = alpha[[mod]],
resid_err = T,
process_err = T)
# 4.9结果分析
output_options <- list(summary_save = TRUE,
summary_name = paste0("model_",mod,"-model_result_1_summary_statistics"),
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = paste0("model_",mod,"-model_result_2_posterior_density"),
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = paste0("model_",mod,"-model_result_3_pairs_plot"),
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = paste0("model_",mod,"-model_result_4_xy_plot"),
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = paste0("model_",mod,"-model_result_5_diagnostics"),
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.1 = jags.mod[[mod]],
mix = mix[[mod]],
source = source[[mod]],
output_options = output_options)
graphics.off()
setwd(mainDir)
}
endTime <- Sys.time()
endTime - startTime #Time difference of 9.628532 mins
官方文档中,对几个案例的总结
案例说明(rmarkdown列表前需空一行,列表每一行前无需空格或制表符)
# 加载所需包
library(MixSIAR)
# 案例序号
enum <- 1
# 案例名称
enam <- "wolves"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径,rmarkdown在代码块中的定义,执行过该代码块后,路径将被重置,因此不建议在代码块中使用
setwd(edir)
此案例中,捕食者为狼群,混合物数据生成函数的参数含义如下:
filename = mixname
字符,文件名字,即csv格式的数据文件全名(路径+文件名)iso_names = c("d13C", "d15N")
字符串,同位素(指纹因子)的名称,此案例有两个变量,分别为δ13C和δ15Nfactors = c("Region","Pack")
字符,混合物数据中的协变量,本案例为区域和狼群(Region和Pack)fac_random = c(TRUE,TRUE)
布尔,协变量的类型,是否为随机(否则为固定),此处全为随机fac_nested = c(FALSE,TRUE)
布尔,层级关系,此处含义为Pack隶属于Regioncont_effects = NULL
字符,连续效应的名称,此处为无,为空本例中,狼群Pack和区域Region有层级隶属关系,Pack隶属于Region,意思是某个狼群完全属于某个区域、狼群中的每个个体也同样隶属于这个区域。汇数据用图示可表示为:
# 读取狼群数据文件名
mixname <- paste0(edir, "/", enam, "_consumer.csv")
# 生成狼群数据
mix <- load_mix_data(filename = mixname,
iso_names = c("d13C", "d15N"),
factors = c("Region","Pack"),
fac_random = c(TRUE,TRUE),
fac_nested = c(FALSE,TRUE),
cont_effects = NULL)
本案例中,源为3个区域内,三种动物的同位素平均值、方差数据,物源数据生成函数的参数含义如下:
filename = souname
字符,文件名字,即csv格式的数据文件全名(路径+文件名)source_factors = "Region"
字符串,物源数据中的协变量,此处为Regionconc_dep = FALSE
布尔,是否有浓度依赖性数据data_type = "means"
字符,可选means
或raw
,均值方差型数据或原始个体数据mix = mix
列表,load_mix_data
生成的混合物数据注意如果使用均值方差数据,原数据中应有一列名为”n”,记录原始样本个数
# 读取物源数据文件名
souname <- paste0(edir, "/", enam, "_sources.csv")
# 生成物源数据
source <- load_source_data(filename = souname,
source_factors = "Region",
conc_dep = FALSE,
data_type = "means",
mix = mix)
读取营养鉴别因子(TDF)数据,TDF是指消费者在进食某一食物后,机体组织中的生物示踪剂值被修改(富集/耗尽)的量。如果示踪剂是保守的,则将TDF设置为0(例如,必需脂肪酸、脂肪酸分布数据、元素浓度)。
disname <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = disname, mix = mix)
对数据进行绘图,以检验:
需要注意的是:
plot_data
主要根据指纹因子数量,在plot_data_one_iso
和plot_data_two_iso
之间选择绘图函数plot_data
将对所有指纹因子两两结合绘制这个图plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr,
return_obj = T)
绘图结果(上述代码在rmarkdown中并不输出图,因此用html展示):
当且仅当有两个指纹因子时,计算归一化的多边形凸包围表面积
注意:
# 计算凸包面积,由源方差标准化
calc_area(source = source, mix = mix, discr = discr)
#>0.8750158 13.9726701 13.6440547
定义一个先验函数的参数(狄利克雷函数的超参数),然后绘制图形
在这个图中,数据的先验图在左侧,是红色的;无先验/广义先验(alpha = 1)在右侧,是深灰色的。
# 默认参数,即一个不提供先验信息的、广义先验参数,alpha=1
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
绘图结果(上述代码在rmarkdown中并不输出图,因此用html展示):
JAGS模型有三种误差形式,即
resid_err = TRUE, process_err = TRUE
残差乘过程误差resid_err = TRUE, process_err = FALSE
残差resid_err = FALSE, process_err = TRUE
过程误差注意:如果模型混合物只有1个数据(或每个协变量级别(例如本例中的区域和狼群)只有1个数据),则模型没有关于混合或消费者可变性的信息。
# 生成一个本地txt文档,记录模型信息
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = TRUE,
process_err = TRUE,
mix = mix,
source = source)
选择运行参数(马尔可夫链蒙特卡罗),通过自定义,或调用几种典型参数
# 可选1,自定义运行参数,DIC是对预期预测误差的估计
run <- list(chainLength = 200000, burn = 150000, thin = 50, chains = 3, calcDIC = TRUE)
# 可选2,选择运行参数模板,先用test测试模型数据是否正确,再用normal寻找可能收敛的解
# 此时run可选test/very short/short/normal/long/very long/extreme等字符形式
jags.test <- run_model(run="test", mix, source, discr, model_filename)
# 运行normal将耗时半小时
system.time({
jags.normal <- run_model(run="normal", mix, source, discr, model_filename)
})
定义输出项目,再分析模型计算结果
对于函数输入,需要注意:
对于模型的结果评价,注意以下几点:
gelman
默认,Gelman-Rubin诊断,value < 1.1
可认为模型收敛geweke
默认,Geweke诊断,是一个标准化的双侧z检验,比较链的第一部分的平均值与第二部分的平均值。在收敛时,这些平均值应该是相同的,大的绝对z分数表示拒绝。heidel
Heidelberger-Welch诊断,结果中直接给出哪些是失败的。# 读取并分析运行结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = FALSE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
# 这步,应该有个model_result_5_diagnostics.pdf,但是后几次都没有输出,不知道为什么
output_JAGS(jags.1 = jags.normal,
mix = mix,
source = source,
output_options = output_options)
模型结果中的关键指标:
# 读取模型
source$source_names #模型的源,物源地,按顺序为Deer、Marine Mammals、Salmon
mix$iso_names #模型的汇,指纹因子(iso),按顺序为"d13C" "d15N"
mix$factors #模型的汇,协变量因子(factor),按顺序为Region、Pack
jags.normal$BUGSoutput$mean$p.global #模型求得的全局最终平均贡献比(顺序为物源顺序)
jags.normal$BUGSoutput$sd$p.global #同上,标准差
jags.normal$BUGSoutput$median$p.global #同上,中位数(相加不是1)
jags.normal$BUGSoutput$mean$p.fac1 #因子1内的平均贡献比,即第i行是Region i内source1/2/3的
jags.normal$BUGSoutput$mean$p.fac2 #因子2内的平均贡献比,即第i行是Pack i内source1/2/3的
jags.normal$BUGSoutput$sd$p.fac1 #含义规则同上
jags.normal$BUGSoutput$sd$p.fac2
jags.normal$BUGSoutput$median$p.fac1
jags.normal$BUGSoutput$median$p.fac2
# 如果需要重新绘图,代码可参考output_JAGS,密度数据在jags.1$BUGSoutput$sims.list,全局的例子如下
library(ggplot2)
n.draws <- nrow(jags.1$BUGSoutput$sims.list$p.global)
df <- data.table()
df$sources <- rep(source$source_names, each = n.draws) #df中x和sources的顺序只能这样,否则会出错
df$x <- matrix(jags.1$BUGSoutput$sims.list$p.global, ncol = 1)
ggplot(df, aes(x=x, fill=sources, colour=sources)) +
geom_density(alpha=.3, aes(y=..scaled..)) +
theme_bw() +
xlab("Proportion of Diet") +
ylab("Scaled Posterior Density") +
xlim(0,1) +
labs(title = my.title) +
theme(legend.position=c(1,1),
legend.justification=c(1,1),
legend.title=element_blank())
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 2
# 案例名称
enam <- "geese"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = "Group",
fac_random = FALSE, #是为随机,否为固定
fac_nested = FALSE, #无嵌套关系
cont_effects = NULL) #无连续效应
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子Group
conc_dep = TRUE, #存在浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr) #绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#> 20.27097
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = F, #无过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.normal <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = F)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal, mix, source, output_options)
# 指标含义
source$source_names #模型的源,物源地,"Enteromorpha" "Grass" "U.lactuca" "Zostera"
mix$iso_names #模型的汇,指纹因子,"d13C" "d15N"
mix$factors #模型的汇,协变量因子,"Group"
jags.normal$BUGSoutput$mean$p.global #注意,固定效应中,分组是强制的,这个总的已经没有意义了
jags.normal$BUGSoutput$sd$p.global #同上,
jags.normal$BUGSoutput$median$p.global #同上,
jags.normal$BUGSoutput$mean$p.fac1 #因子1内的平均贡献比,即第i行是Group i内source1/2/3/4的
jags.normal$BUGSoutput$sd$p.fac1 #含义规则同上
jags.normal$BUGSoutput$median$p.fac1
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 3
# 案例名称
enam <- "lake"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = NULL, #无协变量
fac_random = NULL, #无随机和固定
fac_nested = NULL, #无嵌套关系
cont_effects = "Secchi.Mixed")#有连续效应,注意用fread读取,会将变量名中的点号变为冒号
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = F, #不存在浓度依赖变量
data_type = "raw", #数据类型为原始型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
# 黑白图,因为没有factor了
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#> 3.053018
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = F, #无过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.test <- run_model(run="test",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = F)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.test, mix, source, output_options)
# 指标含义
source$source_names #模型的源,物源地,"Subsurface POM" "Surface POM" "Terrestrial Detritus"
mix$iso_names #模型的汇,指纹因子,"d13C" "d15N"
mix$factors #模型的汇,协变量因子,NULL
jags.normal$BUGSoutput$mean$p.global #总体贡献比
jags.normal$BUGSoutput$sd$p.global #同上,
jags.normal$BUGSoutput$median$p.global #同上,
# 本例中的连续效应,由`plot_continuous_var`函数绘制图像,包括连续变量的自身、最大值、中位数、最小值的密度函数图
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 4
# 案例名称
enam <- "killerwhale"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = NULL, #无协变量
fac_random = NULL, #无随机和固定
fac_nested = NULL, #无嵌套关系
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = F, #不存在浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#> 12.77484
# 7.绘制先验分布
# 7.1 无先验信息
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior1_uninf")
# 7.2 有先验信息
# 来自文献的排泄物样品
kw.alpha <- c(10,1,0,0,3)
# 生成alpha超参数,并用标准化
kw.alpha <- kw.alpha*length(kw.alpha)/sum(kw.alpha)
# 狄利克雷分布的alpha超参数不能为0,可将其设置为0.01
kw.alpha[which(kw.alpha==0)] <- 0.01
# 再次绘制先验分布图
plot_prior(alpha.prior = kw.alpha,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior2_inf")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
# 9.1无先验信息模型
system.time({
jags.normal.uninf <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = T)
})
# 9.2有先验信息模型
system.time({
jags.normal.inf <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = kw.alpha,
resid_err = T,
process_err = T)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
# 10.1无先验信息结果
output_options <- list(summary_save = TRUE,
summary_name = "model1_uninf_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model1_uninf_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model1_uninf_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model1_uninf_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model1_uninf_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal.uninf, mix, source, output_options)
# 10.2有先验信息结果
output_options <- list(summary_save = TRUE,
summary_name = "model2_inf_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model2_inf_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model2_inf_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model2_inf_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model2_inf_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal.inf, mix, source, output_options)
结果对比:
无先验信息:
DIC = -4.393189
Mean SD
Epsilon.1 1.121 0.812
Epsilon.2 2.634 2.025
p.global.Chinook 0.368 0.104
p.global.Chum 0.108 0.092
p.global.Coho 0.175 0.131
p.global.Sockeye 0.071 0.059
p.global.Steelhead 0.278 0.092
有先验信息:
DIC = -13.66512
Mean SD
Epsilon.1 0.650 0.433
Epsilon.2 1.441 0.917
p.global.Chinook 0.501 0.029
p.global.Chum 0.073 0.082
p.global.Coho 0.000 0.000
p.global.Sockeye 0.000 0.002
p.global.Steelhead 0.427 0.062
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 5
# 案例名称
enam <- "isopod"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("c16.4w3","c18.2w6","c18.3w3",
"c18.4w3","c20.4w6","c20.5w3",
"c22.5w3","c22.6w3"),
factors = "Site", #有协变量
fac_random = TRUE, #随机
fac_nested = FALSE, #无嵌套关系
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = F, #不存在浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
# 数据来自饲养实验,the sources are actually consumers fed exclusively each of the sources. 因此鉴别数据全是0
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
# 算不了,只有两个指纹因子时,才能算,这里都有8个了
calc_area(source = source, mix = mix, discr = discr)
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = F, #无过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.normal <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = F)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal, mix, source, output_options)
# 11.github额外模型输出函数
# 读取额外的5个函数,注意文件夹内需仅有这5个函数的文件
fdir <- "D:/#R/learn_Bayesian/MixSIAR_github_functions"
funs <- list.files(fdir, full.names = T)
base::source(funs[1]) #output_diagnostics
base::source(funs[2]) #output_pairs
base::source(funs[3]) #output_posteriors
base::source(funs[4]) #output_stats
base::source(funs[5]) #output_xy
# 重新定义一个options,以区别上面的输出文件
# 注意多了一句return_obj = TRUE,用于保持对象,以用来分析和绘图
# 结果与与上面一样,就不保存文件了
output_options2 <- output_options <- list(summary_save = TRUE,
summary_name = "ops2-model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "ops2-model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "ops2-model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "ops2-model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = FALSE,
diag_name = "ops2-model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = FALSE,
plot_pairs_save_png = FALSE,
plot_xy_save_png = FALSE,
diag_save_ggmcmc = FALSE,
return_obj = TRUE) #注意多了一句return_obj = TRUE
# github函数1,输出模型结果诊断(result_5)
diag <- output_diagnostics(jags.1 = jags.normal, mix = mix, source = source, output_options = output_options2)
# github函数2,输出总体贡献比例分布图(result_3),此图无对象输出,及pair为NULL
pair <- output_pairs(jags.1 = jags.normal, mix = mix, source = source, output_options = output_options2)
# github函数3,输出贡献密度图(result_2,分为总体、按因子、方差等)
post <- output_posteriors(jags.1 = jags.normal, mix = mix, source = source, output_options = output_options2)
graphics.off() #注意这个函数也会打开图像设备,并且不会自己关
post$global+ #本质上是ggplot2对象,可以进行修改,也可以读取其中的数据,自己画图
ggplot2::scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9")) +
ggplot2::scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))
post$global+ #本质上是ggplot2对象,可以进行修改,也可以读取其中的数据,自己画图
ggplot2::theme(legend.position="right") #图例经常遮挡图像,这个位置设置可以很好的修改
# github函数4,输出模型的统计结果(result_1)
stat <- output_stats(jags.1 = jags.normal, mix = mix, source = source, output_options = output_options2)
# github函数5,各来源的贡献、标准差(如果有)的线形图(result_4),此图无对象输出,及pair为NULL
xyli <- output_xy(jags.1 = jags.normal, mix = mix, source = source, output_options = output_options2)
graphics.off() #注意这个函数也会打开图像设备,并且不会自己关
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 6
# 案例名称
enam <- "cladocera"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("c14.0","c16.0","c16.1w9","c16.1w7","c16.2w4",
"c16.3w3","c16.4w3","c17.0","c18.0","c18.1w9",
"c18.1w7","c18.2w6","c18.3w6","c18.3w3","c18.4w3",
"c18.5w3","c20.0","c22.0","c20.4w6","c20.5w3",
"c22.6w3","BrFA"),
factors = "id", #有协变量
fac_random = FALSE, #固定
fac_nested = FALSE, #无嵌套关系,
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = F, #不存在浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
# 图太多了不画了,考虑non-metric multidimensional scaling (NMDS) plots吧
# plot_data(filename = "plot_data_isospace",
# plot_save_pdf = F,
# plot_save_png = T,
# mix = mix,
# source = source,
# discr = discr)
# 6.计算凸包面积
# 算不了,只有两个指纹因子时,才能算,这里都有22个了
# calc_area(source = source, mix = mix, discr = discr)
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = F, #无残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.normal <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = F,
process_err = T)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal, mix, source, output_options)
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 7
# 案例名称
enam <- "palmyra"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = "Taxa", #有协变量
fac_random = FALSE, #固定
fac_nested = FALSE, #无嵌套关系,
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = F, #不存在浓度依赖变量
data_type = "raw", #数据类型为原始型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#>1.186978
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.test <- run_model(run="test",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = T)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.test, mix, source, output_options)
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 8
# 案例名称
enam <- "stormpetrel"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = "Region", #有协变量
fac_random = FALSE, #固定
fac_nested = FALSE, #无嵌套关系,
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #来源并无协变量因子
conc_dep = T, #有浓度依赖变量
data_type = "raw", #数据类型为原始型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
# 绘图结果文件名后的"1_2"表示指纹因子1&2之间的数据图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#>13.51959
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.test <- run_model(run="test",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = T)
})
# 10.模型结果
# load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.test, mix, source, output_options)
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 9
# 案例名称
enam <- "snail"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("d13C"),
factors = NULL, #无协变量
fac_random = NULL, #无固定随机
fac_nested = FALSE, #无嵌套关系,
cont_effects = "Length") #有连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = NULL, #无协变量因子
conc_dep = F, #无浓度依赖变量
data_type = "raw", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积(只有一个指纹因子,算不了)
# calc_area(source = source, mix = mix, discr = discr)
# 7.绘制先验分布
plot_prior(alpha.prior = 1,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.test <- run_model(run="test",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = T)
})
# 10.模型结果
load(paste0(edir, "/model_result.RData"))
#10.1无先验信息结果
output_options <- list(summary_save = TRUE,
summary_name = "model_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.test, mix, source, output_options)
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 10
# 案例名称
enam <- "mantis"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- load_mix_data(filename = mix.filename,
iso_names =c("d13C", "d15N"),
factors = "Habitat", #有协变量
fac_random = FALSE, #固定效应
fac_nested = FALSE, #无嵌套关系,
cont_effects = NULL) #无连续因子
# 3.加载物源数据
source.filename <- paste0(edir, "/", enam, "_sources.csv")
source <- load_source_data(filename = source.filename,
source_factors = "Habitat", #有协变量
conc_dep = T, #有浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix)
# 4.加载鉴别因子数据
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
discr <- load_discr_data(filename = discr.filename, mix = mix)
# 5.数据绘图
plot_data(filename = "plot_data_isospace",
plot_save_pdf = F,
plot_save_png = T,
mix = mix,
source = source,
discr = discr)
# 6.计算凸包面积
calc_area(source = source, mix = mix, discr = discr)
#>7.056381 18.789736
# 7.绘制先验分布
# 7.1绘制默认的无先验或一致先验(alpha = 1)
alpha.unif <- rep(1, source$n.sources)
plot_prior(alpha.prior = alpha.unif,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior1_unif")
# 7.2专家知识表明,螳螂虾捕食硬壳动物的数量是软体动物的4倍,据此更新先验信息
alpha.spec <- c(1,1,4,4,1,4)
alpha.spec <- alpha.spec*length(alpha.spec)/sum(alpha.spec)
plot_prior(alpha.prior = alpha.spec,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior2_spec")
# 7.3两个栖息地,物源物种的丰度数据可用来建立先验信息,但是由于是两个栖息地的先验信息,因此只能分别建模求解
# 栖息地seagrass,丰度先验信息
alpha.grass <- c(0.35,1.61,0.43,(51.65+0.26),5.18,40.5)*6/100
plot_prior(alpha.prior = alpha.grass,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior3_1grass")
# 栖息地Coral,丰度先验信息
alpha.coral <- c((14.31+24.74),0.01,15.48,(13.81+4.71),8.44,18.51)*6/100
plot_prior(alpha.prior = alpha.coral,
source = source,
plot_save_pdf = F,
plot_save_png = T,
filename = "plot_source_prior3_2coral")
# 8.建立模型(有无先验信息并不影响)
model_filename <- paste0(edir, "/MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix,
source = source)
# 9.运行模型
system.time({
jags.normal <- run_model(run="normal",
mix = mix,
source = source,
discr = discr,
model_filename = model_filename,
alpha.prior = alpha.spec,
resid_err = T,
process_err = T)
})
# 10.模型结果(专家验信息结果)
load(paste0(edir, "/model_result.RData"))
output_options <- list(summary_save = TRUE,
summary_name = "model1_spec_result_1_summary_statistics",
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = "model1_spec_result_2_posterior_density",
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = "model1_spec_result_3_pairs_plot",
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = "model1_spec_result_4_xy_plot",
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = "model1_spec_result_5_diagnostics",
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.normal, mix, source, output_options)
# 11.物源重组
# 为了回答螳螂虾是否更偏向于捕食硬壳动物,将原6个物源进行融合,同时为了对比,将原物源也做同样转换
# 6物源融合为2物源
combined <- combine_sources(jags.1 = jags.normal,
mix = mix,
source = source,
alpha.prior = alpha.spec,
groups=list(hard=c("clam","crab","snail"),
soft=c("alphworm","brittlestar","fish")))
# 6物源融合为6物源
original <- combine_sources(jags.1 = jags.normal,
mix = mix,
source = source,
alpha.prior = alpha.spec,
groups=list(alphworm="alphworm",
brittlestar="brittlestar",
clam="clam",
crab="crab",
fish="fish",
snail="snail"))
# 输出模型总结
summary_stat(combined = combined,
savetxt = T,
filename = "combined_summary_statistics_sources6to2")
summary_stat(combined = original,
savetxt = T,
filename = "combined_summary_statistics_sources6to6")
# 12.绘制置信区间图(由于bayesplot包的mcmc_intervals绘图功能不能用了,所以下面的绘图功能全部失效!)
# Original 6-source model
plot_intervals(original,toplot="fac1")
# Aggregated posteriors, 6 sources combined into 2 groups
plot_intervals(combined,toplot="fac1")
# level 1 = Seagrass
plot_intervals(combined,toplot="fac1",levels=1)
# level 2 = Coral
plot_intervals(combined,toplot="fac1",levels=2)
案例说明
# 1.建模准备
# 加载包
library(MixSIAR)
# 案例序号
enum <- 11
# 案例名称
enam <- "alligator"
# 案例路径
edir <- paste0("D:/#R/learn_Bayesian/MixSIARexample", enum, "-", enam)
# 定义文件路径
setwd(edir)
# 2.加载混合物数据,分8个模型
mod.n <- 8
mix.filename <- paste0(edir, "/", enam, "_consumer.csv")
mix <- vector("list", mod.n)
mix[[1]] <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = NULL,
fac_random = NULL,
fac_nested = NULL,
cont_effects = NULL)
mix[[2]] <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = "habitat",
fac_random = F,
fac_nested = F,
cont_effects = NULL)
mix[[3]] <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = "sex",
fac_random = F,
fac_nested = F,
cont_effects = NULL)
mix[[4]] <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = "sclass",
fac_random = F,
fac_nested = F,
cont_effects = NULL)
mix[[5]] <- load_mix_data(filename = mix.filename,
iso_names =c("d13C","d15N"),
factors = NULL,
fac_random = NULL,
fac_nested = NULL,
cont_effects = "Length")
mix[[6]] <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = c("sex","sclass"),
fac_random = c(FALSE,FALSE),
fac_nested = c(FALSE,FALSE),
cont_effects = NULL)
mix[[7]] <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = "sex",
fac_random = FALSE,
fac_nested = FALSE,
cont_effects = "Length")
mix[[8]] <- load_mix_data(filename = mix.filename,
iso_names = c("d13C","d15N"),
factors = "sex_sclass",
fac_random = FALSE,
fac_nested = FALSE,
cont_effects = NULL)
# 3.构建并运行模型
source <- vector("list", mod.n)
discr <- vector("list", mod.n)
jags.mod <- vector("list", mod.n)
area.val <- vector("list", mod.n)
source.filename <- paste0(edir, "/", enam, "_sources.csv")
discr.filename <- paste0(edir, "/", enam, "_discrimination.csv")
startTime <- Sys.time()
for(mod in 1:mod.n){
# 新建文件夹以存储文件
mainDir <- getwd()
subDir <- paste0("model_", mod)
dir.create(file.path(mainDir, subDir), showWarnings = FALSE)
setwd(file.path(mainDir, subDir))
tDir <- getwd()
# 读取源
source[[mod]] <- load_source_data(filename = source.filename,
source_factors = NULL, #无协变量因子
conc_dep = F, #无浓度依赖变量
data_type = "means", #数据类型为均值型
mix = mix[[mod]])
# 读TEF
discr[[mod]] <- load_discr_data(filename = discr.filename, mix = mix[[mod]])
# 绘制数据图
plot_data(filename = paste0("model_", mod, "-plot_data_isospace"),
plot_save_pdf = F,
plot_save_png = T,
mix = mix[[mod]],
source = source[[mod]],
discr = discr[[mod]])
# 计算凸包面积
area.val[[mod]] <- calc_area(source = source[[mod]], mix = mix[[mod]], discr = discr[[mod]])
# 绘制先验分布(因为所有模型源都一样,所以没必要都画,这里只在运行前画了一个)
# plot_prior(alpha.prior = 1,
# source = source[[mod]],
# plot_save_pdf = F,
# plot_save_png = T,
# filename = "plot_source_prior")
# 建立模型(有无先验信息并不影响)
model_filename <- paste0(tDir, "/model_",mod,"-MixSIAR_model.txt")
write_JAGS_model(filename = model_filename,
resid_err = T, #有残差
process_err = T, #有过程误差
mix = mix[[mod]],
source = source[[mod]])
# 运行模型
jags.mod[[mod]] <- run_model(run = "short",
mix = mix[[mod]],
source = source[[mod]],
discr = discr[[mod]],
model_filename = model_filename,
alpha.prior = 1,
resid_err = T,
process_err = T)
# 结果分析
output_options <- list(summary_save = TRUE,
summary_name = paste0("model_",mod,"-model_result_1_summary_statistics"),
sup_post = FALSE,
plot_post_save_pdf = FALSE,
plot_post_name = paste0("model_",mod,"-model_result_2_posterior_density"),
sup_pairs = FALSE,
plot_pairs_save_pdf = FALSE,
plot_pairs_name = paste0("model_",mod,"-model_result_3_pairs_plot"),
sup_xy = FALSE,
plot_xy_save_pdf = FALSE,
plot_xy_name = paste0("model_",mod,"-model_result_4_xy_plot"),
gelman = TRUE,
heidel = FALSE,
geweke = TRUE,
diag_save = TRUE,
diag_name = paste0("model_",mod,"-model_result_5_diagnostics"),
indiv_effect = FALSE,
plot_post_save_png = TRUE,
plot_pairs_save_png = TRUE,
plot_xy_save_png = TRUE,
diag_save_ggmcmc = TRUE)
output_JAGS(jags.1 = jags.mod[[mod]],
mix = mix[[mod]],
source = source[[mod]],
output_options = output_options)
graphics.off()
setwd(mainDir)
}
endTime <- Sys.time()
endTime - startTime #耗时3.872049 hours
# 4.模型对比
# MixSIAR使用LOO包,计算LOO (leave-one-out cross-validation)或WAIC (widely applicable information criterion)
# LOO和WAIC是利用参数值的后验模拟评估的对数似然,从拟合贝叶斯模型中估计逐点样本外预测精度的方法
# 简单来说,LOO和WAIC比AIC或DIC要好,LOO比WAIC更鲁棒,loo包计算两个模型的LOO差(dLOOic),MixSIAR使用赤池权重计算相对支持度。
# 为模型添加名字:
names(jags.mod) <- c("Null",
"Habitat",
"Sex",
"Size class",
"Length",
"Sex + Size class",
"Sex + Length",
"Sex : Length")
# 生成对比表
# 这个结果解释为,模型“Length”,赤池权重为0.86,即这个是最优模型的可能性为86%(根据观察到的数据,在考虑的模型之外的新数据的预期预测性能)
comparison.table <- compare_models(jags.mod)
© 2021-2024, LIANG Chen, Institute of Mountain Hazards and Environment, CAS. All rights reserved.