#******************************************************************************* # 15 Prior influence and parameter estimability #******************************************************************************* # # Korner-Nievergelt, F., T. Roth, S. von Felten, J. Guelat, B. Almasi, and # P. Korner-Nievergelt. 2015. Bayesian Data Analysis in Ecology using Linear # Models with R, BUGS and Stan. Elsevier. # # Last modification: 2 October 2014 #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- ## 15.1. How to specify prior distributions #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- # The Beta distribution #------------------------------------------------------------------------------- colkey <- colorRampPalette(c("orange", "blue"))(6) x <- seq(0,1,length=100) jpeg(filename = "../figures/Figure15-1_beta.jpg", width = 6000, height = 6000, units = "px", res=1200) par(mar=c(4,4,1,1), mgp=c(2.5,1,0)) plot(c(0,1), c(0,6), type="n", las=1, xlab="p", ylab="Density", cex.lab=1.2) for(i in 1:6){ a <- c(1, 2,2,10,17,3)[i] b <- c(10,9,2,10, 6,1)[i] lines(x, dbeta(x, a, b), lwd=2, col=colkey[i]) text(a/(a+b) + as.numeric(i %in% 1:2)*0.03, 1.06*dbeta(a/(a+b), a, b), paste0("Beta(",a,",",b,")"), adj=0, srt=90, col=colkey[i]) } dev.off() #------------------------------------------------------------------------------- ## 15.2. Prior sensitivity analysis #------------------------------------------------------------------------------- library(rstan) #------------------------------------------------------------------------------- # Simulate fake data #------------------------------------------------------------------------------- set.seed(34) n <- 50 # sample size sigma <- 5 # standard deviation of the residuals b0 <- 2 # intercept b1 <- 0.7 # slope x <- runif(n, 10, 30) # random numbers of the covariate simresid <- rnorm(n, 0, sd=sigma) # residuals y <- b0 + b1*x + simresid # calculate y, i.e. the data # define different prior variances for the slope parameter tauprior <- exp(seq(-5, 5, length=20)) #------------------------------------------------------------------------------- # Bundle data into a list #------------------------------------------------------------------------------- datax <- list(n=length(y), y=y, x=x, tauprior=tauprior) #------------------------------------------------------------------------------- # Run STAN #------------------------------------------------------------------------------- fit <- stan(file = "Stan/linreg.stanpriorinfluence.txt", data=datax, chains=10, iter=1000) print(fit, c("beta0", "beta1", "sigma")) modsims <- extract(fit) slopes <- apply(modsims$beta1, 2, mean) slopes.se <- apply(modsims$beta1, 2, sd) intc <- apply(modsims$beta0, 2, mean) intc.se <- apply(modsims$beta0, 2, sd) sigmas <- apply(modsims$sigma, 2, mean) sigmas.se <- apply(modsims$sigma, 2, sd) jpeg(filename = "../figures/Figure15-2_priorinfl.jpg", width = 6000, height = 6000, units = "px", res=1200) plot(log(tauprior), slopes, las=1, ylab="Estimated slope with SE", xlab="Prior standard deviation", ylim=c(0,0.9), pch=16, xaxt="n", cex.lab=1.2) axis(1, at=log(tauprior[c(1,5,10,15,20)]), labels=round(tauprior[c(1,5,10,15,20)], c(2,2,2,0,0))) segments(log(tauprior), slopes-slopes.se, log(tauprior), slopes+slopes.se, lwd=2, lend="butt") dev.off() #----------------------------------------------------------------- # priors for variance parameters #----------------------------------------------------------------- dtstan <- function(x, v,m,s){ gamma((v+1)/2)/(gamma(v/2)*sqrt(v*pi*s)) * (1+1/v*((x-m)/s)^2)^(-(v+1)/2) } foldedt <- function(v, m=0, s, nsim=1000000, plot=FALSE){ z <- rnorm(nsim) x <- rchisq(nsim, df=v) sigma <- abs(m + s*z*sqrt(v/x)) res <- list(mean=mean(sigma), q=quantile(sigma, prob=c(0.01, 0.05, 0.5, 0.9, 0.95)), x=sigma) return(res) } results <- data.frame(prior="folded t", v= c(1, 1, 2, 2, 3, 3, 4, 4), m= c(0, 0, 0, 0, 0,0,0,0), s= c(1, 2, 1, 2, 2, 3, 2, 3)) results$mean <- NA results$q01 <- NA results$q05 <- NA results$q50 <- NA results$q90 <- NA results$q95 <- NA cex.main <- 0.8 ylimupper <- 0.68 jpeg(filename = "../figures/Figure15-3_foldedta.jpg", width = 6000, height = 7200, units = "px", res=1200) # foldet t par(mfrow=c(5,4), mar=c(1,1,2,1), oma=c(2,2,0,0)) for(i in 1:nrow(results)){ temp <- foldedt(results$v[i], results$m[i], results$s[i]) results$mean[i] <- temp$mean results[i, c("q01", "q05", "q50", "q90", "q95")] <- as.numeric(temp$q) hist(temp$x, breaks=seq(0, max(temp$x)+1, by=0.5), xlim=c(0,50), main=paste0("folded t (", results$v[i], ",", results$m[i], ",",results$s[i],")"), freq=FALSE, ylim=c(0, ylimupper), axes=FALSE, xlab=NA, ylab=NA, cex.main=cex.main, col="blue") if(is.element(i, c(1,5,9,13))) axis(2, las=1) box() text(8, 0.35, paste0("q50 = ", round(results$q50[i], 1), "\n", "q90 = ", round(results$q90[i], 1), "\n"), adj=c(0,1)) } # truncated Cauchy for(i in 1:4){ cauchypars <- c(1,3,5,10) tx <- rcauchy(1000000, 0, cauchypars[i]) tx <- abs(tx) #tx[tx>=0] hist(tx, breaks=seq(0, max(tx)+1, by=0.5), xlim=c(0,50), main=paste0("truncated Cauchy(0,", cauchypars[i], ")"), xlab=NA, cex.main=cex.main, freq=FALSE, ylim=c(0, ylimupper), las=1, axes=FALSE, col="blue") if(i==1) axis(2, las=1) text(8, 0.35, paste0("q50 = ", round(median(tx), 1), "\n", "q90 = ", round(quantile(tx, 0.9), 1), "\n"), adj=c(0,1)) box() } # Uniform unifpar <- c(5, 10, 100, 1000) for(i in 1:4){ tx <- runif(1000000, 0, unifpar[i]) hist(tx, breaks=seq(0, max(tx)+1, by=0.5), xlim=c(0,50), main=paste0("Unif(0,", unifpar[i],")"), cex.main=cex.main, freq=FALSE, ylim=c(0, ylimupper), las=1, xaxt="n", xlab=NA, yaxt="n", col="blue") if(i==1) axis(2, las=1) text(8, 0.35, paste0("q50 = ", round(median(tx), 1), "\n", "q90 = ", round(quantile(tx, 0.9), 1), "\n"), adj=c(0,1)) box() } # inverse Gamma gammapars <- c(1, 0.1, 0.01) for(i in 1:3){ invtx <- rgamma(1000000, gammapars[i],gammapars[i]) tx <- 1/invtx tx[tx>1e6] <- 1e6 th <- hist(tx, breaks=seq(0, max(tx)+1, by=0.5), xlim=c(0,50), main=paste0("inverse Gamma(", gammapars[i], ",", gammapars[i],")"), yaxt="n", xlab=NA, cex.main=cex.main, freq=FALSE, ylim=c(0, ylimupper), yaxt="n", col="blue") if(i ==1) axis(2, las=1) if(i==1) text(8, 0.35, paste0("q50 = ", round(median(tx), ifelse(i<2, 1,0)), "\n", "q90 = ", round(quantile(tx, 0.9), ifelse(i<2, 1,0)), "\n"), adj=c(0,1)) if(i==2) text(8, 0.35, paste0("mean > 10000\n", "q50 = ", round(median(tx), ifelse(i<2, 1,0)), "\n", "q90 > 10000\n"), adj=c(0,1)) if(i==3) text(8, 0.35, paste0("q50 > 10000\n", "q90 > 10000\n"), adj=c(0,1)) box() } plot(1,1, type="n", yaxt="n", main="improper prior", cex.main=cex.main, xlim=c(0,50)) text(25,1, "not available") box() dev.off() # end of figure with different priors----------------------- #------------------------------------------------------------------------------- # assess prior influence #------------------------------------------------------------------------------- datax <- list(n=length(y), y=y, x=x, priorparsigma=as.numeric(results[1, c("v", "m", "s")])) fithalft <- stan(file = "Stan/linreg.stanpriorinflusigma.txt", data=datax, chains=10, iter=1000) print(fithalft, c("beta0", "beta1", "sigma")) modsims <- extract(fithalft) results$slopes[1] <- mean(modsims$beta1) results$slopes.se[1] <- sd(modsims$beta1) results$intc[1] <- mean(modsims$beta0) results$intc.se[1] <- sd(modsims$beta0) results$sigmas[1] <- mean(modsims$sigma) results$sigmas.se[1] <- sd(modsims$sigma) results$sigmas.lwr[1] <- quantile(modsims$sigma, prob=0.025) results$sigmas.upr[1] <- quantile(modsims$sigma, prob=0.975) for(i in 2:8){ datax <- list(n=length(y), y=y, x=x, priorparsigma=as.numeric(results[i, c("v", "m", "s")])) fithalft <- stan(file = "Stan/linreg.stanpriorinflusigma.txt", data=datax, chains=10, iter=1000, fit=fithalft) print(fithalft, c("beta0", "beta1", "sigma")) modsims <- extract(fithalft) results$slopes[i] <- mean(modsims$beta1) results$slopes.se[i] <- sd(modsims$beta1) results$intc[i] <- mean(modsims$beta0) results$intc.se[i] <- sd(modsims$beta0) results$sigmas[i] <- mean(modsims$sigma) results$sigmas.se[i] <- sd(modsims$sigma) results$sigmas.lwr[i] <- quantile(modsims$sigma, prob=0.025) results$sigmas.upr[i] <- quantile(modsims$sigma, prob=0.975) } #------------------------------------------------------------- # add unif, gamma, truncated cauchy and improper priors with sim tempdat <- data.frame(prior=c("truncated Cauchy(0,1)", "truncated Cauchy(0,3)", "truncated Cauchy(0,5)","truncated Cauchy(0,10)", "Unif(0,5)", "Unif(0,10)", "Unif(0,100)", "Unif(0,1000)", "inverse Gamma(1,1)", "inverse Gamma(0.1,0.1)", "inverse Gamma(0.01,0.01)", "improper")) results <- merge(results, tempdat, all=TRUE) results <- results[c(1:8, 15, 16, 13, 14, 20, 17, 18, 19, 12, 11, 10, 9),] # re-order # fit models--------------------------------------------------- # Uniform distribution for(k in 1:4) { datax <- list(n=length(y), y=y, x=x, uppersigma=unifpar[k]) if(k==1) fitunif <- stan(file = "Stan/linreg.stanunifprior.txt", data=datax, chains=10, iter=1000) else fitunif <- stan(file = "Stan/linreg.stanunifprior.txt", data=datax, chains=10, iter=1000, fit=fitunif) print(fitunif, c("beta0", "beta1", "sigma")) modsims <- extract(fitunif) i <- results$prior==paste0("Unif(0,", unifpar[k],")") results$slopes[i] <- mean(modsims$beta1) results$slopes.se[i] <- sd(modsims$beta1) results$intc[i] <- mean(modsims$beta0) results$intc.se[i] <- sd(modsims$beta0) results$sigmas[i] <- mean(modsims$sigma) results$sigmas.se[i] <- sd(modsims$sigma) results$sigmas.lwr[i] <- quantile(modsims$sigma, prob=0.025) results$sigmas.upr[i] <- quantile(modsims$sigma, prob=0.975) } # inverse gamma for(k in 1:3){ datax <- list(n=length(y), y=y, x=x, parssigma=gammapars[k]) if(k==1) fitgamma <- stan(file = "Stan/linreg.stangammaprior.txt", data=datax, chains=10, iter=1000) else fitgamma <- stan(file = "Stan/linreg.stangammaprior.txt", data=datax, chains=10, iter=1000, fit=fitgamma) print(fitgamma, c("beta0", "beta1", "sigma")) modsims <- extract(fitgamma) i <- results$prior==paste0("inverse Gamma(", gammapars[k],",",gammapars[k],")") results$slopes[i] <- mean(modsims$beta1) results$slopes.se[i] <- sd(modsims$beta1) results$intc[i] <- mean(modsims$beta0) results$intc.se[i] <- sd(modsims$beta0) results$sigmas[i] <- mean(modsims$sigma) results$sigmas.se[i] <- sd(modsims$sigma) results$sigmas.lwr[i] <- quantile(modsims$sigma, prob=0.025) results$sigmas.upr[i] <- quantile(modsims$sigma, prob=0.975) } # truncated Cauchy for(k in 1:4){ datax <- list(n=length(y), y=y, x=x, cauchypar=c(0, cauchypars[k])) if(k==1) fitcauchy <- stan(file = "Stan/linreg.stancauchyprior.txt", data=datax, chains=10, iter=1000) else fitcauchy <- stan(file = "Stan/linreg.stancauchyprior.txt", data=datax, chains=10, iter=1000, fit=fitcauchy) print(fitcauchy, c("beta0", "beta1", "sigma")) modsims <- extract(fitcauchy) i <- results$prior==paste0("truncated Cauchy(0,", cauchypars[k], ")") results$slopes[i] <- mean(modsims$beta1) results$slopes.se[i] <- sd(modsims$beta1) results$intc[i] <- mean(modsims$beta0) results$intc.se[i] <- sd(modsims$beta0) results$sigmas[i] <- mean(modsims$sigma) results$sigmas.se[i] <- sd(modsims$sigma) results$sigmas.lwr[i] <- quantile(modsims$sigma, prob=0.025) results$sigmas.upr[i] <- quantile(modsims$sigma, prob=0.975) } # improper priors mod <- lm(y~x) library(arm) bsim <- sim(mod, n.sim=3000) i <- results$prior=="improper" results$slopes[i] <- mean(bsim@coef[,2]) results$slopes.se[i] <- sd(bsim@coef[,2]) results$intc[i] <- mean(bsim@coef[,1]) results$intc.se[i] <- sd(bsim@coef[,1]) results$sigmas[i] <- mean(bsim@sigma) results$sigmas.se[i] <- sd(bsim@sigma) results$sigmas.lwr[i] <- quantile(bsim@sigma, prob=0.025) results$sigmas.upr[i] <- quantile(bsim@sigma, prob=0.975) #-------------------------------------------------------------------------------- # add results from zero-inflated model library(blmeco) data(blackstork) # Prepare data for STAN y <- blackstork$njuvs N <- nrow(blackstork) nest <- as.numeric(factor(blackstork$nest)) Nnests <- nlevels(factor(blackstork$nest)) year <- as.numeric(scale(blackstork$year)) # truncated cauchy for(k in 1:4){ cauchypar <- c(0, cauchypars[k]) datax <- c("y", "N", "nest", "Nnests", "year", "cauchypar") if(k==1) zipfitcauchy <- stan(file = "Stan/zeroinflcauchy.stan", data=datax, chains=5, iter=1000) else zipfitcauchy <- stan(file = "Stan/zeroinflcauchy.stan", data=datax, chains=5, iter=1000, fit=zipfitcauchy) print(zipfitcauchy, c("a", "b", "sigmatheta")) modsims <- extract(zipfitcauchy) i <- results$prior==paste0("truncated Cauchy(0,",cauchypar[2],")") results$zipsigmas[i] <- mean(modsims$sigmatheta) results$zipsigmas.se[i] <- sd(modsims$sigmatheta) results$zipsigmas.lwr[i] <- quantile(modsims$sigmatheta, prob=0.025) results$zipsigmas.upr[i] <- quantile(modsims$sigmatheta, prob=0.975) } # half-t priors for(k in 1:8){ priorpars <- c(results$v[k], results$m[k], results$s[k]) datax <- c("y", "N", "nest", "Nnests", "year", "priorpars") if(k==1) zipfithalft <- stan(file = "Stan/zeroinflhalft.stan", data=datax, chains=5, iter=1000) else zipfithalft <- stan(file = "Stan/zeroinflhalft.stan", data=datax, chains=5, iter=1000, fit=zipfithalft) print(zipfithalft, c("a", "b", "sigmatheta")) modsims <- extract(zipfithalft) i <- k results$zipsigmas[i] <- mean(modsims$sigmatheta) results$zipsigmas.se[i] <- sd(modsims$sigmatheta) results$zipsigmas.lwr[i] <- quantile(modsims$sigmatheta, prob=0.025) results$zipsigmas.upr[i] <- quantile(modsims$sigmatheta, prob=0.975) } # uniform priors for(k in 1:4){ uppersigma <-unifpar[k] datax <- c("y", "N", "nest", "Nnests", "year", "uppersigma") if(k==1) zipfitunif <- stan(file = "Stan/zeroinfluniform.stan", data=datax, chains=5, iter=1000) else zipfitunif <- stan(file = "Stan/zeroinfluniform.stan", data=datax, chains=5, iter=1000, fit=zipfitunif) print(zipfitunif, c("a", "b", "sigmatheta")) modsims <- extract(zipfitunif) i <- results$prior==paste0("Unif(0,", uppersigma, ")") results$zipsigmas[i] <- mean(modsims$sigmatheta) results$zipsigmas.se[i] <- sd(modsims$sigmatheta) results$zipsigmas.lwr[i] <- quantile(modsims$sigmatheta, prob=0.025) results$zipsigmas.upr[i] <- quantile(modsims$sigmatheta, prob=0.975) } # unif (0,1000) did not work! # inverse gamma priors for sigma2 for(k in 1:3){ priorpars <- c(gammapars[k], gammapars[k]) datax <- c("y", "N", "nest", "Nnests", "year", "priorpars") if(k==1) zipfitgamma <- stan(file = "Stan/zeroinflgamma.stan", data=datax, chains=5, iter=1000) else zipfitgamma <- stan(file = "Stan/zeroinflgamma.stan", data=datax, chains=5, iter=1000, fit=zipfitgamma) print(zipfitgamma, c("a", "b", "sigmatheta")) modsims <- extract(zipfitgamma) i <- results$prior==paste0("inverse Gamma(", gammapars[k], ",", gammapars[k],")") results$zipsigmas[i] <- mean(modsims$sigmatheta) results$zipsigmas.se[i] <- sd(modsims$sigmatheta) results$zipsigmas.lwr[i] <- quantile(modsims$sigmatheta, prob=0.025) results$zipsigmas.upr[i] <- quantile(modsims$sigmatheta, prob=0.975) } #save(results, file="results_prior_sensitivity_variances3.RData") #load("results_prior_sensitivity_variances3.RData") #-------------------------------------------------------------------------------- # draw figure 15-4 #------------------------------------------------------------------------------- index <- results$prior=="folded t" results$priorname <- c(paste0("folded t (", results$v[index], ",", results$m[index], ",", results$s[index], ")"), as.character(results$prior)[!index]) results1 <- results[c(1:8, 11, 9, 10, 12, 13:20),] results1 <- results1[rev(1:nrow(results)),] jpeg(filename = "../figures/Figure15-4_sigmas.jpg", width = 9000, height = 6000, units = "px", res=1200) par(mfrow=c(1,2), mar=c(5,0.5,2,0.5), oma=c(0,10,0,0)) plot(seq(4, 6.75, length=20), 1:20, type="n", yaxt="n", xlab="Sigma", ylab=NA, main="Linear regression") axis(2, at=1:20, las=1,cex.axis=0.8, labels=results1$priorname) segments(results1$sigmas.lwr, 1:20, results1$sigmas.upr, 1:20, lwd=2, lend="butt") points(results1$sigmas, 1:20, pch=21, bg="white") abline(v=seq(4, 7.5, by=0.5), lty=3, col=grey(0.5)) plot(seq(0.3, 1.5, length=20), 1:20, type="n", yaxt="n", xlab="Sigma", ylab=NA, main="ZIP") segments(results1$zipsigmas.lwr, 1:20, results1$zipsigmas.upr, 1:20, lwd=2, lend="butt") points(results1$zipsigmas, 1:20, pch=21, bg="white") abline(v=seq(0, 7.5, by=0.25), lty=3, col=grey(0.5)) dev.off() # ------------------------------------------------------------------------------------- ## 15.3 Parameter estimability # ------------------------------------------------------------------------------------- library(blmeco) library(R2OpenBUGS) data(nightingales) y <- nightingales class(y) dim(y) # Prepare some additional data N <- dim(y)[1] # Number of territories T <- dim(y)[2] # Number of study years J <- dim(y)[3] # Number of yearly visits # bundle the data datax <- list(y=y, N=N, T=T, J=J) #------------------------------------------------------------------------------- # Calculate initial values #------------------------------------------------------------------------------- # Calculate x[i,t] --> x[i,t] = 1 if a nightingale was observed at least once during the J visits x <- array(NA, dim = c(N,T)) for(t in 1:T) x[,t] <- as.integer(apply(y[,t,], 1, sum)>0) # Function to calculate initial values inits <- function() { list(omega = runif(1,0,1), phi = runif(1,0,1), r = runif(1,0,1), p = runif(1,0,1), x=x ) } #------------------------------------------------------------------------------- # Run WinBUGS #------------------------------------------------------------------------------- # Parameters to be monitored parameters <- c("phi", "r", "p", "omega") # Run OpenBUGS bugsmod <- bugs(datax, inits, parameters, model.file="territoryoccupancy.txt", n.thin=2, n.chains=2, n.burnin=1000, n.iter=5000, debug=FALSE, working.directory = getwd()) #------------------------------------------------------------------------------- library(birdring) overlap(bugsmod$sims.list$phi, prior="unif01") overlap(bugsmod$sims.list$r, prior="unif01") overlap(bugsmod$sims.list$p, prior="unif01") overlap(bugsmod$sims.list$omega, prior="unif01") jpeg(filename = "../figures/Figure15-5_overlap.jpg", width = 6000, height = 6000, units = "px", res=1200) par(mfrow=c(2,2), mar=c(3,2,2,1), oma=c(0,3,0,0)) for(i in 1:4){ dposterior <- density(as.numeric(bugsmod$sims.array[,,i]), from=0, to=1) plot(c(0,1), c(0,15), lwd=3, lty=3, type="n", las=1, ylab=NA, xlab="", main=dimnames(bugsmod$sims.array)[[3]][i]) if(is.element(i, c(1,3))) axis(2, las=1) if(is.element(i, c(1,3))) mtext("Density", side=2, line=3) abline(h=1, lwd=2, lty=3) lines(dposterior, lwd=2) if(i==4)legend(0, 14, lwd=2, lty=c(3,1), legend=c("prior", "posterior"), bty="n") y <- apply(cbind(rep(1, length(dposterior$y)), dposterior$y), 1, min) polygon(c(dposterior$x, rev(dposterior$x)), c(y, rep(0, length(dposterior$x))), border=NA, col=grey(.5)) } dev.off()