### R code from vignette source '../parameter_estimation_chapter/parameter_estimation.Rnw' ################################################### ### code chunk number 1: loadLibraries ################################################### library(rjags) library(VGAM) ################################################### ### code chunk number 2: misc ################################################### myFormat <- function(...) { tmp <- format(...) return(sub("e(.*)","\\\\\\\\times 10^{\\1}",tmp)) } ################################################### ### code chunk number 3: RFEconsistency ################################################### p <- 0.3 #x <- seq(0,1,by=0.01) #plot(c(),c(),xlim=c(0,1),ylim=c(0,1)) #i <- 1 #N <- seq(5,105,by=20) #for(n in N) { # k <- round(x*n) # lines(x, choose(n,k) * p^k * (1-p)^(n-k), col=i,lty=i,lwd=3) # i <- i + 1 #} #legend(0.75,0.75,N,lwd=3,lty=1:length(N),col=1:length(N)) n <- seq(100,10000,by=100) plot(c(),c(), xlab=expression(n),ylab=expression(paste("P(a < ",hat(p)," < b)"),sep=""),xlim=c(0,max(n)),ylim=c(0,1)) f <- function(lower, upper,i) { lines(n, pbinom(upper*n, n, p) - pbinom(lower*n, n, p),lwd=i,lty=i,col=i) } uppers <- c(0.31, 0.32, 0.35, 0.4) lowers <- c(0.29, 0.28, 0.25, 0.2) for(i in 1:length(uppers)) { f(lowers[i],uppers[i],i) } lft <- paste("P(", uppers, " < ", sep="") rt <- paste( " < ", lowers, ")", sep="") legend(5000, 0.3, paste(lft,"^p",rt,sep=""),lwd=1:4,lty=1:4,col=1:4) ################################################### ### code chunk number 4: binomialLikelihood ################################################### x <- seq(0,1,by=0.01) f <- function(x) { choose(10,3) * x^3 * (1-x)^7 } plot(x,f(x), xlab=expression(pi), ylab="Likelihood",type="l",lwd=3,xaxp=c(0,1,10)) lines(c(0.3,0.3),c(0,f(0.3)),lwd=2,col=4,lty=2) ################################################### ### code chunk number 5: MLEbias ################################################### set.seed(10) N <- seq(2,100,by=2) m <- c() # initialize m for(n in N) { a <- c() # initialize a b <- c() # initialize b for(j in 1:500) { samp <- runif(n,0,1) #b <- c(b, max(samp)) a <- c(a, min(samp)) } len <- 1 - a m <- c(m, mean(len)) } plot(N,m,type="l",lwd=3, xlab="sample size", ylab="Average MLE interval length as a proportion of true interval length") ################################################### ### code chunk number 6: MLEbiascorrection ################################################### plot(N, m*(N+1)/(N), ylim=c(0.9, 1.1),type="l", xlab="sample size", ylab="Average MLE interval length as a proportion of true interval length") ################################################### ### code chunk number 7: normalvarianceMLEbias ################################################### plot(4,0,type="p",pch="x",xlim=c(0,8),ylim=c(0,1),cex=3, xlab="Y", ylab="P(Y)") x <- seq(0,8,by=0.01) lines(x,dnorm(x,4,2)) lines(x,dnorm(x,4,1),lty=2) lines(x,dnorm(x,4,1/2),lty=3) legend(6,1, c(expression(sigma^2 == 4), expression(sigma^2 == 1), expression(sigma^2 == 1/4)), lty=c(1,2,3),cex=1) ################################################### ### code chunk number 8: normalEstimation ################################################### pb <- read.table("../data/peterson_barney_data/pb.txt",header=T) pb.means <- with(pb,aggregate(data.frame(F0,F1,F2,F3), by=list(Type,Sex,Speaker,Vowel,IPA),mean)) names(pb.means) <- c("Type","Sex","Speaker","Vowel","IPA",names(pb.means)[6:9]) y <- subset(pb.means,Type=="c" & Vowel=="ae")$F3 mu.hat <- mean(y) sigma.hat <- sqrt(var(y)) x <- seq(2000,5000,by=1) plot(x,dnorm(x,mu.hat,sigma.hat),type="l",xlab="F3",ylab="p(F3)",lwd=3) lines(x,dnorm(x,mu.hat,sigma.hat / sqrt(length(y)/(length(y)-1))),lty=2,col="magenta",lwd=3) rug(y) ################################################### ### code chunk number 9: betaDistribution ################################################### x <- seq(0,1,by=0.01) plot(x,dbeta(x, 1,1),type="l",ylim=c(0,4),xlab=expression(pi), ylab=expression(p(pi)),cex.lab=1.3) lines(x,dbeta(x,0.5,0.5),col="magenta") lines(x,dbeta(x,3,3), col="green") lines(x,dbeta(x,3,0.5),col="orange") legend(0.1,4,c("Beta(1,1)","Beta(0.5,0.5)", "Beta(3,3)", "Beta(3,0.5)"),lwd=3,col=c("black","magenta","green","orange"),cex=1.3) ################################################### ### code chunk number 10: binomialBayes ################################################### p <- seq(0,1,by=0.01) old.par <- par(mar=c(5,4,4,5)+0.1) plot(p, dbeta(p, 3, 24),type="l",ylab="P(p)",lwd=2,ylim=c(0,8)) # likelihood s1 <- 1 / (choose(7,2) * beta(3,6) ) lines(p,s1*choose(7,2)*p^2 * (1-p)^5, lwd=1, lty=2, col="magenta") y2ticks <- seq(0,1,by=0.2) axis(4,at=y2ticks*s1, labels=y2ticks) mtext("Likelihood(p)", side=4, line=3) s2 <- 1 / (choose(21, 6) * beta(7, 16)) lines(p,s2*choose(21,6)*p^6 * (1-p)^15, lwd=3, lty=2, col=3) # posteriors lines(p, dbeta(p, 5, 29),type="l",lwd=1,lty=4, col="magenta") lines(p, dbeta(p, 9, 45),type="l",lwd=3,lty=4, col=3) legend(0.45,8,c("Prior","Likelihood (n=7)","Posterior (n=7)","Likelihood (n=21)","Posterior (n=21)"), lwd=c(2,1,1,3,3), col=c(1, "magenta", "magenta", 3, 3), lty=c(1,2,3,2,3)) par <- old.par ################################################### ### code chunk number 11: betabinomial ################################################### r <- 50 k <- 0:r dbbinom <- function(k, r, alpha1, alpha2) { numerator <- 1 denominator <- 1 x <- alpha1 + k - 1 while(x >= alpha1) { numerator <- numerator * x x <- x - 1 } x <- alpha2 + r - k - 1 while(x >= alpha2) { numerator <- numerator * x x <- x - 1 } x <- alpha1 + alpha2 + r - 1 while(x >= alpha1 + alpha2) { denominator <- denominator * x x <- x - 1 } choose(r, k) * numerator / denominator } plot(k, dbinom(k, r, 4/27), type="l",lty=2, lwd=2,ylab="P(k passives out of 50 trials | y, I)") lines(k, sapply(k, function(x) dbbinom(x, r, 5, 29)), lty=1,lwd=3,col="magenta") legend(25, 0.15, c("Binomial", "Beta-Binomial"), lwd=c(2,3),lty=c(2,1),col=c("black","magenta")) ################################################### ### code chunk number 12: jagsPosteriorMean ################################################### ls() rm(i,p) set.seed(45) # first, set up observed data response <- c(rep(1,5),rep(0,5)) # now compile the BUGS model m <- jags.model("../jags_examples/asymm_binomial_prior/asymm_binomial_prior.bug",data=list("response"=response)) # initial period of running the model to get it converged update(m,1000) # Now get samples res <- coda.samples(m, c("p","prediction1","prediction2"), thin = 20, n.iter=5000) # posterior predictions not completely consistent due to sampling noise print(apply(res[[1]],2,mean)) posterior.mean <- apply(res[[1]],2,mean) ################################################### ### code chunk number 13: densityBinomialNonConjugate ################################################### plot(density(res[[1]][,1]),xlab=expression(pi),ylab=expression(paste("p(",pi,")"))) ################################################### ### code chunk number 14: binomialNonConjugateIII ################################################### # plot posterior predictive distribution 2 preds2 <- table(res[[1]][,3]) plot(preds2/sum(preds2),type='h',xlab="r",ylab="P(r|y)",lwd=4,ylim=c(0,0.25)) posterior.mean.predicted.freqs <- dbinom(0:10,10,posterior.mean[1]) x <- 0:10 + 0.1 arrows(x, 0, x, posterior.mean.predicted.freqs,length=0,lty=2,lwd=4,col="magenta") legend(0,0.25,c(expression(paste("Marginizing over ",pi)),"With posterior mean"),lty=c(1,2),col=c("black","magenta"),lwd=4) ################################################### ### code chunk number 15: autocorrelation ################################################### m <- jags.model("../jags_examples/asymm_binomial_prior/asymm_binomial_prior.bug",data=list("response"=response)) # initial period of running the model to get it converged update(m,1000) res <- coda.samples(m, c("p","prediction1","prediction2"), thin = 1, n.iter=5000) autocorr(res,lags=c(1,5,10,20,50)) ################################################### ### code chunk number 16: binomialNonConjugateI ################################################### x <- seq(-0.05,1.05,by=0.001) y <- sapply(x, function(x) { if(x < 0 | x > 1) 0 else if(x > 0.5) 4/3 else 2/3 }) plot(x,y,type="l",xlab=expression(pi),ylab=expression(P(pi)),ylim=c(0,1.5)) ################################################### ### code chunk number 17: parameter_estimation.Rnw:1740-1741 ################################################### plot(density(res[[1]][,1]),xlab=expression(pi),ylab=expression(paste("p(",pi,")"))) ################################################### ### code chunk number 18: parameter_estimation.Rnw:1747-1748 ################################################### # plot posterior predictive distribution 2 preds2 <- table(res[[1]][,3]) plot(preds2/sum(preds2),type='h',xlab="r",ylab="P(r|y)",lwd=4,ylim=c(0,0.25)) posterior.mean.predicted.freqs <- dbinom(0:10,10,posterior.mean[1]) x <- 0:10 + 0.1 arrows(x, 0, x, posterior.mean.predicted.freqs,length=0,lty=2,lwd=4,col="magenta") legend(0,0.25,c(expression(paste("Marginizing over ",pi)),"With posterior mean"),lty=c(1,2),col=c("black","magenta"),lwd=4) ################################################### ### code chunk number 19: childF3 ################################################### pb <- read.table("../data/peterson_barney_data/pb.txt",header=T) pb.means <- with(pb,aggregate(data.frame(F0,F1,F2,F3), by=list(Type,Sex,Speaker,Vowel,IPA),mean)) names(pb.means) <- c("Type","Sex","Speaker","Vowel","IPA",names(pb.means)[6:9]) set.seed(18) response <- subset(pb.means,Vowel=="ae" & Type=="c")[["F3"]] M <- 10 # number of predictions to make m <- jags.model("../jags_examples/child_f3_formant/child_f3_formant.bug",data=list("response"=response,"M"=M)) update(m,1000) res <- coda.samples(m, c("mu","sigma","predictions"),n.iter=20000,thin=1) ################################################### ### code chunk number 20: childF3plot ################################################### # compute posterior mean and standard deviation mu.mean <- mean(res[[1]][,1]) sigma.mean <- mean(res[[1]][,12]) # plot Bayesian density estimate from <- 1800 to <- 4800 x <- seq(from,to,by=1) plot(x,dnorm(x,mu.mean,sigma.mean),col="magenta",lwd=3,lty=2,type="l",xlim=c(from,to),xlab="F3 formant frequency",ylab="p(F3)") lines(density(res[[1]][,2],from=from,to=to),lwd=3) rug(response) legend(from,0.0011,c("marginal density","density from\nposterior mean"),lty=c(1,2),lwd=2,col=c("black","magenta")) ################################################### ### code chunk number 21: childF3predictedMeanPlot ################################################### # plot density estimate over mean observed in 10 more observations from <- 2500 to <- 4100 plot(x,dnorm(x,mu.mean,sigma.mean/sqrt(M)),type="l",lty=2,col="magenta",lwd=3,xlim=c(from,to),xlab="F3 formant frequency",ylab="p(F3)") # posterior mean lines(density(apply(res[[1]][,2:11],1,mean,from=from,to=to)),lwd=3) # using samples to marginalize rug(response) legend(from,0.0035,c("marginal density","density from\nposterior mean"),lty=c(1,2),lwd=2,col=c("black","magenta")) ################################################### ### code chunk number 22: parameter_estimation.Rnw:1885-1887 (eval = FALSE) ################################################### ## # compute posterior mean and standard deviation ## mu.mean <- mean(res[[1]][,1]) ## sigma.mean <- mean(res[[1]][,12]) ## # plot Bayesian density estimate ## from <- 1800 ## to <- 4800 ## x <- seq(from,to,by=1) ## plot(x,dnorm(x,mu.mean,sigma.mean),col="magenta",lwd=3,lty=2,type="l",xlim=c(from,to),xlab="F3 formant frequency",ylab="p(F3)") ## lines(density(res[[1]][,2],from=from,to=to),lwd=3) ## rug(response) ## legend(from,0.0011,c("marginal density","density from\nposterior mean"),lty=c(1,2),lwd=2,col=c("black","magenta")) ## # plot density estimate over mean observed in 10 more observations ## from <- 2500 ## to <- 4100 ## plot(x,dnorm(x,mu.mean,sigma.mean/sqrt(M)),type="l",lty=2,col="magenta",lwd=3,xlim=c(from,to),xlab="F3 formant frequency",ylab="p(F3)") # posterior mean ## lines(density(apply(res[[1]][,2:11],1,mean,from=from,to=to)),lwd=3) # using samples to marginalize ## rug(response) ## legend(from,0.0035,c("marginal density","density from\nposterior mean"),lty=c(1,2),lwd=2,col=c("black","magenta"))