### R code from vignette source '../latent_variable_models_chapter/latent_variable_models.Rnw'

###################################################
### code chunk number 1: f1f2durTwoSpeakerLibs
###################################################
library(lattice)
library(rgl)
library(mvtnorm)


###################################################
### code chunk number 2: f1f2durTwoSpeakerPrep
###################################################
dat <- read.table("../data/mog_vowel_data/vallabha-etal-speaker-means.txt", header=T)
dat <- subset(dat, !(Language %in% "English" & Speaker == 11))
sigma.vars <- paste("Sigma", 1:9, sep="")
mu.vars <- c("F1Mean", "F2Mean", "DurMean")
## reshape a row's nine Sigma entries into a 3x3 covariance matrix
make.covariance.matrix <- function(x) matrix(x, 3, 3)
covs <- with(dat, by(dat[, sigma.vars], list(Speaker, Language, Vowel),
                     make.covariance.matrix))
means <- with(dat, by(dat[, mu.vars], list(Speaker, Language, Vowel),
                      function(x) x))
N <- c(100, 200)  ## points per sample: English, Japanese
cov.english <- covs[, 1, ]
means.english <- means[, 1, ]
cov.japanese <- covs[, 2, ]
means.japanese <- means[, 2, ]
## generate N[i,j] multivariate-normal samples for each speaker i and vowel j
f <- function(means, covs, N) {
  if(!is.matrix(N))
    N <- matrix(rep(N, length(means)), dim(means)[1], dim(means)[2])
  result <- c()
  for(j in 1:dim(means)[2]) {    # j indexes vowel
    for(i in 1:dim(means)[1]) {  # i indexes speaker
      tmp <- rmvnorm(N[i, j], as.numeric(means[i, j][[1]]),
                     apply(covs[i, j][[1]], c(1, 2), function(x) x[[1]]))
      tmp <- cbind(rep(j, N[i, j]), tmp)  # vowel
      tmp <- cbind(rep(i, N[i, j]), tmp)  # speaker
      result <- rbind(result, tmp)
    }
  }
  return(result)
}
vowels <- c("e", "E", "i", "I")
## speaker 12 has good separation; speaker 7 has poor separation
spk <- c(12, 7)
N <- 760 / length(spk)
y <- data.frame(f(means.english[spk, ], cov.english[spk, ], N))
names(y) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y$Vowel <- factor(vowels[y$Vowel])
my.pch <- 15:18
my.col <- gray(c(0, 0.25, 0.5, 0.75))


###################################################
### code chunk number 3: f1f2durTwoSpeaker
###################################################
print(with(y, splom(~ y[, 3:5] | Speaker, groups=Vowel, pch=my.pch, col=my.col,
                    key=list(text=list(levels(Vowel)),
                             points=list(pch=my.pch, col=my.col),
                             space="right"),
                    strip=function(which.panel, ...) {
                      ltext(0.5, 0.5, paste("S", which.panel, sep=""))
                    })))


###################################################
### code chunk number 4: f1f2durMixedSpeakersPrep
###################################################
set.seed(11)
spk <- 1:19
N <- 760 / length(spk)
y <- data.frame(f(means.english[spk, ], cov.english[spk, ], N))
names(y) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y$Vowel <- factor(vowels[y$Vowel])


###################################################
### code chunk number 5: f1f2durMixedSpeakers
###################################################
print(with(y, splom(~ y[, 3:5], groups=Vowel, pch=my.pch, col=my.col,
                    key=list(text=list(levels(Vowel)),
                             points=list(pch=my.pch, col=my.col),
                             space="right"),
                    strip=function(which.panel, ...) {
                      ltext(0.5, 0.5, paste("S", which.panel, sep=""))
                    })))
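## Added sanity check (a minimal sketch, not part of the original
## vignette): for one hypothetical speaker/vowel cell, the sample mean of
## a large rmvnorm() draw should closely match the Vallabha et al. cell
## mean that f() uses to generate it; the differences below should be
## small relative to the formant scale.
spk.check <- 12  # hypothetical choice of speaker
vwl.check <- 1   # hypothetical choice of vowel
mu.check <- as.numeric(means.english[spk.check, vwl.check][[1]])
cov.check <- apply(cov.english[spk.check, vwl.check][[1]], c(1, 2),
                   function(x) x[[1]])
round(colMeans(rmvnorm(5000, mu.check, cov.check)) - mu.check, 1)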
###################################################
### code chunk number 6: mog1d
###################################################
M <- 100
p <- c(0.35, 0.65)
mu <- c(0, 4)
s <- c(1, 2)
x <- seq(-5, 10, by=0.01)
idx <- rbinom(M, 1, p[2]) + 1  # idx == k with probability p[k]
y0 <- rnorm(M, mean=mu[idx], sd=s[idx])
my.col <- gray(c(0.5, 0))
plot(x, dnorm(x, mu[1], s[1]), type='l', lwd=p[1]*5, col=my.col[1],
     ylab="P(x)", xlab="x", lty=2)
lines(x, dnorm(x, mu[2], s[2]), lwd=p[2]*5, col=my.col[2], lty=2)
points(y0, rep(-0.005, length(y0)), col=my.col[idx], pch=c(16, 17)[idx])
y1 <- rnorm(M, mean=mu[idx], sd=s[idx])
points(y1, rep(0.41, length(y1)), col=my.col[1], pch=16)


###################################################
### code chunk number 7: loadMCMCpack
###################################################
library(MCMCpack)


###################################################
### code chunk number 8: sigmaMuPriorPrep
###################################################
mu <- seq(-4, 4, by=0.025)
sigma <- seq(0.05, 2, by=0.025)
sigma.mu <- expand.grid(sigma, mu)
## joint prior density: inverse-Wishart on sigma, normal on mu given sigma
p <- function(sigma.mu, mu0=0, sigma0=2, nu=2, Ainv=10) {
  sigma <- sigma.mu[1]
  mu <- sigma.mu[2]
  return(diwish(matrix(sigma, 1, 1), nu, matrix(sigma0, 1, 1)) *
         dnorm(mu, mu0, sigma*Ainv))
}
z <- mapply(function(x, y) p(c(x, y)), sigma.mu[, 1], sigma.mu[, 2])
df <- data.frame(cbind(sigma.mu, z))
names(df) <- c("sigma", "mu", "z")
sigmaMuPrior <- levelplot(z ~ sigma * mu, df, xlab=expression(sigma),
                          ylab=expression(mu))


###################################################
### code chunk number 9: sigmaMuPrior
###################################################
print(sigmaMuPrior)


###################################################
### code chunk number 10: runGibbsSample
###################################################
### Note that I am approximating having truly hard-coded mixture
### proportions by using a very peaked symmetric Dirichlet prior.
library(bayesm)
library(coda)
M <- 4
niter <- 1000
vars <- c("F1", "F2", "Duration")
res <- rnmixGibbs(Data=list(y=as.matrix(y[, vars])),
                  Prior=list(ncomp=M, a=rep(10000, M)),
                  Mcmc=list(R=niter, keep=1))


###################################################
### code chunk number 11: f1f2MOGlibs
###################################################
library(ellipse)


###################################################
### code chunk number 12: f1f2MOGresultsPrep
###################################################
true.means <- with(y, by(y[, vars], Vowel, colMeans))
true.covs <- with(y, by(y[, vars], Vowel, cov))
mu.sample <- lapply(res$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample <- lapply(res$nmix$compdraw[[niter]], function(x) x$rooti)
## rooti is the inverse of the upper-triangular Cholesky root of a
## component's covariance matrix; invert it back to recover the covariance
rooti.to.cov <- function(x) {
  tmp <- solve(x)
  return(Conj(t(tmp)) %*% tmp)
}
my.pch <- 15:18
my.col <- gray(c(0, 0.25, 0.5, 0.75))
plot.pair <- function(data, true.means, true.covs, mu.sample, rooti.sample,
                      i, j, xlab, ylab) {
  with(data, xyplot(data[, j+2] ~ data[, i+2], groups=Vowel, xlab=xlab,
                    ylab=ylab, col=my.col, pch=".",
                    panel=function(...) {
                      panel.xyplot(...)
                      for(vowel in 1:4) {
                        ## inferred component: solid black ellipse
                        this.cov <- rooti.to.cov(rooti.sample[[vowel]])[c(i, j), c(i, j)]
                        llines(ellipse(this.cov, centre=mu.sample[[vowel]][c(i, j)]),
                               lwd=2)
                        ## true category: dashed ellipse in the category's color
                        llines(ellipse(true.covs[[vowel]][c(i, j), c(i, j)],
                                       centre=true.means[[vowel]][c(i, j)]),
                               lwd=2, col=my.col[vowel], lty=2)
                      }
                    }))
}
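## Added check (a minimal sketch, not part of the original vignette):
## rooti.to.cov() assumes rooti is the inverse of the upper-triangular
## Cholesky root R of Sigma (with Sigma = t(R) %*% R); this verifies that
## the function inverts that construction for a hypothetical covariance.
Sigma.test <- matrix(c(2, 0.5, 0.5, 1), 2, 2)  # hypothetical covariance
rooti.test <- solve(chol(Sigma.test))
all.equal(rooti.to.cov(rooti.test), Sigma.test)  # should be TRUE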
###################################################
### code chunk number 13: f1f2MOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 1, 2, "F1", "F2"))


###################################################
### code chunk number 14: f1DurMOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 15: f2DurMOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 16: confusionTableUnsupervisedPrep
###################################################
### graphical confusion table
plot.confusion.matrix <- function(true.categories, guess.categories) {
  true.guess <- data.frame(true=true.categories, guess=guess.categories)
  ## permute the guess (cluster) columns so that each true category's
  ## best-matching cluster lies on the diagonal
  canonicalize <- function(mat) {
    for(i in 1:(dim(mat)[1]-1)) {
      tmp <- apply(mat, 2, function(x) x/sum(x))
      best <- which.max(as.numeric(tmp[i, i:dim(mat)[2]])) + i - 1
      mat[, c(i, best)] <- mat[, c(best, i)]
    }
    return(mat)
  }
  confusions <- canonicalize(xtabs(~ true + guess, true.guess))
  ## sqrt so that symbol *area* is proportional to the confusion proportion
  ## (assumes 4 categories)
  xyplot(Var1 ~ Var2, expand.grid(1:dim(confusions)[1], 1:dim(confusions)[1]),
         pch=15, cex=20*sqrt(confusions[4:1, ]/sum(confusions)),
         xlim=c(0.5, 4.5), ylim=c(0.5, 4.5),
         scales=list(y=list(labels=c("", rev(levels(factor(true.categories)))))),
         xlab="Cluster", ylab="True vowel")
}


###################################################
### code chunk number 17: confusionTableUnsupervised
###################################################
## unsupervised confusion table
print(plot.confusion.matrix(y$Vowel, res$nmix$zdraw[niter, ]))


###################################################
### code chunk number 18: confusionTableSupervisedPrep
###################################################
supervised.cluster.assignment <- function(x, true.means, true.covs,
                                          p=rep(1/length(true.means),
                                                length(true.means)),
                                          probability.match=T) {
  liks <- mapply(function(mu, sigma, p) return(dmvnorm(x, mu, sigma) * p),
                 true.means, true.covs, p)
  if(probability.match)  # sample a category in proportion to posterior probability
    return(which.max(rmultinom(1, 1, liks/sum(liks))))
  else                   # always take the highest-probability category
    return(which.max(liks))
}
supervised.assignments <- apply(y[, 3:5], 1, function(x)
  supervised.cluster.assignment(x, true.means, true.covs))
supervised.match <- function(x1, x2, same=F) (x1 == x2) == same
s <- sample(1:dim(y)[1], 500)
s1 <- s[(1:250) * 2]
s2 <- s[(1:250) * 2 - 1]
cluster.assignment.performance <- function(samples, x1, x2, same=F) {
  tot <- dim(samples)[2]
  matched <- sum(samples[, x1] == samples[, x2])
  if(same) return(matched/tot)
  else return((tot - matched)/tot)
}
conf.matrix.supervised <- plot.confusion.matrix(y[["Vowel"]], supervised.assignments)


###################################################
### code chunk number 19: confusionTableSupervised
###################################################
print(conf.matrix.supervised)
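## Added illustration (a minimal sketch, not part of the original
## vignette): probability matching vs. maximization in
## supervised.cluster.assignment(). Repeated probability-matching calls
## on a single observation sample categories in proportion to their
## posterior probability, while probability.match=F always returns the
## modal category.
x0 <- as.numeric(y[1, 3:5])  # an arbitrary observation
table(replicate(100, supervised.cluster.assignment(x0, true.means, true.covs)))
supervised.cluster.assignment(x0, true.means, true.covs, probability.match=F)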
###################################################
### code chunk number 20: setupCategoryProbsLearning
###################################################
## vowels are e,E,i,I
#set.seed(20)
#tmp <- c(2.1,2.6,5.0,9.9)
set.seed(18)
tmp <- c(2.1, 2.6, 14.0, 29.9)
vowelProbs <- tmp/sum(tmp)
nobs <- 30
NN <- rmultinom(1, nobs, vowelProbs)  # per-vowel counts for each speaker
matr <- matrix(rep(NN, length(spk)), length(spk), length(NN), byrow=T)
y1 <- data.frame(f(means.english[spk, ], cov.english[spk, ], matr))
names(y1) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y1$Vowel <- factor(vowels[y1$Vowel])
niter <- 1000
library(bayesm)
## moderate Dirichlet prior: mixture weights can be learned from the data
dirichlet.alpha <- 4
res1 <- rnmixGibbs(Data=list(y=as.matrix(y1[, vars])),
                   Prior=list(ncomp=M, a=rep(dirichlet.alpha, M)),
                   Mcmc=list(R=niter, keep=1))
sm <- sample(1:dim(y1)[1], 500)
sm1 <- sm[(1:250) * 2]
sm2 <- sm[(1:250) * 2 - 1]
## very peaked Dirichlet prior: mixture weights effectively pinned near uniform
res2 <- rnmixGibbs(Data=list(y=as.matrix(y1[, vars])),
                   Prior=list(ncomp=M, a=rep(100000, M)),
                   Mcmc=list(R=niter, keep=1))


###################################################
### code chunk number 21: mogLearnedMixtureprobsF1F2
###################################################
true.means1 <- with(y1, by(y1[, vars], Vowel, colMeans))
true.covs1 <- with(y1, by(y1[, vars], Vowel, cov))
p.sample1 <- res1$nmix$probdraw[niter, ]  # mixture weights at the final sweep
mu.sample1 <- lapply(res1$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample1 <- lapply(res1$nmix$compdraw[[niter]], function(x) x$rooti)
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 1, 2, "F1", "F2"))


###################################################
### code chunk number 22: mogLearnedMixtureprobsF1Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 23: mogLearnedMixtureprobsF2Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 24: mogCantLearnMixtureprobsF1F2
###################################################
p.sample2 <- res2$nmix$probdraw[niter, ]  # mixture weights at the final sweep
mu.sample2 <- lapply(res2$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample2 <- lapply(res2$nmix$compdraw[[niter]], function(x) x$rooti)
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 1, 2, "F1", "F2"))


###################################################
### code chunk number 25: mogCantLearnMixtureprobsF1Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 26: mogCantLearnMixtureprobsF2Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 27: mogLearnedMixtureprobsConfusions
###################################################
print(plot.confusion.matrix(y1$Vowel, res1$nmix$zdraw[niter, ]))


###################################################
### code chunk number 28: mogCantLearnMixtureprobsConfusions
###################################################
print(plot.confusion.matrix(y1$Vowel, res2$nmix$zdraw[niter, ]))


###################################################
### code chunk number 29: ldaLibraries
###################################################
library(lda)
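## Added illustration (a minimal sketch, not part of the original
## vignette): lexicalize() represents each document as a 2 x N integer
## matrix whose first row holds 0-based indices into the vocabulary and
## whose second row holds counts, which is why chunk 30 below adds 1 when
## indexing corpus$vocab. The toy sentence here is hypothetical.
toy <- lexicalize("the dog saw the cat")
toy$documents[[1]]  # row 1: 0-based word indices; row 2: counts
toy$vocab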
###################################################
### code chunk number 30: ldaExamplePrep
###################################################
visualize.2.topic <- function(corpus, lda.assignments) {
  ## render each document with topic-1 words (topics are 0-based) in
  ## upper case and topic-0 words in lower case
  f <- function(i) {
    words <- sapply(1:length(lda.assignments[[i]]), function(j) {
      word <- corpus$vocab[corpus$documents[[i]][1, j] + 1]  # word indices are 0-based
      ifelse(lda.assignments[[i]][j] == 1, toupper(word), tolower(word))
    })
    return(paste(paste(words, collapse=" "), "\n", sep=""))
  }
  invisible(lapply(1:length(lda.assignments), f))
}
pretty.print.lda <- function(visualized.text) {
  for(t in visualized.text)
    cat(t)
}
rawDocuments <- readLines(file("../data/lda_examples/mini_corpus.txt"))
corpus <- lexicalize(rawDocuments)


###################################################
### code chunk number 31: ldaExampleShowRawDocuments
###################################################
cat(rawDocuments, sep="\n")


###################################################
### code chunk number 32: ldaExampleRunI
###################################################
set.seed(20)
system.time(m <- lda.collapsed.gibbs.sampler(corpus$documents, K=2,
                                             vocab=corpus$vocab, burnin=9999,
                                             num.iterations=10000,
                                             alpha=1, eta=0.1))


###################################################
### code chunk number 33: ldaExample
###################################################
pretty.print.lda(visualize.2.topic(corpus, m$assignments))


###################################################
### code chunk number 34: ldaExampleRunII
###################################################
set.seed(1)
system.time(m2 <- lda.collapsed.gibbs.sampler(corpus$documents, K=2,
                                              vocab=corpus$vocab, burnin=9999,
                                              num.iterations=10000,
                                              alpha=0.01, eta=0.1))


###################################################
### code chunk number 35: ldaExampleII
###################################################
pretty.print.lda(visualize.2.topic(corpus, m2$assignments))
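## Added check (a minimal sketch, not part of the original vignette):
## with the sparse prior alpha=0.01 each document should be dominated by
## a single topic. document_sums is a K x D matrix of per-document topic
## counts; column-normalizing it gives per-document topic proportions.
round(prop.table(m$document_sums, 2), 2)   # alpha = 1: more mixed documents
round(prop.table(m2$document_sums, 2), 2)  # alpha = 0.01: near one-hot columns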
###################################################
### code chunk number 36: varyetaPrep
###################################################
#### How likely are two words to be assigned to the same topic, as a
#### function of eta?
### first run some models
run.models <- function(alpha, eta, n.iter=100, seeds=1:100) {
  chains <- lapply(seeds, function(seed) {
    set.seed(seed)
    lda.collapsed.gibbs.sampler(corpus$documents, K=2, vocab=corpus$vocab,
                                burnin=n.iter-1, num.iterations=n.iter,
                                alpha=alpha, eta=eta)
  })
  ## assignments[[i]] is a (document length) x (number of chains) matrix
  ## of final-sweep topic assignments for document i
  assignments <- lapply(1:length(chains[[1]]$assignments), function(i) {
    sapply(chains, function(chain) chain$assignments[[i]])
  })
  return(list(assignments=assignments))
}
### then apply this function to the model output: the proportion of chains
### in which word j1 of document i1 and word j2 of document i2 share a topic
same.topic.rate <- function(assignments, i1, j1, i2, j2)
  mean(assignments[[i1]][j1, ] == assignments[[i2]][j2, ])
etas <- c(seq(0.05, 1.05, by=0.2), 2)
set.seed(1)
system.time(res.varyeta <- sapply(etas, function(eta) {
  ms <- run.models(1, eta, seeds=1:1000)
  return(c(same.topic.rate(ms$assignments, 1, 5, 1, 7),
           same.topic.rate(ms$assignments, 2, 2, 4, 5),
           same.topic.rate(ms$assignments, 2, 2, 4, 2),
           same.topic.rate(ms$assignments, 1, 5, 4, 10),
           same.topic.rate(ms$assignments, 1, 5, 5, 9)))
}))


###################################################
### code chunk number 37: varyEta
###################################################
for(i in 1:nrow(res.varyeta)) {
  if(i == 1)
    plot(etas, res.varyeta[i, ], type="l", ylim=c(0, 1), lwd=3,
         ylab="Probability of same topic assignment",
         xlab=expression(sigma[phi]))
  else
    lines(etas, res.varyeta[i, ], lty=i, lwd=3)
}
legend(0.5, 0.3,
       expression(paste("Two instances of ", italic(dog), " in D1"),
                  paste(italic(dog), " in D2 and D4"),
                  paste(italic(dog), " in D2 and ", italic(cat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(mat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(sun), " in D5")),
       lty=1:5, lwd=3)


###################################################
### code chunk number 38: varyAlphaPrep
###################################################
alphas <- c(0.1, 0.25, 0.5, 0.75, 1, 1.5, 2, 5, 10)
system.time(res.varyalpha <- sapply(alphas, function(alpha) {
  ms <- run.models(alpha, 0.1, seeds=1:1000, n.iter=100)
  return(c(same.topic.rate(ms$assignments, 1, 5, 1, 7),
           same.topic.rate(ms$assignments, 2, 2, 4, 5),
           same.topic.rate(ms$assignments, 2, 2, 4, 2),
           same.topic.rate(ms$assignments, 1, 5, 4, 10),
           same.topic.rate(ms$assignments, 1, 5, 5, 9)))
}))


###################################################
### code chunk number 39: varyAlpha
###################################################
for(i in 1:nrow(res.varyalpha)) {
  if(i == 1)
    plot(alphas, res.varyalpha[i, ], type="l", ylim=c(0, 1), lwd=3,
         ylab="Probability of same topic assignment",
         xlab=expression(sigma[theta]))
  else
    lines(alphas, res.varyalpha[i, ], lty=i, lwd=3)
}
legend(4, 0.3,
       expression(paste("Two instances of ", italic(dog), " in D1"),
                  paste(italic(dog), " in D2 and D4"),
                  paste(italic(dog), " in D2 and ", italic(cat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(mat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(sun), " in D5")),
       lty=1:5, lwd=3)
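## Added convenience (a minimal sketch, not part of the original
## vignette): the same-topic rates from chunk 38 as a labeled table, one
## row per word pair and one column per alpha value tried above.
rates.varyalpha <- as.data.frame(res.varyalpha)
names(rates.varyalpha) <- paste("alpha", alphas, sep="=")
rownames(rates.varyalpha) <- c("dog/dog in D1", "dog in D2/D4",
                               "dog in D2 / cat in D4",
                               "dog in D1 / mat in D4",
                               "dog in D1 / sun in D5")
round(rates.varyalpha, 2)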