### R code from vignette source '../latent_variable_models_chapter/latent_variable_models.Rnw'

###################################################
### code chunk number 1: f1f2durTwoSpeakerLibs
###################################################
library(lattice)
library(rgl)
library(mvtnorm)


###################################################
### code chunk number 2: f1f2durTwoSpeakerPrep
###################################################
dat <- read.table("../data/mog_vowel_data/vallabha-etal-speaker-means.txt", header=T)
dat <- subset(dat, !(Language %in% "English" & Speaker == 11))
sigma.vars <- paste("Sigma", 1:9, sep="")
mu.vars <- c("F1Mean", "F2Mean", "DurMean")
## reshape a row's nine Sigma entries into a 3x3 covariance matrix
make.covariance.matrix <- function(x) matrix(x, 3, 3)
covs <- with(dat, by(dat[, sigma.vars], list(Speaker, Language, Vowel),
                     make.covariance.matrix))
means <- with(dat, by(dat[, mu.vars], list(Speaker, Language, Vowel),
                      function(x) x))
N <- c(100, 200)  ## points per sample: English, Japanese
cov.english <- covs[, 1, ]
means.english <- means[, 1, ]
cov.japanese <- covs[, 2, ]
means.japanese <- means[, 2, ]
## generate N[i,j] multivariate-normal samples for each speaker i and vowel j
f <- function(means, covs, N) {
  if(!is.matrix(N))
    N <- matrix(rep(N, length(means)), dim(means)[1], dim(means)[2])
  result <- c()
  for(j in 1:dim(means)[2]) {    # j indexes vowel
    for(i in 1:dim(means)[1]) {  # i indexes speaker
      tmp <- rmvnorm(N[i, j], as.numeric(means[i, j][[1]]),
                     apply(covs[i, j][[1]], c(1, 2), function(x) x[[1]]))
      tmp <- cbind(rep(j, N[i, j]), tmp)  # vowel
      tmp <- cbind(rep(i, N[i, j]), tmp)  # speaker
      result <- rbind(result, tmp)
    }
  }
  return(result)
}
vowels <- c("e", "E", "i", "I")
## speaker 12 has good separation; speaker 7 has poor separation
spk <- c(12, 7)
N <- 760 / length(spk)
y <- data.frame(f(means.english[spk, ], cov.english[spk, ], N))
names(y) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y$Vowel <- factor(vowels[y$Vowel])
my.pch <- 15:18
my.col <- gray(c(0, 0.25, 0.5, 0.75))


###################################################
### code chunk number 3: f1f2durTwoSpeaker
###################################################
print(with(y, splom(~ y[, 3:5] | Speaker, groups=Vowel, pch=my.pch, col=my.col,
                    key=list(text=list(levels(Vowel)),
                             points=list(pch=my.pch, col=my.col),
                             space="right"),
                    strip=function(which.panel, ...) {
                      ltext(0.5, 0.5, paste("S", which.panel, sep=""))
                    })))


###################################################
### code chunk number 4: f1f2durMixedSpeakersPrep
###################################################
set.seed(11)
spk <- 1:19
N <- 760 / length(spk)
y <- data.frame(f(means.english[spk, ], cov.english[spk, ], N))
names(y) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y$Vowel <- factor(vowels[y$Vowel])


###################################################
### code chunk number 5: f1f2durMixedSpeakers
###################################################
print(with(y, splom(~ y[, 3:5], groups=Vowel, pch=my.pch, col=my.col,
                    key=list(text=list(levels(Vowel)),
                             points=list(pch=my.pch, col=my.col),
                             space="right"),
                    strip=function(which.panel, ...) {
                      ltext(0.5, 0.5, paste("S", which.panel, sep=""))
                    })))
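## Added sanity check (a minimal sketch, not part of the original
## vignette): for one hypothetical speaker/vowel cell, the sample mean of
## a large rmvnorm() draw should closely match the Vallabha et al. cell
## mean that f() uses to generate it; the differences below should be
## small relative to the formant scale.
spk.check <- 12  # hypothetical choice of speaker
vwl.check <- 1   # hypothetical choice of vowel
mu.check <- as.numeric(means.english[spk.check, vwl.check][[1]])
cov.check <- apply(cov.english[spk.check, vwl.check][[1]], c(1, 2),
                   function(x) x[[1]])
round(colMeans(rmvnorm(5000, mu.check, cov.check)) - mu.check, 1)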
###################################################
### code chunk number 6: mog1d
###################################################
M <- 100
p <- c(0.35, 0.65)
mu <- c(0, 4)
s <- c(1, 2)
x <- seq(-5, 10, by=0.01)
idx <- rbinom(M, 1, p[2]) + 1  # idx == k with probability p[k]
y0 <- rnorm(M, mean=mu[idx], sd=s[idx])
my.col <- gray(c(0.5, 0))
plot(x, dnorm(x, mu[1], s[1]), type='l', lwd=p[1]*5, col=my.col[1],
     ylab="P(x)", xlab="x", lty=2)
lines(x, dnorm(x, mu[2], s[2]), lwd=p[2]*5, col=my.col[2], lty=2)
points(y0, rep(-0.005, length(y0)), col=my.col[idx], pch=c(16, 17)[idx])
y1 <- rnorm(M, mean=mu[idx], sd=s[idx])
points(y1, rep(0.41, length(y1)), col=my.col[1], pch=16)


###################################################
### code chunk number 7: loadMCMCpack
###################################################
library(MCMCpack)


###################################################
### code chunk number 8: sigmaMuPriorPrep
###################################################
mu <- seq(-4, 4, by=0.025)
sigma <- seq(0.05, 2, by=0.025)
sigma.mu <- expand.grid(sigma, mu)
## joint prior density: inverse-Wishart on sigma, normal on mu given sigma
p <- function(sigma.mu, mu0=0, sigma0=2, nu=2, Ainv=10) {
  sigma <- sigma.mu[1]
  mu <- sigma.mu[2]
  return(diwish(matrix(sigma, 1, 1), nu, matrix(sigma0, 1, 1)) *
         dnorm(mu, mu0, sigma*Ainv))
}
z <- mapply(function(x, y) p(c(x, y)), sigma.mu[, 1], sigma.mu[, 2])
df <- data.frame(cbind(sigma.mu, z))
names(df) <- c("sigma", "mu", "z")
sigmaMuPrior <- levelplot(z ~ sigma * mu, df, xlab=expression(sigma),
                          ylab=expression(mu))


###################################################
### code chunk number 9: sigmaMuPrior
###################################################
print(sigmaMuPrior)


###################################################
### code chunk number 10: runGibbsSample
###################################################
### Note that I am approximating having truly hard-coded mixture
### proportions by using a very peaked symmetric Dirichlet prior.
library(bayesm)
library(coda)
M <- 4
niter <- 1000
vars <- c("F1", "F2", "Duration")
res <- rnmixGibbs(Data=list(y=as.matrix(y[, vars])),
                  Prior=list(ncomp=M, a=rep(10000, M)),
                  Mcmc=list(R=niter, keep=1))


###################################################
### code chunk number 11: f1f2MOGlibs
###################################################
library(ellipse)


###################################################
### code chunk number 12: f1f2MOGresultsPrep
###################################################
true.means <- with(y, by(y[, vars], Vowel, colMeans))
true.covs <- with(y, by(y[, vars], Vowel, cov))
mu.sample <- lapply(res$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample <- lapply(res$nmix$compdraw[[niter]], function(x) x$rooti)
## rooti is the inverse of the upper-triangular Cholesky root of a
## component's covariance matrix; invert it back to recover the covariance
rooti.to.cov <- function(x) {
  tmp <- solve(x)
  return(Conj(t(tmp)) %*% tmp)
}
my.pch <- 15:18
my.col <- gray(c(0, 0.25, 0.5, 0.75))
plot.pair <- function(data, true.means, true.covs, mu.sample, rooti.sample,
                      i, j, xlab, ylab) {
  with(data, xyplot(data[, j+2] ~ data[, i+2], groups=Vowel, xlab=xlab,
                    ylab=ylab, col=my.col, pch=".",
                    panel=function(...) {
                      panel.xyplot(...)
                      for(vowel in 1:4) {
                        ## inferred component: solid black ellipse
                        this.cov <- rooti.to.cov(rooti.sample[[vowel]])[c(i, j), c(i, j)]
                        llines(ellipse(this.cov, centre=mu.sample[[vowel]][c(i, j)]),
                               lwd=2)
                        ## true category: dashed ellipse in the category's color
                        llines(ellipse(true.covs[[vowel]][c(i, j), c(i, j)],
                                       centre=true.means[[vowel]][c(i, j)]),
                               lwd=2, col=my.col[vowel], lty=2)
                      }
                    }))
}
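## Added check (a minimal sketch, not part of the original vignette):
## rooti.to.cov() assumes rooti is the inverse of the upper-triangular
## Cholesky root R of Sigma (with Sigma = t(R) %*% R); this verifies that
## the function inverts that construction for a hypothetical covariance.
Sigma.test <- matrix(c(2, 0.5, 0.5, 1), 2, 2)  # hypothetical covariance
rooti.test <- solve(chol(Sigma.test))
all.equal(rooti.to.cov(rooti.test), Sigma.test)  # should be TRUE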
###################################################
### code chunk number 13: f1f2MOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 1, 2, "F1", "F2"))


###################################################
### code chunk number 14: f1DurMOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 15: f2DurMOGresults
###################################################
print(plot.pair(y, true.means, true.covs, mu.sample, rooti.sample, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 16: confusionTableUnsupervisedPrep
###################################################
### graphical confusion table
plot.confusion.matrix <- function(true.categories, guess.categories) {
  true.guess <- data.frame(true=true.categories, guess=guess.categories)
  ## permute the guess (cluster) columns so that each true category's
  ## best-matching cluster lies on the diagonal
  canonicalize <- function(mat) {
    for(i in 1:(dim(mat)[1]-1)) {
      tmp <- apply(mat, 2, function(x) x/sum(x))
      best <- which.max(as.numeric(tmp[i, i:dim(mat)[2]])) + i - 1
      mat[, c(i, best)] <- mat[, c(best, i)]
    }
    return(mat)
  }
  confusions <- canonicalize(xtabs(~ true + guess, true.guess))
  ## sqrt so that symbol *area* is proportional to the confusion proportion
  ## (assumes 4 categories)
  xyplot(Var1 ~ Var2, expand.grid(1:dim(confusions)[1], 1:dim(confusions)[1]),
         pch=15, cex=20*sqrt(confusions[4:1, ]/sum(confusions)),
         xlim=c(0.5, 4.5), ylim=c(0.5, 4.5),
         scales=list(y=list(labels=c("", rev(levels(factor(true.categories)))))),
         xlab="Cluster", ylab="True vowel")
}


###################################################
### code chunk number 17: confusionTableUnsupervised
###################################################
## unsupervised confusion table
print(plot.confusion.matrix(y$Vowel, res$nmix$zdraw[niter, ]))


###################################################
### code chunk number 18: confusionTableSupervisedPrep
###################################################
supervised.cluster.assignment <- function(x, true.means, true.covs,
                                          p=rep(1/length(true.means),
                                                length(true.means)),
                                          probability.match=T) {
  liks <- mapply(function(mu, sigma, p) return(dmvnorm(x, mu, sigma) * p),
                 true.means, true.covs, p)
  if(probability.match)  # sample a category in proportion to posterior probability
    return(which.max(rmultinom(1, 1, liks/sum(liks))))
  else                   # always take the highest-probability category
    return(which.max(liks))
}
supervised.assignments <- apply(y[, 3:5], 1, function(x)
  supervised.cluster.assignment(x, true.means, true.covs))
supervised.match <- function(x1, x2, same=F) (x1 == x2) == same
s <- sample(1:dim(y)[1], 500)
s1 <- s[(1:250) * 2]
s2 <- s[(1:250) * 2 - 1]
cluster.assignment.performance <- function(samples, x1, x2, same=F) {
  tot <- dim(samples)[2]
  matched <- sum(samples[, x1] == samples[, x2])
  if(same) return(matched/tot)
  else return((tot - matched)/tot)
}
conf.matrix.supervised <- plot.confusion.matrix(y[["Vowel"]], supervised.assignments)


###################################################
### code chunk number 19: confusionTableSupervised
###################################################
print(conf.matrix.supervised)
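## Added illustration (a minimal sketch, not part of the original
## vignette): probability matching vs. maximization in
## supervised.cluster.assignment(). Repeated probability-matching calls
## on a single observation sample categories in proportion to their
## posterior probability, while probability.match=F always returns the
## modal category.
x0 <- as.numeric(y[1, 3:5])  # an arbitrary observation
table(replicate(100, supervised.cluster.assignment(x0, true.means, true.covs)))
supervised.cluster.assignment(x0, true.means, true.covs, probability.match=F)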
###################################################
### code chunk number 20: setupCategoryProbsLearning
###################################################
## vowels are e,E,i,I
#set.seed(20)
#tmp <- c(2.1,2.6,5.0,9.9)
set.seed(18)
tmp <- c(2.1, 2.6, 14.0, 29.9)
vowelProbs <- tmp/sum(tmp)
nobs <- 30
NN <- rmultinom(1, nobs, vowelProbs)  # per-vowel counts for each speaker
matr <- matrix(rep(NN, length(spk)), length(spk), length(NN), byrow=T)
y1 <- data.frame(f(means.english[spk, ], cov.english[spk, ], matr))
names(y1) <- c("Speaker", "Vowel", "F1", "F2", "Duration")
y1$Vowel <- factor(vowels[y1$Vowel])
niter <- 1000
library(bayesm)
## moderate Dirichlet prior: mixture weights can be learned from the data
dirichlet.alpha <- 4
res1 <- rnmixGibbs(Data=list(y=as.matrix(y1[, vars])),
                   Prior=list(ncomp=M, a=rep(dirichlet.alpha, M)),
                   Mcmc=list(R=niter, keep=1))
sm <- sample(1:dim(y1)[1], 500)
sm1 <- sm[(1:250) * 2]
sm2 <- sm[(1:250) * 2 - 1]
## very peaked Dirichlet prior: mixture weights effectively pinned near uniform
res2 <- rnmixGibbs(Data=list(y=as.matrix(y1[, vars])),
                   Prior=list(ncomp=M, a=rep(100000, M)),
                   Mcmc=list(R=niter, keep=1))


###################################################
### code chunk number 21: mogLearnedMixtureprobsF1F2
###################################################
true.means1 <- with(y1, by(y1[, vars], Vowel, colMeans))
true.covs1 <- with(y1, by(y1[, vars], Vowel, cov))
p.sample1 <- res1$nmix$probdraw[niter, ]  # mixture weights at the final sweep
mu.sample1 <- lapply(res1$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample1 <- lapply(res1$nmix$compdraw[[niter]], function(x) x$rooti)
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 1, 2, "F1", "F2"))


###################################################
### code chunk number 22: mogLearnedMixtureprobsF1Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 23: mogLearnedMixtureprobsF2Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample1, rooti.sample1, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 24: mogCantLearnMixtureprobsF1F2
###################################################
p.sample2 <- res2$nmix$probdraw[niter, ]  # mixture weights at the final sweep
mu.sample2 <- lapply(res2$nmix$compdraw[[niter]], function(x) x$mu)
rooti.sample2 <- lapply(res2$nmix$compdraw[[niter]], function(x) x$rooti)
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 1, 2, "F1", "F2"))


###################################################
### code chunk number 25: mogCantLearnMixtureprobsF1Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 1, 3, "F1", "Duration"))


###################################################
### code chunk number 26: mogCantLearnMixtureprobsF2Dur
###################################################
print(plot.pair(y1, true.means1, true.covs1, mu.sample2, rooti.sample2, 2, 3, "F2", "Duration"))


###################################################
### code chunk number 27: mogLearnedMixtureprobsConfusions
###################################################
print(plot.confusion.matrix(y1$Vowel, res1$nmix$zdraw[niter, ]))


###################################################
### code chunk number 28: mogCantLearnMixtureprobsConfusions
###################################################
print(plot.confusion.matrix(y1$Vowel, res2$nmix$zdraw[niter, ]))


###################################################
### code chunk number 29: ldaLibraries
###################################################
library(lda)
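## Added illustration (a minimal sketch, not part of the original
## vignette): lexicalize() represents each document as a 2 x N integer
## matrix whose first row holds 0-based indices into the vocabulary and
## whose second row holds counts, which is why chunk 30 below adds 1 when
## indexing corpus$vocab. The toy sentence here is hypothetical.
toy <- lexicalize("the dog saw the cat")
toy$documents[[1]]  # row 1: 0-based word indices; row 2: counts
toy$vocab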
###################################################
### code chunk number 30: ldaExamplePrep
###################################################
visualize.2.topic <- function(corpus, lda.assignments) {
  ## render each document with topic-1 words (topics are 0-based) in
  ## upper case and topic-0 words in lower case
  f <- function(i) {
    words <- sapply(1:length(lda.assignments[[i]]), function(j) {
      word <- corpus$vocab[corpus$documents[[i]][1, j] + 1]  # word indices are 0-based
      ifelse(lda.assignments[[i]][j] == 1, toupper(word), tolower(word))
    })
    return(paste(paste(words, collapse=" "), "\n", sep=""))
  }
  invisible(lapply(1:length(lda.assignments), f))
}
pretty.print.lda <- function(visualized.text) {
  for(t in visualized.text)
    cat(t)
}
rawDocuments <- readLines(file("../data/lda_examples/mini_corpus.txt"))
corpus <- lexicalize(rawDocuments)


###################################################
### code chunk number 31: ldaExampleShowRawDocuments
###################################################
cat(rawDocuments, sep="\n")


###################################################
### code chunk number 32: ldaExampleRunI
###################################################
set.seed(20)
system.time(m <- lda.collapsed.gibbs.sampler(corpus$documents, K=2,
                                             vocab=corpus$vocab, burnin=9999,
                                             num.iterations=10000,
                                             alpha=1, eta=0.1))


###################################################
### code chunk number 33: ldaExample
###################################################
pretty.print.lda(visualize.2.topic(corpus, m$assignments))


###################################################
### code chunk number 34: ldaExampleRunII
###################################################
set.seed(1)
system.time(m2 <- lda.collapsed.gibbs.sampler(corpus$documents, K=2,
                                              vocab=corpus$vocab, burnin=9999,
                                              num.iterations=10000,
                                              alpha=0.01, eta=0.1))


###################################################
### code chunk number 35: ldaExampleII
###################################################
pretty.print.lda(visualize.2.topic(corpus, m2$assignments))
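## Added check (a minimal sketch, not part of the original vignette):
## with the sparse prior alpha=0.01 each document should be dominated by
## a single topic. document_sums is a K x D matrix of per-document topic
## counts; column-normalizing it gives per-document topic proportions.
round(prop.table(m$document_sums, 2), 2)   # alpha = 1: more mixed documents
round(prop.table(m2$document_sums, 2), 2)  # alpha = 0.01: near one-hot columns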
###################################################
### code chunk number 36: varyetaPrep
###################################################
#### How likely are two words to be assigned to the same topic, as a
#### function of eta?
### first run some models
run.models <- function(alpha, eta, n.iter=100, seeds=1:100) {
  chains <- lapply(seeds, function(seed) {
    set.seed(seed)
    lda.collapsed.gibbs.sampler(corpus$documents, K=2, vocab=corpus$vocab,
                                burnin=n.iter-1, num.iterations=n.iter,
                                alpha=alpha, eta=eta)
  })
  ## assignments[[i]] is a (document length) x (number of chains) matrix
  ## of final-sweep topic assignments for document i
  assignments <- lapply(1:length(chains[[1]]$assignments), function(i) {
    sapply(chains, function(chain) chain$assignments[[i]])
  })
  return(list(assignments=assignments))
}
### then apply this function to the model output: the proportion of chains
### in which word j1 of document i1 and word j2 of document i2 share a topic
same.topic.rate <- function(assignments, i1, j1, i2, j2)
  mean(assignments[[i1]][j1, ] == assignments[[i2]][j2, ])
etas <- c(seq(0.05, 1.05, by=0.2), 2)
set.seed(1)
system.time(res.varyeta <- sapply(etas, function(eta) {
  ms <- run.models(1, eta, seeds=1:1000)
  return(c(same.topic.rate(ms$assignments, 1, 5, 1, 7),
           same.topic.rate(ms$assignments, 2, 2, 4, 5),
           same.topic.rate(ms$assignments, 2, 2, 4, 2),
           same.topic.rate(ms$assignments, 1, 5, 4, 10),
           same.topic.rate(ms$assignments, 1, 5, 5, 9)))
}))


###################################################
### code chunk number 37: varyEta
###################################################
for(i in 1:nrow(res.varyeta)) {
  if(i == 1)
    plot(etas, res.varyeta[i, ], type="l", ylim=c(0, 1), lwd=3,
         ylab="Probability of same topic assignment",
         xlab=expression(sigma[phi]))
  else
    lines(etas, res.varyeta[i, ], lty=i, lwd=3)
}
legend(0.5, 0.3,
       expression(paste("Two instances of ", italic(dog), " in D1"),
                  paste(italic(dog), " in D2 and D4"),
                  paste(italic(dog), " in D2 and ", italic(cat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(mat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(sun), " in D5")),
       lty=1:5, lwd=3)


###################################################
### code chunk number 38: varyAlphaPrep
###################################################
alphas <- c(0.1, 0.25, 0.5, 0.75, 1, 1.5, 2, 5, 10)
system.time(res.varyalpha <- sapply(alphas, function(alpha) {
  ms <- run.models(alpha, 0.1, seeds=1:1000, n.iter=100)
  return(c(same.topic.rate(ms$assignments, 1, 5, 1, 7),
           same.topic.rate(ms$assignments, 2, 2, 4, 5),
           same.topic.rate(ms$assignments, 2, 2, 4, 2),
           same.topic.rate(ms$assignments, 1, 5, 4, 10),
           same.topic.rate(ms$assignments, 1, 5, 5, 9)))
}))


###################################################
### code chunk number 39: varyAlpha
###################################################
for(i in 1:nrow(res.varyalpha)) {
  if(i == 1)
    plot(alphas, res.varyalpha[i, ], type="l", ylim=c(0, 1), lwd=3,
         ylab="Probability of same topic assignment",
         xlab=expression(sigma[theta]))
  else
    lines(alphas, res.varyalpha[i, ], lty=i, lwd=3)
}
legend(4, 0.3,
       expression(paste("Two instances of ", italic(dog), " in D1"),
                  paste(italic(dog), " in D2 and D4"),
                  paste(italic(dog), " in D2 and ", italic(cat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(mat), " in D4"),
                  paste(italic(dog), " in D1 and ", italic(sun), " in D5")),
       lty=1:5, lwd=3)
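## Added convenience (a minimal sketch, not part of the original
## vignette): the same-topic rates from chunk 38 as a labeled table, one
## row per word pair and one column per alpha value tried above.
rates.varyalpha <- as.data.frame(res.varyalpha)
names(rates.varyalpha) <- paste("alpha", alphas, sep="=")
rownames(rates.varyalpha) <- c("dog/dog in D1", "dog in D2/D4",
                               "dog in D2 / cat in D4",
                               "dog in D1 / mat in D4",
                               "dog in D1 / sun in D5")
round(rates.varyalpha, 2)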