### R code from vignette source '../multivariate_probability_chapter/multivariate_probability.Rnw'

###################################################
### code chunk number 1: helperFunction
###################################################
## format x to `decimals` decimal places, padded to a field of width `fore`
roundN <- function(x,decimals=2,fore=5)
  sprintf(paste("%",fore,".",decimals,"f",sep=""),x)


###################################################
### code chunk number 2: jointCPD
###################################################
plot(c(),c(),xlim=c(200,800),ylim=c(500,2500),xlab="f1",ylab="f2")
rect(480,940,530,1020,col=8)


###################################################
### code chunk number 3: getCorrelationCoefficient
###################################################
## joint distribution of two binary random variables:
## rows index the outcomes of the first variable, columns the second
dat <- matrix(c(0.224,0.014,0.655,0.107),2,2)
mu1 <- apply(dat,1,sum)[2]   # P(first variable = 1), i.e. its mean
mu2 <- apply(dat,2,sum)[2]   # P(second variable = 1), i.e. its mean
var1 <- mu1*(1-mu1)          # Bernoulli variances
var2 <- mu2*(1-mu2)
f <- function(x,y) dat[x+1,y+1]*(x-mu1)*(y-mu2)
mapply(f,c(0,0,1,1),c(0,1,0,1))
cov12 <- sum(mapply(f,c(0,0,1,1),c(0,1,0,1)))                 ## covariance
cor12 <- sum(mapply(f,c(0,0,1,1),c(0,1,0,1)))/sqrt(var1*var2) ## correlation


###################################################
### code chunk number 4: multivariateNormalPerspectivePlot
###################################################
library(mvtnorm)
sigma.xx <- 1
sigma.yy <- 4
sigma.xy <- 1.5 # positive covariance
sigma <- matrix(c(sigma.xx,sigma.xy,sigma.xy,sigma.yy),ncol=2)
x <- seq(-5,5,by=0.25)
y <- x
f <- function(x,y) {   # bivariate normal density evaluated at a grid of (x,y) pairs
  xy <- cbind(x,y)
  dmvnorm(xy,c(0,0),sigma)
}
z <- outer(x,y,f)
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue",
      ltheta = 120, shade = 0.75, xlab="X1", ylab="X2", zlab="p(X1,X2)")


###################################################
### code chunk number 5: multivariateNormalContourPlot
###################################################
contour(x, y, z, method = "edge",xlab=expression(X[1]),ylab=expression(X[2]))
# do the same thing again with sigma.xy <- 1.
# Max sigma.xy for this case is 2
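

###################################################
### added sketch: contour plot with sigma.xy <- 1
###################################################
## A minimal sketch of the variation suggested in the comment above, assuming
## sigma.xx, sigma.yy, x, y, and the density function f from code chunk 4 are
## still in the workspace (f looks up `sigma` in the global environment, so
## rebuilding `sigma` is all that is needed). With sigma.xy = 1 the elliptical
## contours tilt less steeply than with sigma.xy = 1.5; |sigma.xy| can be at
## most sqrt(sigma.xx * sigma.yy) = 2 (and must be strictly smaller for the
## density to be well defined), which is the maximum noted above.
sigma.xy <- 1
sigma <- matrix(c(sigma.xx,sigma.xy,sigma.xy,sigma.yy),ncol=2)
z <- outer(x,y,f)
contour(x, y, z, method = "edge",xlab=expression(X[1]),ylab=expression(X[2]))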


###################################################
### code chunk number 6: f1f2plotRaw
###################################################
my.cols <- rainbow(5)
x <- read.table("../data/peterson_barney_data/pb.txt",header=T,quote="")
## recode the vowel labels to single-character symbols for plotting
x$Character <- factor(sapply(as.character(x$Vowel), function(x) {
  if(x %in% "iy") return("i")
  if(x %in% "uw") return("u")
  if(x %in% "eh") return("E")
  if(x %in% "aa") return("a")
  if(x %in% "uh") return("U")
  return("X")
}))
fems <- subset(x, Type=="w")   # adult female speakers
fems.aiuEU <- subset(fems, Vowel %in% c("iy","uw","aa","eh","uh"))
## axis limits are reversed, following the standard F2-by-F1 vowel-plot convention
my.xlim <- with(fems.aiuEU, rev(range(F2)))
my.ylim <- with(fems.aiuEU, rev(range(F1)))
with(fems.aiuEU, plot(F1 ~ F2, col=my.cols[Character], pch=as.character(Character),
                      ylim=my.ylim, xlim=my.xlim))


###################################################
### code chunk number 7: f1f2plotMVNorms
###################################################
library(ellipse)
library(plyr)
## per-vowel means, variances, and F1-F2 covariance
dat <- ddply(fems.aiuEU, "Character", function(df) {
  return(c(mu.F1=mean(df$F1),
           mu.F2=mean(df$F2),
           sigma.F1=var(df$F1),
           sigma.F2=var(df$F2),
           sigma.F1F2=cov(df$F1,df$F2)))
})
plot(c(),c(),ylim=my.ylim,xlim=my.xlim,xlab="F2",ylab="F1")
for(i in 1:(dim(dat)[1])) {
  with(dat[i,],
       lines(ellipse(matrix(c(sigma.F2,rep(sigma.F1F2,2),sigma.F1),2,2),
                     centre=c(mu.F2,mu.F1)),
             col=my.cols[i]))
  text(x=dat[i,]$mu.F2,y=dat[i,]$mu.F1,labels=dat[i,]$Character,col=my.cols[i])
}


###################################################
### code chunk number 8: prepConditionalEntropyStuff
###################################################
entropy <- function(x) {
  x <- x/sum(x)
  -1 * sum(x[x>0]*log2(x[x>0]))
}
cross.entropy <- function(y,x) {   ## y is target/reference, x is guess
  y <- y/sum(y)
  x <- x/sum(x)
  -1 * sum(y[y>0] * log2(x[y>0]))
}
mutual.information <- function(xy) {   ## assumes that xy is a 2-d array
  x <- apply(xy,1,sum)
  y <- apply(xy,2,sum)
  xy.independence <- x %o% y   ## joint distribution if the two variables were independent
  cross.entropy(xy,xy.independence) - entropy(xy)
}
## first and second POS tags of two-word sentences from the parsed Brown corpus
pos1 <- read.table("../data/parsed-brown-first-pos-of-sentences-with-two-words.txt",quote="",stringsAsFactors=F)
pos2 <- read.table("../data/parsed-brown-second-pos-of-sentences-with-two-words.txt",quote="",stringsAsFactors=F)
pos12 <- data.frame(pos1=pos1$V1,pos2=pos2$V1,stringsAsFactors=F)
## keep only tags consisting of letters and "$"
pos12 <- pos12[with(pos12,grepl("^[A-Za-z$]+$",pos1) & grepl("^[A-Za-z$]+$",pos2)),]
pos12.factor <- with(pos12,data.frame(pos1=factor(pos12$pos1,levels=union(pos1,pos2)),
                                      pos2=factor(pos12$pos2,levels=union(pos1,pos2))))
pos1.probs <- xtabs(~pos12$pos1)/nrow(pos12)            ## marginal distribution of the first tag
pos2.probs <- xtabs(~pos12$pos2)/nrow(pos12)            ## marginal distribution of the second tag
pos12.probs <- xtabs(~ pos1 + pos2, pos12)/nrow(pos12)  ## joint distribution
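

###################################################
### added sketch: mutual information between the two POS tags
###################################################
## A minimal sketch, assuming the functions and probability tables defined in
## code chunk 8 are in the workspace. mutual.information(pos12.probs) gives
## I(pos1; pos2) in bits -- how much information the first POS tag of a
## two-word sentence carries about the second. It should agree, up to
## floating-point error, with the identity I(X;Y) = H(X) + H(Y) - H(X,Y)
## computed directly from the marginal and joint tables.
mi.pos <- mutual.information(pos12.probs)
mi.check <- entropy(pos1.probs) + entropy(pos2.probs) - entropy(pos12.probs)
c(mutual.information=mi.pos, via.entropies=mi.check)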