### R code from vignette source '../multivariate_probability_chapter/multivariate_probability.Rnw'

###################################################
### code chunk number 1: helperFunction
###################################################
## format x to `decimals` decimal places, padded to a field of width `fore`
roundN <- function(x,decimals=2,fore=5)
  sprintf(paste("%",fore,".",decimals,"f",sep=""),x)


###################################################
### code chunk number 2: jointCPD
###################################################
plot(c(),c(),xlim=c(200,800),ylim=c(500,2500),xlab="f1",ylab="f2")
rect(480,940,530,1020,col=8)


###################################################
### code chunk number 3: getCorrelationCoefficient
###################################################
## joint distribution of two binary random variables:
## rows index the outcomes of the first variable, columns the second
dat <- matrix(c(0.224,0.014,0.655,0.107),2,2)
mu1 <- apply(dat,1,sum)[2]   # P(first variable = 1), i.e. its mean
mu2 <- apply(dat,2,sum)[2]   # P(second variable = 1), i.e. its mean
var1 <- mu1*(1-mu1)          # Bernoulli variances
var2 <- mu2*(1-mu2)
f <- function(x,y) dat[x+1,y+1]*(x-mu1)*(y-mu2)
mapply(f,c(0,0,1,1),c(0,1,0,1))
cov12 <- sum(mapply(f,c(0,0,1,1),c(0,1,0,1)))                 ## covariance
cor12 <- sum(mapply(f,c(0,0,1,1),c(0,1,0,1)))/sqrt(var1*var2) ## correlation


###################################################
### code chunk number 4: multivariateNormalPerspectivePlot
###################################################
library(mvtnorm)
sigma.xx <- 1
sigma.yy <- 4
sigma.xy <- 1.5 # positive covariance
sigma <- matrix(c(sigma.xx,sigma.xy,sigma.xy,sigma.yy),ncol=2)
x <- seq(-5,5,by=0.25)
y <- x
f <- function(x,y) {   # bivariate normal density evaluated at a grid of (x,y) pairs
  xy <- cbind(x,y)
  dmvnorm(xy,c(0,0),sigma)
}
z <- outer(x,y,f)
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue",
      ltheta = 120, shade = 0.75, xlab="X1", ylab="X2", zlab="p(X1,X2)")


###################################################
### code chunk number 5: multivariateNormalContourPlot
###################################################
contour(x, y, z, method = "edge",xlab=expression(X[1]),ylab=expression(X[2]))
# do the same thing again with sigma.xy <- 1.
# Max sigma.xy for this case is 2
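

###################################################
### added sketch: contour plot with sigma.xy <- 1
###################################################
## A minimal sketch of the variation suggested in the comment above, assuming
## sigma.xx, sigma.yy, x, y, and the density function f from code chunk 4 are
## still in the workspace (f looks up `sigma` in the global environment, so
## rebuilding `sigma` is all that is needed). With sigma.xy = 1 the elliptical
## contours tilt less steeply than with sigma.xy = 1.5; |sigma.xy| can be at
## most sqrt(sigma.xx * sigma.yy) = 2 (and must be strictly smaller for the
## density to be well defined), which is the maximum noted above.
sigma.xy <- 1
sigma <- matrix(c(sigma.xx,sigma.xy,sigma.xy,sigma.yy),ncol=2)
z <- outer(x,y,f)
contour(x, y, z, method = "edge",xlab=expression(X[1]),ylab=expression(X[2]))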


###################################################
### code chunk number 6: f1f2plotRaw
###################################################
my.cols <- rainbow(5)
x <- read.table("../data/peterson_barney_data/pb.txt",header=T,quote="")
## recode the vowel labels to single-character symbols for plotting
x$Character <- factor(sapply(as.character(x$Vowel), function(x) {
  if(x %in% "iy") return("i")
  if(x %in% "uw") return("u")
  if(x %in% "eh") return("E")
  if(x %in% "aa") return("a")
  if(x %in% "uh") return("U")
  return("X")
}))
fems <- subset(x, Type=="w")   # adult female speakers
fems.aiuEU <- subset(fems, Vowel %in% c("iy","uw","aa","eh","uh"))
## axis limits are reversed, following the standard F2-by-F1 vowel-plot convention
my.xlim <- with(fems.aiuEU, rev(range(F2)))
my.ylim <- with(fems.aiuEU, rev(range(F1)))
with(fems.aiuEU, plot(F1 ~ F2, col=my.cols[Character], pch=as.character(Character),
                      ylim=my.ylim, xlim=my.xlim))


###################################################
### code chunk number 7: f1f2plotMVNorms
###################################################
library(ellipse)
library(plyr)
## per-vowel means, variances, and F1-F2 covariance
dat <- ddply(fems.aiuEU, "Character", function(df) {
  return(c(mu.F1=mean(df$F1),
           mu.F2=mean(df$F2),
           sigma.F1=var(df$F1),
           sigma.F2=var(df$F2),
           sigma.F1F2=cov(df$F1,df$F2)))
})
plot(c(),c(),ylim=my.ylim,xlim=my.xlim,xlab="F2",ylab="F1")
for(i in 1:(dim(dat)[1])) {
  with(dat[i,],
       lines(ellipse(matrix(c(sigma.F2,rep(sigma.F1F2,2),sigma.F1),2,2),
                     centre=c(mu.F2,mu.F1)),
             col=my.cols[i]))
  text(x=dat[i,]$mu.F2,y=dat[i,]$mu.F1,labels=dat[i,]$Character,col=my.cols[i])
}


###################################################
### code chunk number 8: prepConditionalEntropyStuff
###################################################
entropy <- function(x) {
  x <- x/sum(x)
  -1 * sum(x[x>0]*log2(x[x>0]))
}
cross.entropy <- function(y,x) {   ## y is target/reference, x is guess
  y <- y/sum(y)
  x <- x/sum(x)
  -1 * sum(y[y>0] * log2(x[y>0]))
}
mutual.information <- function(xy) {   ## assumes that xy is a 2-d array
  x <- apply(xy,1,sum)
  y <- apply(xy,2,sum)
  xy.independence <- x %o% y   ## joint distribution if the two variables were independent
  cross.entropy(xy,xy.independence) - entropy(xy)
}
## first and second POS tags of two-word sentences from the parsed Brown corpus
pos1 <- read.table("../data/parsed-brown-first-pos-of-sentences-with-two-words.txt",quote="",stringsAsFactors=F)
pos2 <- read.table("../data/parsed-brown-second-pos-of-sentences-with-two-words.txt",quote="",stringsAsFactors=F)
pos12 <- data.frame(pos1=pos1$V1,pos2=pos2$V1,stringsAsFactors=F)
## keep only tags consisting of letters and "$"
pos12 <- pos12[with(pos12,grepl("^[A-Za-z$]+$",pos1) & grepl("^[A-Za-z$]+$",pos2)),]
pos12.factor <- with(pos12,data.frame(pos1=factor(pos12$pos1,levels=union(pos1,pos2)),
                                      pos2=factor(pos12$pos2,levels=union(pos1,pos2))))
pos1.probs <- xtabs(~pos12$pos1)/nrow(pos12)            ## marginal distribution of the first tag
pos2.probs <- xtabs(~pos12$pos2)/nrow(pos12)            ## marginal distribution of the second tag
pos12.probs <- xtabs(~ pos1 + pos2, pos12)/nrow(pos12)  ## joint distribution
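

###################################################
### added sketch: mutual information between the two POS tags
###################################################
## A minimal sketch, assuming the functions and probability tables defined in
## code chunk 8 are in the workspace. mutual.information(pos12.probs) gives
## I(pos1; pos2) in bits -- how much information the first POS tag of a
## two-word sentence carries about the second. It should agree, up to
## floating-point error, with the identity I(X;Y) = H(X) + H(Y) - H(X,Y)
## computed directly from the marginal and joint tables.
mi.pos <- mutual.information(pos12.probs)
mi.check <- entropy(pos1.probs) + entropy(pos2.probs) - entropy(pos12.probs)
c(mutual.information=mi.pos, via.entropies=mi.check)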