### R code from vignette source '../generalized_linear_models_chapter/generalized_linear_models.Rnw'

###################################################
### code chunk number 1: setOptionsAndLoadLibraries
###################################################
library(lattice)
library(plyr)
options(lineWidth=60)
myFormat <- function(...) {
  tmp <- format(...)
  return(sub("e(.*)","\\\\\\\\times 10^{\\1}",tmp))
}


###################################################
### code chunk number 2: lmPrediction
###################################################
x <- seq(-5,5,by=0.1)
y <- x / 2 + 1


###################################################
### code chunk number 3: lmPredictionPrint
###################################################
print(xyplot(y ~ x, type="l",lwd=3,ylim=c(-5,5),xlim=c(-5,5),col="black"),main="Predicted mean response y given x for a linear model")


###################################################
### code chunk number 4: lmDistribution
###################################################
x <- seq(-5,5,by=0.1)
y <- seq(-5,5,by=0.1)
grid <- expand.grid(x=x,y=y)
grid[["z"]] <- dnorm(grid[["y"]],mean=(1/2) * grid[["x"]] + 1, sd=2)


###################################################
### code chunk number 5: lmDistributionPrint
###################################################
print(levelplot(z ~ x*y,grid, main="Conditional probability density P(y|x) for a linear model"),ylim=c(-5,5),xlim=c(-5,5))


###################################################
### code chunk number 6: lmSimpleExampleFreq
###################################################
library(languageR)
plot(exp(RTlexdec) ~ WrittenFrequency, data=english, pch=".")
rt.lm <- lm(exp(RTlexdec) ~ WrittenFrequency, data=english)
abline(rt.lm,col=6,lwd=4)


###################################################
### code chunk number 7: lmSimpleExampleFreqRedux
###################################################
library(languageR)
plot(exp(RTlexdec) ~ WrittenFrequency, data=english, pch=".")
rt.lm <- lm(exp(RTlexdec) ~ WrittenFrequency, data=english)
abline(rt.lm,col=6,lwd=4)


###################################################
### code chunk number 8: lmThreePts
###################################################
errbar <- function(x0,y0,x1,y1,...) {
  arrows(x0,y0,x1,y1,angle=90,...)
  arrows(x1,y1,x0,y0,angle=90,...)
}
x <- c(4,6,8)
y <- c(800,775,700)
old.par <- par(lwd=2)
plot(x,y,xlim=c(3.5,8.5),ylim=c(690,850))
abline(1000,-37.5, lty=2,col=2) # goes through points 2 & 3
errbar(4.01,801,4.01,850,lty=2,col=2)
abline(900,-25, lty=3,col=3) # goes through points 1 & 3
errbar(6.01,750,6.01,774,lty=3,col=3)
abline(850,-12.5, lty=4,col=4) # goes through points 2 & 3
errbar(8.01,750,8.01,701,lty=4,col=4)
abline(910,-25, lty=1,col=1) # goes through points 1 & 3
errbar(3.99,810,3.99,800,lty=1,col=1)
errbar(5.99,760,5.99,775,lty=1,col=1)
errbar(7.99,710,7.99,700,lty=1,col=1)
legend(7,850,c("Summed squared error","2500","625","2500","425"), lty=c(NA,2,3,4,1),col=c(NA,2,3,4,1),cex=1)
par(old.par)


###################################################
### code chunk number 9: lmThreePointsPrint (eval = FALSE)
###################################################
## errbar <- function(x0,y0,x1,y1,...) {
##   arrows(x0,y0,x1,y1,angle=90,...)
##   arrows(x1,y1,x0,y0,angle=90,...)
## }
## x <- c(4,6,8)
## y <- c(800,775,700)
## old.par <- par(lwd=2)
## plot(x,y,xlim=c(3.5,8.5),ylim=c(690,850))
## abline(1000,-37.5, lty=2,col=2) # goes through points 2 & 3
## errbar(4.01,801,4.01,850,lty=2,col=2)
## abline(900,-25, lty=3,col=3) # goes through points 1 & 3
## errbar(6.01,750,6.01,774,lty=3,col=3)
## abline(850,-12.5, lty=4,col=4) # goes through points 2 & 3
## errbar(8.01,750,8.01,701,lty=4,col=4)
## abline(910,-25, lty=1,col=1) # goes through points 1 & 3
## errbar(3.99,810,3.99,800,lty=1,col=1)
## errbar(5.99,760,5.99,775,lty=1,col=1)
## errbar(7.99,710,7.99,700,lty=1,col=1)
## legend(7,850,c("Summed squared error","2500","625","2500","425"), lty=c(NA,2,3,4,1),col=c(NA,2,3,4,1),cex=1)
## par(old.par)


###################################################
### code chunk number 10: rt3lm
###################################################
rt3.lm <- lm(y ~ x)
rt3.lm
sum(resid(rt3.lm)^2)


###################################################
### code chunk number 11: englishYoungI
###################################################
english.young <- subset(english,AgeSubject=="young")
attach(english.young)
rt.freq.lm <- lm(exp(RTnaming) ~ WrittenFrequency)


###################################################
### code chunk number 12: englishYoungII
###################################################
rt.res <- resid(rt.freq.lm)
rt.ncount.lm <- lm(rt.res ~ Ncount)
plot(Ncount, rt.res)
abline(rt.ncount.lm,col=2,lwd=3)
detach()


###################################################
### code chunk number 13: englishYoung
###################################################
english.young <- subset(english,AgeSubject=="young")
attach(english.young)
rt.freq.lm <- lm(exp(RTnaming) ~ WrittenFrequency)
rt.freq.lm
rt.res <- resid(rt.freq.lm)
rt.ncount.lm <- lm(rt.res ~ Ncount)
plot(Ncount, rt.res)
abline(rt.ncount.lm,col=2,lwd=3)
detach()
rt.ncount.lm


###################################################
### code chunk number 14: rtBothLM
###################################################
rt.both.lm <- lm(exp(RTnaming) ~ WrittenFrequency + Ncount, data=english.young)
rt.both.lm


###################################################
### code chunk number 15: sampleMean
###################################################
#Sample mean
dat <- rnorm(100,0,1)
hist(dat,breaks=20,prob=T, ylim=c(0,1),xlab="Observed data",main="",cex=2)
arrows(t.test(dat)$conf.int[1],0.9,t.test(dat)$conf.int[2],0.9,
  code=3,angle=90,lwd=3,col=2)
points(mean(dat),0.9,pch=19,cex=2)
arrows(-0.3,0.5,-0.05,0.85,lwd=2,col=3)
text(-0.9,0.7,"PROC",cex=2,col=3)


###################################################
### code chunk number 16: intSlope
###################################################
#intercept and slope of a linear model
N <- 100
x <- runif(N,0,5)
y <- x + 1 + rnorm(N)
plot(x,y)
arrows(4,2,4.98,2,lwd=4,col=3)
text(4.2,1,"PROC",cex=2,col=3)


###################################################
### code chunk number 17: confRegion
###################################################
boundary.t <- function(N,k=3,level=0.95) sqrt(k*qf(level,k,N-k))
library(ellipse)
xy.lm <- lm(y ~ x)
plot(ellipse(vcov(xy.lm),centre=coef(xy.lm),t=boundary.t(N)),ylim=c(0.3,1.7),
  xlim=c(-1,2),type="l",col=2,lwd=3,xlab=expression(alpha),ylab=expression(beta),cex=2)
points(coef(xy.lm)[1],coef(xy.lm)[2],cex=2,pch=19)
arrows(0.4,coef(xy.lm)[2]+sqrt(vcov(xy.lm)[2,2]),0.4,
  coef(xy.lm)[2]-sqrt(vcov(xy.lm)[2,2]),code=3,angle=90,lwd=3,col=3)
arrows(coef(xy.lm)[1]+sqrt(vcov(xy.lm)[1,1]),0.4,
  coef(xy.lm)[1]-sqrt(vcov(xy.lm)[1,1]),0.4,code=3,angle=90,lwd=3,col=3)


###################################################
### code chunk number 18: confRegionPrint
###################################################
#Sample mean
dat <- rnorm(100,0,1)
hist(dat,breaks=20,prob=T, ylim=c(0,1),xlab="Observed data",main="",cex=2)
arrows(t.test(dat)$conf.int[1],0.9,t.test(dat)$conf.int[2],0.9,
  code=3,angle=90,lwd=3,col=2)
points(mean(dat),0.9,pch=19,cex=2)
arrows(-0.3,0.5,-0.05,0.85,lwd=2,col=3)
text(-0.9,0.7,"PROC",cex=2,col=3)
#intercept and slope of a linear model
N <- 100
x <- runif(N,0,5)
y <- x + 1 + rnorm(N)
plot(x,y)
arrows(4,2,4.98,2,lwd=4,col=3)
text(4.2,1,"PROC",cex=2,col=3)
boundary.t <- function(N,k=3,level=0.95) sqrt(k*qf(level,k,N-k))
library(ellipse)
xy.lm <- lm(y ~ x)
plot(ellipse(vcov(xy.lm),centre=coef(xy.lm),t=boundary.t(N)),ylim=c(0.3,1.7),
  xlim=c(-1,2),type="l",col=2,lwd=3,xlab=expression(alpha),ylab=expression(beta),cex=2)
points(coef(xy.lm)[1],coef(xy.lm)[2],cex=2,pch=19)
arrows(0.4,coef(xy.lm)[2]+sqrt(vcov(xy.lm)[2,2]),0.4,
  coef(xy.lm)[2]-sqrt(vcov(xy.lm)[2,2]),code=3,angle=90,lwd=3,col=3)
arrows(coef(xy.lm)[1]+sqrt(vcov(xy.lm)[1,1]),0.4,
  coef(xy.lm)[1]-sqrt(vcov(xy.lm)[1,1]),0.4,code=3,angle=90,lwd=3,col=3)


###################################################
### code chunk number 19: writFreq
###################################################
library(ellipse)
rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency, english.young)
#summary(rt.lm)
#vcov(rt.lm) # The diagonals are the variances of the coeff. estimates!
plot(ellipse(vcov(rt.lm),centre=coef(rt.lm),t=boundary.t(dim(english.young)[1])),type="l")
points(coef(rt.lm)[1],coef(rt.lm)[2],cex=5,col=2,pch=19)


###################################################
### code chunk number 20: writFreqPlot
###################################################
with(english.young,plot(exp(RTnaming) ~ WrittenFrequency, pch='.'))
abline(rt.lm,lwd=3,col="magenta")


###################################################
### code chunk number 21: writFreqFig
###################################################
library(ellipse)
rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency, english.young)
#summary(rt.lm)
#vcov(rt.lm) # The diagonals are the variances of the coeff. estimates!
plot(ellipse(vcov(rt.lm),centre=coef(rt.lm),t=boundary.t(dim(english.young)[1])),type="l")
points(coef(rt.lm)[1],coef(rt.lm)[2],cex=5,col=2,pch=19)


###################################################
### code chunk number 22: freqFamScatterplot
###################################################
with(english.young,plot(WrittenFrequency,Familiarity,pch="."))


###################################################
### code chunk number 23: freqFamCI
###################################################
library(ellipse)
dat <- english.young

plot.cr <- function(dat,full=TRUE) {
  my.col <- ifelse(full,"green","magenta")
  my.lty <- ifelse(full,1,5)
  my.pch <- ifelse(full,19,21)
  rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, dat)
  rt.lm.summary <- summary(rt.lm)
  my.beta.hat <- coef(rt.lm)[2:3]
  my.vcov <- vcov(rt.lm)[2:3,2:3]
  pts <- ellipse(my.vcov,centre=my.beta.hat,t=boundary.t(dim(dat)[1])) # -3 because there are three parameters in the model
  if(full) {
    f <- lines
  } else {
    f <- plot
  }
  ##f <- ifelse(full,function(...) lines(...),function(...) plot(...))
  f(pts,type="l",ylim=c(min(pts[,2]),max(0,max(pts[,2]))),xlim=c(min(pts[,1]),max(0,max(pts[,1]))),lty=my.lty,col=my.col,lwd=2)
  points(my.beta.hat[1],my.beta.hat[2],cex=1,col=my.col,pch=my.pch)
  ## confidence interval on WrittenFrequency
  y <- rep(-0.15- 0.15*full,2)
  x <- with(rt.lm.summary,coefficients[2,1]+ qt(0.975,dim(dat)[1]-3)*c(-coefficients[2,2],coefficients[2,2])) 
  arrows(x[1],y[1],x[2],y[2],angle=90,length=0.05,col=my.col,lty=my.lty,code=3)
  ## confidence interval on Familiarity
  x <- rep(-0.15- 0.15*full,2)
  y <- with(rt.lm.summary,coefficients[3,1]+ qt(0.975,dim(dat)[1]-3)*c(-coefficients[3,2],coefficients[3,2])) 
  arrows(x[1],y[1],x[2],y[2],angle=90,length=0.05,col=my.col,lty=my.lty,code=3)
  if(! full) {
    abline(0,0,lty=3) ## add x axis
    lines(c(0,0),c(-50,10),lty=3) ## add y axis
  }
}

set.seed(11)
dat <- english.young[sample(1:(dim(english.young)[1]),200),] ## 200 is what we want
plot.cr(dat,full=FALSE)
plot.cr(english.young,full=TRUE)

lm.0.small <- lm(exp(RTnaming) ~ 1, dat)
lm.1.small <- lm(exp(RTnaming) ~ WrittenFrequency, dat)
lm.2.small <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, dat)
lm.2.full <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, english.young)
beta.hat.full <- coef(lm.2.full)[2:3]
beta.se.add.full <- qt(0.975,dim(english.young)[1]-3) * summary(lm.2.full)[["coefficients"]][2:3,2]
beta.hat.small <- coef(lm.2.small)[2:3]
beta.se.add.small <- qt(0.975,dim(dat)[1]-3) *summary(lm.2.small)[["coefficients"]][2:3,2]


###################################################
### code chunk number 24: generalized_linear_models.Rnw:1161-1162
###################################################
this.F.statistic.1.2 <- 197 * (sum(residuals(lm.1.small)^2) -sum(residuals(lm.2.small)^2)) /sum(residuals(lm.2.small)^2)


###################################################
### code chunk number 25: generalized_linear_models.Rnw:1184-1185
###################################################
this.F.statistic.0.2 <- 197/2 * (sum(residuals(lm.0.small)^2) -sum(residuals(lm.2.small)^2)) /sum(residuals(lm.2.small)^2)


###################################################
### code chunk number 26: fricationANOVAsimple
###################################################
m.0 <- lm(exp(RTnaming) ~ 1, subset(english,AgeSubject=="young"))
m.A <- lm(exp(RTnaming) ~ 1 + Frication, subset(english,AgeSubject=="young"))
resid.0 <- sum(resid(m.0)^2)
resid.A <- sum(resid(m.A)^2)


###################################################
### code chunk number 27: fricationANOVAsimple
###################################################
mfa.0 <- lm(exp(RTnaming) ~ Frication + AgeSubject, english)
mfa.A <- lm(exp(RTnaming) ~ Frication * AgeSubject, english)
df <- with(english,expand.grid(Frication=levels(Frication),AgeSubject=levels(AgeSubject)))
predictions.0 <- with(df,tapply(predict(mfa.0,newdata=df),list(Frication,AgeSubject),function(x) x[1]))
true.means <- with(english,tapply(exp(RTnaming),list(Frication,AgeSubject),mean))
resid.mfa.0 <- sum(resid(mfa.0)^2)
resid.mfa.A <- sum(resid(mfa.A)^2)
mfa.f.statistic <- (resid.mfa.0-resid.mfa.A)/3 / (resid.mfa.A/mfa.A[["df.residual"]])


###################################################
### code chunk number 28: simplestRMANOVAexample
###################################################
set.seed(2)
library(mvtnorm)
n <- 20
m <- 20
beta <- c(0.6,0.2) ## beta[1] corresponds to the intercept; beta[2] corresponds to the intercept plus the effect
Sigma.b <- matrix(c(0.3,0,0,0.3),2,2) ## in this case, condition-specific speaker sensitivities are uncorrelated
sigma.e <- 0.3
df.1 <- expand.grid(embedding=factor(c("Unembedded","Embedded")),lexicalization=factor(paste("L",1:m,sep="")))
df <- df.1
for(i in 1:(n-1))
  df <- rbind(df,df.1)
B <- rmvnorm(m,mean=c(0,0),sigma=Sigma.b)
df$y <- with(df,beta[embedding] + B[cbind(lexicalization,(as.numeric(embedding)))] + rnorm(dim(df)[1],0,sigma.e))
m <- aov(y ~ embedding + Error(lexicalization/embedding),df)


###################################################
### code chunk number 29: alexopolouKellerSetup
###################################################
library(lme4)


###################################################
### code chunk number 30: alexopolouKeller
###################################################
dat <- read.table("../data/alexopolou_keller_2007/language_paper_data/english/experiment1/results/RES_EN")
names(dat) <- c("subj.numeric","item.numeric","island.numeric","embedding.numeric","resumption.numeric","response")
dat$subj <- factor(paste("S",dat$subj.numeric,sep=""))
dat$item <- factor(paste("I",dat$item.numeric,sep=""))
dat$island <- factor(c("bare","that","weak.whether","strong.rc")[dat$island.numeric+1])
dat$embedding <- factor(c("none","single","double")[dat$embedding.numeric+1])
dat$resumption <- factor(c("gap","resumptive")[dat$resumption.numeric]) ## resumption is 1/2 not 0/1
##with(subset(dat,island=="bare"),tapply(response,list(embedding,resumption),mean))
datbare <- subset(dat,island=="bare")
datbare.byitem <- with(datbare,aggregate(list(response=response),list(item=item,embedding=embedding,resumption=resumption),mean))

summary(aov(response ~ resumption + Error(item/(resumption)),datbare.byitem))

## summary(aov(response ~ embedding*resumption + Error(item/(embedding*resumption)),datbare.byitem))
## m.byitem <- lmer(response ~ embedding*resumption + (embedding*resumption | item),datbare.byitem,REML=F)
## m.byitem.no.embedding <- lmer(response ~ embedding*resumption - embedding + (embedding*resumption | item),datbare.byitem,REML=F)
## m.byitem.no.resumption <- lmer(response ~ embedding*resumption - resumption + (embedding*resumption | item),datbare.byitem,REML=F)
## m <- lmer(response ~ embedding*resumption + (embedding*resumption | subj) + (embedding*resumption | item),datbare,REML=F)
## summary(m)


###################################################
### code chunk number 31: anovaExample
###################################################
dat1 <- read.table("../data/rohde_levy_kehler_data/results.final.txt",
  quote="",sep="\t",header=T)
dat1 <- subset(dat1,subj != "subj2" & subj != "subj10" 
              & subj != "subj50") # these subjects answered questions at chance
dat1$subj  <- factor(dat1$subj) # eliminate these levels from the factor                        
spillover.1 <- subset(dat1,expt==1 & correct ==1  
         & crit=="RC_VERB+1") # focus on first spillover region, 
                              # only correctly-answered sentences
spillover.1$verb <- factor(spillover.1$verb) # remove "NONE" 
                                             # levels of factor
spillover.1$attachment <- factor(spillover.1$attachment)
spillover.1$cond <- with(spillover.1,factor(paste(verb,attachment)))
cutoff <- mean(spillover.1$rt) + 4*sd(spillover.1$rt)
#sum(abs(z) > 4) # 14 points are flagged this way as outliers
#length(z) # 14 out of 933 is a lot of outliers at 4sd!!!! 
            # But this is typical for self-paced reading experiments 
spillover.1.to.analyze <- subset(spillover.1,rt <= cutoff)


###################################################
### code chunk number 32: anovaExamplePlot
###################################################
print(densityplot(~ rt | cond,spillover.1,layout=c(1,4),panel=function(...) {
  panel.densityplot(...)
  llines(rep(cutoff,2),c(-0.1,1),lty=2)
  }))


###################################################
### code chunk number 33: logitTransform
###################################################
invlogit <- function(x) exp(x)/(1+exp(x))
x <- seq(-5,5,by=0.01)
plot(x,invlogit(x), type="l",xlab=expression(eta),ylab=expression(pi))


###################################################
### code chunk number 34: xtabsDativeI
###################################################
library(languageR)
xtabs(~ PronomOfRec + RealizationOfRecipient,dative)


###################################################
### code chunk number 35: firstLogisticRegression
###################################################
library(rms)
response <- ifelse(dative$RealizationOfRecipient=="PP",
                1,0) # code PO realization as success, DO as failure
lrm(response ~ PronomOfRec, dative)


###################################################
### code chunk number 36: multipleXtabs
###################################################
tab <- xtabs(~ RealizationOfRecipient + PronomOfRec + PronomOfTheme, dative)
tab
apply(tab,c(2,3),function(x) x[2] / sum(x))


###################################################
### code chunk number 37: multipleLogit
###################################################
library(rms)
dative.lrm <- lrm(response ~ PronomOfRec + PronomOfTheme, dative)
dative.lrm


###################################################
### code chunk number 38: logitLikelihoodRatioTest
###################################################
m.0 <- glm(response ~ PronomOfRec + PronomOfTheme,dative,family="binomial")
m.A <- glm(response ~ PronomOfRec*Modality + PronomOfTheme*Modality,dative,family="binomial")
anova(m.0,m.A)


###################################################
### code chunk number 39: pChiSq
###################################################
1-pchisq(9.07,3)


###################################################
### code chunk number 40: binomialsLogLinearLogit
###################################################
dat <- read.table("../data/binomials_data/single_count_binomials.txt",header=T,fill=T,as.is=T)
summary(glm(Response ~ Percept + Syl + Freq - 1, dat,family="binomial"))$coef


###################################################
### code chunk number 41: logLinearLikelihood
###################################################
library(lattice)
loglinlik = function(lambda,n) {
  explambda = exp(lambda)
  Z = 1/(sum(explambda) + 1)
  ##cat(explambda,"\t",Z*explambda[1], "\t", Z*explambda[2],"\t",Z,"\n")
  return( (Z*explambda[1])^n[1] * (Z*explambda[2])^n[2] * (Z)^n[3] )
}
my.grayscale <- gray(seq(0.9,0.1,by=-0.01))
x <- seq(-5,5,by=0.05)
n <- c(7,4,1)
lambda <- expand.grid(x,x)
lambda[["lik"]] <- apply(as.matrix(lambda),1,function(x) loglinlik(x, n))


###################################################
### code chunk number 42: logLinearLikelihoodPlot
###################################################
print(levelplot(lik ~ Var1 + Var2, lambda, xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale))


###################################################
### code chunk number 43: logLinearPrior
###################################################
lambda[["prior"]] <- apply(as.matrix(lambda[,1:2]),1, function(x) exp(-1 * sum(x^2)))


###################################################
### code chunk number 44: logLinearPriorPlot
###################################################
print(levelplot(prior ~ Var1 + Var2, lambda,xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale))


###################################################
### code chunk number 45: logLinearPosterior
###################################################
lambda[["posterior"]] <- lambda$lik * lambda$prior


###################################################
### code chunk number 46: logLinearPosteriorPlot
###################################################
print(levelplot(prior ~ Var1 + Var2, lambda,xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale))


###################################################
### code chunk number 47: needsDoingLogLin
###################################################
dat <- read.table("../data/needs_doing_data/needs.txt",header=T)
dat$Response <- ifelse(dat$Response=="ing",1,0)
dat$Anim1 <- factor(ifelse(dat$Anim=="abst","abst","conc"))
model.logit <- glm(Response ~ Anim1 + sqrt(Dep.Length), data=dat, family=binomial)
# data processing to get data in format for log-linear/Poisson model
dat.for.loglin <- with(dat,as.data.frame(as.table(tapply(Response, list(Anim1=Anim1,Dep.Length=Dep.Length,Response=Response),length))))
names(dat.for.loglin)[4] <- "x"
dat.for.loglin$DL <- dat.for.loglin$Dep.Length
dat.for.loglin$Dep.Length <- as.numeric(as.character(dat.for.loglin$DL))
dat.for.loglin$Response <- as.numeric(as.character(dat.for.loglin$Response))
dat.for.loglin$x <- sapply(dat.for.loglin$x, function(x) ifelse(is.na(x), 0, x))
model.loglin <- glm(x ~ Anim1*DL + Response + Response:(Anim1 + sqrt(Dep.Length)),data=dat.for.loglin,family="poisson")
summary(model.loglin)$coef[c(32,62,63),]
summary(model.logit)$coef


###################################################
### code chunk number 48: lexdecNdens
###################################################
m1 <- lm(exp(RTlexdec) ~ Ncount, subset(english, AgeSubject %in% "young"))
summary(m1)
m2 <- lm(exp(RTlexdec) ~ WrittenFrequency + Ncount, subset(english, AgeSubject %in% "young"))
summary(m2)
m0 <- lm(exp(RTlexdec) ~ WrittenFrequency, subset(english, AgeSubject %in% "young"))
anova(m0, m2)


###################################################
### code chunk number 49: lexdecNonwordNdens
###################################################
dat.nonwords <- 
  read.table("../data/bicknell_etal_2010jml_data/nonword-data.txt",header=T)
m1 <- lm(rt ~ neighbors + prime.freq, dat.nonwords)
m0 <- lm(rt ~ prime.freq, dat.nonwords)
summary(m1)
anova(m0, m1)


###################################################
### code chunk number 50: stratifiedVariance
###################################################
library(mvtnorm)
library(lme4)
beta <- c(70,70) ## beta[1] corresponds to the intercept; beta[2] corresponds to the intercept plus the effect
Sigma.b <- matrix(c(10,0,0,10),2,2) ## in this case, condition-specific speaker sensitivities are uncorrelated
sigma.e <- 10
m <- 6
n <- 10
f <- function(ignore) {
  df <- df.rep(expand.grid(x=c("Initial","Medial"),subj=paste("S",rep(1:m))),n)
  B <- rmvnorm(m,mean=c(0,0),sigma=Sigma.b)
  df$y <- with(df,beta[x] + B[cbind(subj,(as.numeric(x)))] + rnorm(dim(df)[1],0,sigma.e))
  p.wrong <- anova(lm(y ~ x*subj,df))[1,5]
  p.rm <- summary(aov(y ~ x + Error(subj/(x)),df))[["Error: subj:x"]][[1]][1,5]
  return(c(p.wrong=p.wrong,p.rm=p.rm))
}
system.time(p.vals <- sapply(1:100,f))


###################################################
### code chunk number 51: stratifiedVarianceShow
###################################################
cat("Frequency of Type I error for NON-repeated measures analysis:",mean(p.vals[1,] < 0.05))
cat("Frequency of Type I error for repeated-measures analysis:",mean(p.vals[2,] < 0.05))


###################################################
### code chunk number 52: rohdeEtalSecondSpilloverRegion
###################################################
dat.rlk <- read.table("../data/rohde_levy_kehler_data/rohde-etal-IC-RC-spr.txt", header=T)
means <- with(subset(dat.rlk,! filler),tapply(rt,crit,mean))
sds <- with(subset(dat.rlk,! filler),tapply(rt,crit,sd))
dat.rlk$is.outlier <- with(dat.rlk,rt > means[crit] + 3*sds[crit] & ! filler)

f <- function(dat,remove.outliers=F) {
  if(remove.outliers)  dat <- subset(dat, ! is.outlier)
  cat("***By-subjects analysis:***\n")
  dat.bysubj <- ddply(subset(dat,crit %in% "RC_VERB+2"),c("subj","verb","attachment"),function(df) return(c(rt=mean(df$rt))))
  cat("***Condition means:***\n")
  print(with(dat.bysubj, tapply(rt,list(verb,attachment),mean)))
  print(summary(aov(rt ~ verb * attachment + Error(subj/(verb*attachment)),dat.bysubj)))
  cat("***By-items analysis:***\n")
  dat.byitem <- ddply(subset(dat,crit %in% "RC_VERB+2"),c("item","verb","attachment"),function(df) return(c(rt=mean(df$rt))))
  cat("***Condition means:***\n")
  print(with(dat.byitem, tapply(rt,list(verb,attachment),mean)))
  print(summary(aov(rt ~ verb * attachment + Error(item/(verb*attachment)),dat.byitem)))
}


###################################################
### code chunk number 53: rohdeEtalSecondSpilloverRegionShow
###################################################
f(dat.rlk,F) ## no outlier removal
f(dat.rlk,T) ## with outlier removal


###################################################
### code chunk number 54: PronQuantSetup
###################################################
library(languageR)


###################################################
### code chunk number 55: PronQuant
###################################################
pq <- function(rec,th) {
  if(rec=="pronominal" & th=="nonpronominal")
      1
  else if(rec=="nonpronominal" & th=="pronominal")
      -1
  else
      0
  } 
PronQuant <- with(dative,mapply(pq,PronomOfRec,PronomOfTheme))
m.2 <- glm(RealizationOfRecipient=="PP" ~ PronQuant,
           data=dative,family="binomial")


###################################################
### code chunk number 56: PronQuantPrintResults
###################################################
summary(m.2)


###################################################
### code chunk number 57: PronQuantModelComparison
###################################################
m.1 <- glm(RealizationOfRecipient=="PP" ~ PronomOfRec + PronomOfTheme,
           dative,family="binomial")


###################################################
### code chunk number 58: PronQuantModelComparisonPrintResults
###################################################
anova(m.2,m.1,test="Chisq")


###################################################
### code chunk number 59: symmetricBaselinePriors
###################################################

### part 3: no prior penalty on intercept, but penalty on all else
library(rms)
dat <- data.frame(x=rep(c("pro","pro","notPro","notPro"),c(8,2,2,8)),y=rep(c("NP","PP","NP","PP"),c(8,2,2,8)))
m <- lrm(y~x,dat,penalty=1)
predict(m,dat,type="fitted")