### R code from vignette source '../generalized_linear_models_chapter/generalized_linear_models.Rnw' ################################################### ### code chunk number 1: setOptionsAndLoadLibraries ################################################### library(lattice) library(plyr) options(lineWidth=60) myFormat <- function(...) { tmp <- format(...) return(sub("e(.*)","\\\\\\\\times 10^{\\1}",tmp)) } ################################################### ### code chunk number 2: lmPrediction ################################################### x <- seq(-5,5,by=0.1) y <- x / 2 + 1 ################################################### ### code chunk number 3: lmPredictionPrint ################################################### print(xyplot(y ~ x, type="l",lwd=3,ylim=c(-5,5),xlim=c(-5,5),col="black"),main="Predicted mean response y given x for a linear model") ################################################### ### code chunk number 4: lmDistribution ################################################### x <- seq(-5,5,by=0.1) y <- seq(-5,5,by=0.1) grid <- expand.grid(x=x,y=y) grid[["z"]] <- dnorm(grid[["y"]],mean=(1/2) * grid[["x"]] + 1, sd=2) ################################################### ### code chunk number 5: lmDistributionPrint ################################################### print(levelplot(z ~ x*y,grid, main="Conditional probability density P(y|x) for a linear model"),ylim=c(-5,5),xlim=c(-5,5)) ################################################### ### code chunk number 6: lmSimpleExampleFreq ################################################### library(languageR) plot(exp(RTlexdec) ~ WrittenFrequency, data=english, pch=".") rt.lm <- lm(exp(RTlexdec) ~ WrittenFrequency, data=english) abline(rt.lm,col=6,lwd=4) ################################################### ### code chunk number 7: lmSimpleExampleFreqRedux ################################################### library(languageR) plot(exp(RTlexdec) ~ WrittenFrequency, data=english, pch=".") rt.lm <- lm(exp(RTlexdec) ~ WrittenFrequency, data=english) abline(rt.lm,col=6,lwd=4) ################################################### ### code chunk number 8: lmThreePts ################################################### errbar <- function(x0,y0,x1,y1,...) { arrows(x0,y0,x1,y1,angle=90,...) arrows(x1,y1,x0,y0,angle=90,...) } x <- c(4,6,8) y <- c(800,775,700) old.par <- par(lwd=2) plot(x,y,xlim=c(3.5,8.5),ylim=c(690,850)) abline(1000,-37.5, lty=2,col=2) # goes through points 2 & 3 errbar(4.01,801,4.01,850,lty=2,col=2) abline(900,-25, lty=3,col=3) # goes through points 1 & 3 errbar(6.01,750,6.01,774,lty=3,col=3) abline(850,-12.5, lty=4,col=4) # goes through points 2 & 3 errbar(8.01,750,8.01,701,lty=4,col=4) abline(910,-25, lty=1,col=1) # goes through points 1 & 3 errbar(3.99,810,3.99,800,lty=1,col=1) errbar(5.99,760,5.99,775,lty=1,col=1) errbar(7.99,710,7.99,700,lty=1,col=1) legend(7,850,c("Summed squared error","2500","625","2500","425"), lty=c(NA,2,3,4,1),col=c(NA,2,3,4,1),cex=1) par(old.par) ################################################### ### code chunk number 9: lmThreePointsPrint (eval = FALSE) ################################################### ## errbar <- function(x0,y0,x1,y1,...) { ## arrows(x0,y0,x1,y1,angle=90,...) ## arrows(x1,y1,x0,y0,angle=90,...) ## } ## x <- c(4,6,8) ## y <- c(800,775,700) ## old.par <- par(lwd=2) ## plot(x,y,xlim=c(3.5,8.5),ylim=c(690,850)) ## abline(1000,-37.5, lty=2,col=2) # goes through points 2 & 3 ## errbar(4.01,801,4.01,850,lty=2,col=2) ## abline(900,-25, lty=3,col=3) # goes through points 1 & 3 ## errbar(6.01,750,6.01,774,lty=3,col=3) ## abline(850,-12.5, lty=4,col=4) # goes through points 2 & 3 ## errbar(8.01,750,8.01,701,lty=4,col=4) ## abline(910,-25, lty=1,col=1) # goes through points 1 & 3 ## errbar(3.99,810,3.99,800,lty=1,col=1) ## errbar(5.99,760,5.99,775,lty=1,col=1) ## errbar(7.99,710,7.99,700,lty=1,col=1) ## legend(7,850,c("Summed squared error","2500","625","2500","425"), lty=c(NA,2,3,4,1),col=c(NA,2,3,4,1),cex=1) ## par(old.par) ################################################### ### code chunk number 10: rt3lm ################################################### rt3.lm <- lm(y ~ x) rt3.lm sum(resid(rt3.lm)^2) ################################################### ### code chunk number 11: englishYoungI ################################################### english.young <- subset(english,AgeSubject=="young") attach(english.young) rt.freq.lm <- lm(exp(RTnaming) ~ WrittenFrequency) ################################################### ### code chunk number 12: englishYoungII ################################################### rt.res <- resid(rt.freq.lm) rt.ncount.lm <- lm(rt.res ~ Ncount) plot(Ncount, rt.res) abline(rt.ncount.lm,col=2,lwd=3) detach() ################################################### ### code chunk number 13: englishYoung ################################################### english.young <- subset(english,AgeSubject=="young") attach(english.young) rt.freq.lm <- lm(exp(RTnaming) ~ WrittenFrequency) rt.freq.lm rt.res <- resid(rt.freq.lm) rt.ncount.lm <- lm(rt.res ~ Ncount) plot(Ncount, rt.res) abline(rt.ncount.lm,col=2,lwd=3) detach() rt.ncount.lm ################################################### ### code chunk number 14: rtBothLM ################################################### rt.both.lm <- lm(exp(RTnaming) ~ WrittenFrequency + Ncount, data=english.young) rt.both.lm ################################################### ### code chunk number 15: sampleMean ################################################### #Sample mean dat <- rnorm(100,0,1) hist(dat,breaks=20,prob=T, ylim=c(0,1),xlab="Observed data",main="",cex=2) arrows(t.test(dat)$conf.int[1],0.9,t.test(dat)$conf.int[2],0.9, code=3,angle=90,lwd=3,col=2) points(mean(dat),0.9,pch=19,cex=2) arrows(-0.3,0.5,-0.05,0.85,lwd=2,col=3) text(-0.9,0.7,"PROC",cex=2,col=3) ################################################### ### code chunk number 16: intSlope ################################################### #intercept and slope of a linear model N <- 100 x <- runif(N,0,5) y <- x + 1 + rnorm(N) plot(x,y) arrows(4,2,4.98,2,lwd=4,col=3) text(4.2,1,"PROC",cex=2,col=3) ################################################### ### code chunk number 17: confRegion ################################################### boundary.t <- function(N,k=3,level=0.95) sqrt(k*qf(level,k,N-k)) library(ellipse) xy.lm <- lm(y ~ x) plot(ellipse(vcov(xy.lm),centre=coef(xy.lm),t=boundary.t(N)),ylim=c(0.3,1.7), xlim=c(-1,2),type="l",col=2,lwd=3,xlab=expression(alpha),ylab=expression(beta),cex=2) points(coef(xy.lm)[1],coef(xy.lm)[2],cex=2,pch=19) arrows(0.4,coef(xy.lm)[2]+sqrt(vcov(xy.lm)[2,2]),0.4, coef(xy.lm)[2]-sqrt(vcov(xy.lm)[2,2]),code=3,angle=90,lwd=3,col=3) arrows(coef(xy.lm)[1]+sqrt(vcov(xy.lm)[1,1]),0.4, coef(xy.lm)[1]-sqrt(vcov(xy.lm)[1,1]),0.4,code=3,angle=90,lwd=3,col=3) ################################################### ### code chunk number 18: confRegionPrint ################################################### #Sample mean dat <- rnorm(100,0,1) hist(dat,breaks=20,prob=T, ylim=c(0,1),xlab="Observed data",main="",cex=2) arrows(t.test(dat)$conf.int[1],0.9,t.test(dat)$conf.int[2],0.9, code=3,angle=90,lwd=3,col=2) points(mean(dat),0.9,pch=19,cex=2) arrows(-0.3,0.5,-0.05,0.85,lwd=2,col=3) text(-0.9,0.7,"PROC",cex=2,col=3) #intercept and slope of a linear model N <- 100 x <- runif(N,0,5) y <- x + 1 + rnorm(N) plot(x,y) arrows(4,2,4.98,2,lwd=4,col=3) text(4.2,1,"PROC",cex=2,col=3) boundary.t <- function(N,k=3,level=0.95) sqrt(k*qf(level,k,N-k)) library(ellipse) xy.lm <- lm(y ~ x) plot(ellipse(vcov(xy.lm),centre=coef(xy.lm),t=boundary.t(N)),ylim=c(0.3,1.7), xlim=c(-1,2),type="l",col=2,lwd=3,xlab=expression(alpha),ylab=expression(beta),cex=2) points(coef(xy.lm)[1],coef(xy.lm)[2],cex=2,pch=19) arrows(0.4,coef(xy.lm)[2]+sqrt(vcov(xy.lm)[2,2]),0.4, coef(xy.lm)[2]-sqrt(vcov(xy.lm)[2,2]),code=3,angle=90,lwd=3,col=3) arrows(coef(xy.lm)[1]+sqrt(vcov(xy.lm)[1,1]),0.4, coef(xy.lm)[1]-sqrt(vcov(xy.lm)[1,1]),0.4,code=3,angle=90,lwd=3,col=3) ################################################### ### code chunk number 19: writFreq ################################################### library(ellipse) rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency, english.young) #summary(rt.lm) #vcov(rt.lm) # The diagonals are the variances of the coeff. estimates! plot(ellipse(vcov(rt.lm),centre=coef(rt.lm),t=boundary.t(dim(english.young)[1])),type="l") points(coef(rt.lm)[1],coef(rt.lm)[2],cex=5,col=2,pch=19) ################################################### ### code chunk number 20: writFreqPlot ################################################### with(english.young,plot(exp(RTnaming) ~ WrittenFrequency, pch='.')) abline(rt.lm,lwd=3,col="magenta") ################################################### ### code chunk number 21: writFreqFig ################################################### library(ellipse) rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency, english.young) #summary(rt.lm) #vcov(rt.lm) # The diagonals are the variances of the coeff. estimates! plot(ellipse(vcov(rt.lm),centre=coef(rt.lm),t=boundary.t(dim(english.young)[1])),type="l") points(coef(rt.lm)[1],coef(rt.lm)[2],cex=5,col=2,pch=19) ################################################### ### code chunk number 22: freqFamScatterplot ################################################### with(english.young,plot(WrittenFrequency,Familiarity,pch=".")) ################################################### ### code chunk number 23: freqFamCI ################################################### library(ellipse) dat <- english.young plot.cr <- function(dat,full=TRUE) { my.col <- ifelse(full,"green","magenta") my.lty <- ifelse(full,1,5) my.pch <- ifelse(full,19,21) rt.lm <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, dat) rt.lm.summary <- summary(rt.lm) my.beta.hat <- coef(rt.lm)[2:3] my.vcov <- vcov(rt.lm)[2:3,2:3] pts <- ellipse(my.vcov,centre=my.beta.hat,t=boundary.t(dim(dat)[1])) # -3 because there are three parameters in the model if(full) { f <- lines } else { f <- plot } ##f <- ifelse(full,function(...) lines(...),function(...) plot(...)) f(pts,type="l",ylim=c(min(pts[,2]),max(0,max(pts[,2]))),xlim=c(min(pts[,1]),max(0,max(pts[,1]))),lty=my.lty,col=my.col,lwd=2) points(my.beta.hat[1],my.beta.hat[2],cex=1,col=my.col,pch=my.pch) ## confidence interval on WrittenFrequency y <- rep(-0.15- 0.15*full,2) x <- with(rt.lm.summary,coefficients[2,1]+ qt(0.975,dim(dat)[1]-3)*c(-coefficients[2,2],coefficients[2,2])) arrows(x[1],y[1],x[2],y[2],angle=90,length=0.05,col=my.col,lty=my.lty,code=3) ## confidence interval on Familiarity x <- rep(-0.15- 0.15*full,2) y <- with(rt.lm.summary,coefficients[3,1]+ qt(0.975,dim(dat)[1]-3)*c(-coefficients[3,2],coefficients[3,2])) arrows(x[1],y[1],x[2],y[2],angle=90,length=0.05,col=my.col,lty=my.lty,code=3) if(! full) { abline(0,0,lty=3) ## add x axis lines(c(0,0),c(-50,10),lty=3) ## add y axis } } set.seed(11) dat <- english.young[sample(1:(dim(english.young)[1]),200),] ## 200 is what we want plot.cr(dat,full=FALSE) plot.cr(english.young,full=TRUE) lm.0.small <- lm(exp(RTnaming) ~ 1, dat) lm.1.small <- lm(exp(RTnaming) ~ WrittenFrequency, dat) lm.2.small <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, dat) lm.2.full <- lm(exp(RTnaming) ~ WrittenFrequency + Familiarity, english.young) beta.hat.full <- coef(lm.2.full)[2:3] beta.se.add.full <- qt(0.975,dim(english.young)[1]-3) * summary(lm.2.full)[["coefficients"]][2:3,2] beta.hat.small <- coef(lm.2.small)[2:3] beta.se.add.small <- qt(0.975,dim(dat)[1]-3) *summary(lm.2.small)[["coefficients"]][2:3,2] ################################################### ### code chunk number 24: generalized_linear_models.Rnw:1161-1162 ################################################### this.F.statistic.1.2 <- 197 * (sum(residuals(lm.1.small)^2) -sum(residuals(lm.2.small)^2)) /sum(residuals(lm.2.small)^2) ################################################### ### code chunk number 25: generalized_linear_models.Rnw:1184-1185 ################################################### this.F.statistic.0.2 <- 197/2 * (sum(residuals(lm.0.small)^2) -sum(residuals(lm.2.small)^2)) /sum(residuals(lm.2.small)^2) ################################################### ### code chunk number 26: fricationANOVAsimple ################################################### m.0 <- lm(exp(RTnaming) ~ 1, subset(english,AgeSubject=="young")) m.A <- lm(exp(RTnaming) ~ 1 + Frication, subset(english,AgeSubject=="young")) resid.0 <- sum(resid(m.0)^2) resid.A <- sum(resid(m.A)^2) ################################################### ### code chunk number 27: fricationANOVAsimple ################################################### mfa.0 <- lm(exp(RTnaming) ~ Frication + AgeSubject, english) mfa.A <- lm(exp(RTnaming) ~ Frication * AgeSubject, english) df <- with(english,expand.grid(Frication=levels(Frication),AgeSubject=levels(AgeSubject))) predictions.0 <- with(df,tapply(predict(mfa.0,newdata=df),list(Frication,AgeSubject),function(x) x[1])) true.means <- with(english,tapply(exp(RTnaming),list(Frication,AgeSubject),mean)) resid.mfa.0 <- sum(resid(mfa.0)^2) resid.mfa.A <- sum(resid(mfa.A)^2) mfa.f.statistic <- (resid.mfa.0-resid.mfa.A)/3 / (resid.mfa.A/mfa.A[["df.residual"]]) ################################################### ### code chunk number 28: simplestRMANOVAexample ################################################### set.seed(2) library(mvtnorm) n <- 20 m <- 20 beta <- c(0.6,0.2) ## beta[1] corresponds to the intercept; beta[2] corresponds to the intercept plus the effect Sigma.b <- matrix(c(0.3,0,0,0.3),2,2) ## in this case, condition-specific speaker sensitivities are uncorrelated sigma.e <- 0.3 df.1 <- expand.grid(embedding=factor(c("Unembedded","Embedded")),lexicalization=factor(paste("L",1:m,sep=""))) df <- df.1 for(i in 1:(n-1)) df <- rbind(df,df.1) B <- rmvnorm(m,mean=c(0,0),sigma=Sigma.b) df$y <- with(df,beta[embedding] + B[cbind(lexicalization,(as.numeric(embedding)))] + rnorm(dim(df)[1],0,sigma.e)) m <- aov(y ~ embedding + Error(lexicalization/embedding),df) ################################################### ### code chunk number 29: alexopolouKellerSetup ################################################### library(lme4) ################################################### ### code chunk number 30: alexopolouKeller ################################################### dat <- read.table("../data/alexopolou_keller_2007/language_paper_data/english/experiment1/results/RES_EN") names(dat) <- c("subj.numeric","item.numeric","island.numeric","embedding.numeric","resumption.numeric","response") dat$subj <- factor(paste("S",dat$subj.numeric,sep="")) dat$item <- factor(paste("I",dat$item.numeric,sep="")) dat$island <- factor(c("bare","that","weak.whether","strong.rc")[dat$island.numeric+1]) dat$embedding <- factor(c("none","single","double")[dat$embedding.numeric+1]) dat$resumption <- factor(c("gap","resumptive")[dat$resumption.numeric]) ## resumption is 1/2 not 0/1 ##with(subset(dat,island=="bare"),tapply(response,list(embedding,resumption),mean)) datbare <- subset(dat,island=="bare") datbare.byitem <- with(datbare,aggregate(list(response=response),list(item=item,embedding=embedding,resumption=resumption),mean)) summary(aov(response ~ resumption + Error(item/(resumption)),datbare.byitem)) ## summary(aov(response ~ embedding*resumption + Error(item/(embedding*resumption)),datbare.byitem)) ## m.byitem <- lmer(response ~ embedding*resumption + (embedding*resumption | item),datbare.byitem,REML=F) ## m.byitem.no.embedding <- lmer(response ~ embedding*resumption - embedding + (embedding*resumption | item),datbare.byitem,REML=F) ## m.byitem.no.resumption <- lmer(response ~ embedding*resumption - resumption + (embedding*resumption | item),datbare.byitem,REML=F) ## m <- lmer(response ~ embedding*resumption + (embedding*resumption | subj) + (embedding*resumption | item),datbare,REML=F) ## summary(m) ################################################### ### code chunk number 31: anovaExample ################################################### dat1 <- read.table("../data/rohde_levy_kehler_data/results.final.txt", quote="",sep="\t",header=T) dat1 <- subset(dat1,subj != "subj2" & subj != "subj10" & subj != "subj50") # these subjects answered questions at chance dat1$subj <- factor(dat1$subj) # eliminate these levels from the factor spillover.1 <- subset(dat1,expt==1 & correct ==1 & crit=="RC_VERB+1") # focus on first spillover region, # only correctly-answered sentences spillover.1$verb <- factor(spillover.1$verb) # remove "NONE" # levels of factor spillover.1$attachment <- factor(spillover.1$attachment) spillover.1$cond <- with(spillover.1,factor(paste(verb,attachment))) cutoff <- mean(spillover.1$rt) + 4*sd(spillover.1$rt) #sum(abs(z) > 4) # 14 points are flagged this way as outliers #length(z) # 14 out of 933 is a lot of outliers at 4sd!!!! # But this is typical for self-paced reading experiments spillover.1.to.analyze <- subset(spillover.1,rt <= cutoff) ################################################### ### code chunk number 32: anovaExamplePlot ################################################### print(densityplot(~ rt | cond,spillover.1,layout=c(1,4),panel=function(...) { panel.densityplot(...) llines(rep(cutoff,2),c(-0.1,1),lty=2) })) ################################################### ### code chunk number 33: logitTransform ################################################### invlogit <- function(x) exp(x)/(1+exp(x)) x <- seq(-5,5,by=0.01) plot(x,invlogit(x), type="l",xlab=expression(eta),ylab=expression(pi)) ################################################### ### code chunk number 34: xtabsDativeI ################################################### library(languageR) xtabs(~ PronomOfRec + RealizationOfRecipient,dative) ################################################### ### code chunk number 35: firstLogisticRegression ################################################### library(rms) response <- ifelse(dative$RealizationOfRecipient=="PP", 1,0) # code PO realization as success, DO as failure lrm(response ~ PronomOfRec, dative) ################################################### ### code chunk number 36: multipleXtabs ################################################### tab <- xtabs(~ RealizationOfRecipient + PronomOfRec + PronomOfTheme, dative) tab apply(tab,c(2,3),function(x) x[2] / sum(x)) ################################################### ### code chunk number 37: multipleLogit ################################################### library(rms) dative.lrm <- lrm(response ~ PronomOfRec + PronomOfTheme, dative) dative.lrm ################################################### ### code chunk number 38: logitLikelihoodRatioTest ################################################### m.0 <- glm(response ~ PronomOfRec + PronomOfTheme,dative,family="binomial") m.A <- glm(response ~ PronomOfRec*Modality + PronomOfTheme*Modality,dative,family="binomial") anova(m.0,m.A) ################################################### ### code chunk number 39: pChiSq ################################################### 1-pchisq(9.07,3) ################################################### ### code chunk number 40: binomialsLogLinearLogit ################################################### dat <- read.table("../data/binomials_data/single_count_binomials.txt",header=T,fill=T,as.is=T) summary(glm(Response ~ Percept + Syl + Freq - 1, dat,family="binomial"))$coef ################################################### ### code chunk number 41: logLinearLikelihood ################################################### library(lattice) loglinlik = function(lambda,n) { explambda = exp(lambda) Z = 1/(sum(explambda) + 1) ##cat(explambda,"\t",Z*explambda[1], "\t", Z*explambda[2],"\t",Z,"\n") return( (Z*explambda[1])^n[1] * (Z*explambda[2])^n[2] * (Z)^n[3] ) } my.grayscale <- gray(seq(0.9,0.1,by=-0.01)) x <- seq(-5,5,by=0.05) n <- c(7,4,1) lambda <- expand.grid(x,x) lambda[["lik"]] <- apply(as.matrix(lambda),1,function(x) loglinlik(x, n)) ################################################### ### code chunk number 42: logLinearLikelihoodPlot ################################################### print(levelplot(lik ~ Var1 + Var2, lambda, xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale)) ################################################### ### code chunk number 43: logLinearPrior ################################################### lambda[["prior"]] <- apply(as.matrix(lambda[,1:2]),1, function(x) exp(-1 * sum(x^2))) ################################################### ### code chunk number 44: logLinearPriorPlot ################################################### print(levelplot(prior ~ Var1 + Var2, lambda,xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale)) ################################################### ### code chunk number 45: logLinearPosterior ################################################### lambda[["posterior"]] <- lambda$lik * lambda$prior ################################################### ### code chunk number 46: logLinearPosteriorPlot ################################################### print(levelplot(prior ~ Var1 + Var2, lambda,xlab=expression(lambda[1]),ylab=expression(lambda[2]),colorkey=F,col.regions=my.grayscale)) ################################################### ### code chunk number 47: needsDoingLogLin ################################################### dat <- read.table("../data/needs_doing_data/needs.txt",header=T) dat$Response <- ifelse(dat$Response=="ing",1,0) dat$Anim1 <- factor(ifelse(dat$Anim=="abst","abst","conc")) model.logit <- glm(Response ~ Anim1 + sqrt(Dep.Length), data=dat, family=binomial) # data processing to get data in format for log-linear/Poisson model dat.for.loglin <- with(dat,as.data.frame(as.table(tapply(Response, list(Anim1=Anim1,Dep.Length=Dep.Length,Response=Response),length)))) names(dat.for.loglin)[4] <- "x" dat.for.loglin$DL <- dat.for.loglin$Dep.Length dat.for.loglin$Dep.Length <- as.numeric(as.character(dat.for.loglin$DL)) dat.for.loglin$Response <- as.numeric(as.character(dat.for.loglin$Response)) dat.for.loglin$x <- sapply(dat.for.loglin$x, function(x) ifelse(is.na(x), 0, x)) model.loglin <- glm(x ~ Anim1*DL + Response + Response:(Anim1 + sqrt(Dep.Length)),data=dat.for.loglin,family="poisson") summary(model.loglin)$coef[c(32,62,63),] summary(model.logit)$coef ################################################### ### code chunk number 48: lexdecNdens ################################################### m1 <- lm(exp(RTlexdec) ~ Ncount, subset(english, AgeSubject %in% "young")) summary(m1) m2 <- lm(exp(RTlexdec) ~ WrittenFrequency + Ncount, subset(english, AgeSubject %in% "young")) summary(m2) m0 <- lm(exp(RTlexdec) ~ WrittenFrequency, subset(english, AgeSubject %in% "young")) anova(m0, m2) ################################################### ### code chunk number 49: lexdecNonwordNdens ################################################### dat.nonwords <- read.table("../data/bicknell_etal_2010jml_data/nonword-data.txt",header=T) m1 <- lm(rt ~ neighbors + prime.freq, dat.nonwords) m0 <- lm(rt ~ prime.freq, dat.nonwords) summary(m1) anova(m0, m1) ################################################### ### code chunk number 50: stratifiedVariance ################################################### library(mvtnorm) library(lme4) beta <- c(70,70) ## beta[1] corresponds to the intercept; beta[2] corresponds to the intercept plus the effect Sigma.b <- matrix(c(10,0,0,10),2,2) ## in this case, condition-specific speaker sensitivities are uncorrelated sigma.e <- 10 m <- 6 n <- 10 f <- function(ignore) { df <- df.rep(expand.grid(x=c("Initial","Medial"),subj=paste("S",rep(1:m))),n) B <- rmvnorm(m,mean=c(0,0),sigma=Sigma.b) df$y <- with(df,beta[x] + B[cbind(subj,(as.numeric(x)))] + rnorm(dim(df)[1],0,sigma.e)) p.wrong <- anova(lm(y ~ x*subj,df))[1,5] p.rm <- summary(aov(y ~ x + Error(subj/(x)),df))[["Error: subj:x"]][[1]][1,5] return(c(p.wrong=p.wrong,p.rm=p.rm)) } system.time(p.vals <- sapply(1:100,f)) ################################################### ### code chunk number 51: stratifiedVarianceShow ################################################### cat("Frequency of Type I error for NON-repeated measures analysis:",mean(p.vals[1,] < 0.05)) cat("Frequency of Type I error for repeated-measures analysis:",mean(p.vals[2,] < 0.05)) ################################################### ### code chunk number 52: rohdeEtalSecondSpilloverRegion ################################################### dat.rlk <- read.table("../data/rohde_levy_kehler_data/rohde-etal-IC-RC-spr.txt", header=T) means <- with(subset(dat.rlk,! filler),tapply(rt,crit,mean)) sds <- with(subset(dat.rlk,! filler),tapply(rt,crit,sd)) dat.rlk$is.outlier <- with(dat.rlk,rt > means[crit] + 3*sds[crit] & ! filler) f <- function(dat,remove.outliers=F) { if(remove.outliers) dat <- subset(dat, ! is.outlier) cat("***By-subjects analysis:***\n") dat.bysubj <- ddply(subset(dat,crit %in% "RC_VERB+2"),c("subj","verb","attachment"),function(df) return(c(rt=mean(df$rt)))) cat("***Condition means:***\n") print(with(dat.bysubj, tapply(rt,list(verb,attachment),mean))) print(summary(aov(rt ~ verb * attachment + Error(subj/(verb*attachment)),dat.bysubj))) cat("***By-items analysis:***\n") dat.byitem <- ddply(subset(dat,crit %in% "RC_VERB+2"),c("item","verb","attachment"),function(df) return(c(rt=mean(df$rt)))) cat("***Condition means:***\n") print(with(dat.byitem, tapply(rt,list(verb,attachment),mean))) print(summary(aov(rt ~ verb * attachment + Error(item/(verb*attachment)),dat.byitem))) } ################################################### ### code chunk number 53: rohdeEtalSecondSpilloverRegionShow ################################################### f(dat.rlk,F) ## no outlier removal f(dat.rlk,T) ## with outlier removal ################################################### ### code chunk number 54: PronQuantSetup ################################################### library(languageR) ################################################### ### code chunk number 55: PronQuant ################################################### pq <- function(rec,th) { if(rec=="pronominal" & th=="nonpronominal") 1 else if(rec=="nonpronominal" & th=="pronominal") -1 else 0 } PronQuant <- with(dative,mapply(pq,PronomOfRec,PronomOfTheme)) m.2 <- glm(RealizationOfRecipient=="PP" ~ PronQuant, data=dative,family="binomial") ################################################### ### code chunk number 56: PronQuantPrintResults ################################################### summary(m.2) ################################################### ### code chunk number 57: PronQuantModelComparison ################################################### m.1 <- glm(RealizationOfRecipient=="PP" ~ PronomOfRec + PronomOfTheme, dative,family="binomial") ################################################### ### code chunk number 58: PronQuantModelComparisonPrintResults ################################################### anova(m.2,m.1,test="Chisq") ################################################### ### code chunk number 59: symmetricBaselinePriors ################################################### ### part 3: no prior penalty on intercept, but penalty on all else library(rms) dat <- data.frame(x=rep(c("pro","pro","notPro","notPro"),c(8,2,2,8)),y=rep(c("NP","PP","NP","PP"),c(8,2,2,8))) m <- lrm(y~x,dat,penalty=1) predict(m,dat,type="fitted")