# This is a sampler of the R code used for the Mental Lexicon talk # "Form and meaning in the reading of Chinese compounds" # . # The analyses rely on tools in the R packages "lme4" # (which permits subjects and items to be treated as random simultaneously) # and "nlme" (which has some tools for plotting, but only for by-subject analyses). # Thanks to Harald Baayen and Lee Wurm for tips, but neither is responsible # for any misunderstandings -- so caveat emptor! # I first demo how the analyses were done (as summarized in slides 16ff) # (note that the slides show mean RTs, though the stats were actually based on # ALL raw data, and using log(RT) rather than RT). # Then I show how I made the partial effect plots in slide 17. # The data file consists of the following variables: # Subject: ID numbers for subjects (permits grouping as random variable) # Item: ID numbers for items (permits grouping as random variable) # LR: direction (1 = LR, -1 = RL) # Alone: whether direction differed across groups or within (1 = separate experiments, -1 = mixed) # C1S: 1st char transparency (1 = T, -1 = O) # C2S: ditto for 2nd char # C1F: log freq of 1st char # C2F: ditto for 2nd char # WF: log word freq # RC: whether compound is right-headed (1 = yes, -1 = no) # Load the lme4 package (it must first be installed from CRAN) library(lme4) # Name the data (this is for horizontal-running text only) hdat = read.table("HorizontalData.txt",T) # Treat the headers in the data file as variable names attach(hdat) # Test if RT is normally distributed qqnorm(RT) # It's not normal qqnorm(log(RT)) # Now it's better # Simplify notation logRT = log(RT) # The simplest model (by subject only) hdat.lme01 = lmer(logRT~LR*Alone*C1S*C2S+C1F*C2F+WF+(1|Subject), data=hdat) hdat.lme01 # The simplest model (by subject and by item) hdat.lme02 = lmer(logRT~LR*Alone*C1S*C2S+C1F*C2F+WF+(1|Subject)+(1|Item), data=hdat) hdat.lme02 # The above two models are nested, so can be compared to see if items should indeed # be treated as random anova(hdat.lme01,hdat.lme02) # Add RC as additive effect hdat.lmeRC01 = lmer(logRT~LR*Alone*C1S*C2S+RC+C1F*C2F+WF+(1|Subject), data=hdat) hdat.lmeRC01 hdat.lmeRC02 = lmer(logRT~LR*Alone*C1S*C2S+RC+C1F*C2F+WF+(1|Subject)+(1|Item), data=hdat) hdat.lmeRC02 anova(hdat.lmeRC01,hdat.lmeRC02) # Include interactions with RC hdat.lmeRCint01 = lmer(logRT~LR*Alone*C1S*C2S*RC+C1F*C2F+WF+(1|Subject), data=hdat) hdat.lmeRCint01 ## BEST MODEL: By-subj-&-item, including "+/-right-headed compounds" as factor ## and its interactions with direction and whether the direction was within or between subject groups hdat.lmeRCint02 = lmer(logRT~LR*Alone*C1S*C2S*RC+C1F*C2F+WF+(1|Subject)+(1|Item), data=hdat) hdat.lmeRCint02 anova(hdat.lmeRCint01,hdat.lmeRCint02) # Without including C1F*C2F, the effect of C2F is counterintuitive: negative! hdat.lmeRCintnoCFint02 = lmer(logRT~LR*Alone*C1S*C2S*RC+C1F+C2F+WF+(1|Subject)+(1|Item), data=hdat) hdat.lmeRCintnoCFint02 anova(hdat.lmeRCintnoCFint02,hdat.lmeRCint02) # Comparing the relevance of RC and its interactions anova(hdat.lmeRCint02,hdat.lmeRC02) anova(hdat.lmeRC02,hdat.lme02) anova(hdat.lmeRCint02,hdat.lme02) # Now make partial effect plots, which requires a different package library(nlme) # As a reminder, here's the model (by-subj only, using nlme syntax): # hdat.lme = lmer(logRT~LR*Alone*C1S*C2S*RC+C1F*C2F+WF, random=~1|Subject, data=hdat) # But this time we must compute interactions "by hand" so model can plot properly # (not sure why this is, but it just is) LRAlone = LR*Alone LRC1S = LR*C1S AloneC1S = Alone*C1S LRC2S = LR*C2S AloneC2S = Alone*C2S C1SC2S = C1S*C2S LRRC = LR*RC AloneRC = Alone*RC C1SRC = C1S*RC C2SRC = C2S*RC C1FC2F = C1F*C2F LRAloneC1S = LR*Alone*C1S LRAloneC2S = LR*Alone*C2S LRC1SC2S = LR*C1S*C2S AloneC1SC2S = Alone*C1S*C2S LRAloneRC = LR*Alone*RC LRC1SRC = LR*C1S*RC AloneC1SRC = Alone*C1S*RC LRC2SRC = LR*C2S*RC AloneC2SRC = Alone*C2S*RC C1SC2SRC = C1S*C2S*RC LRAloneC1SC2S = LR*Alone*C1S*C2S LRAloneC1SRC = LR*Alone*C1S*RC LRAloneC2SRC = LR*Alone*C2S*RC LRC1SC2SRC = LR*C1S*C2S*RC AloneC1SC2SRC = Alone*C1S*C2S*RC LRAloneC1SC2SRC = LR*Alone*C1S*C2S*RC # Now redefine model using those hand-made interaction factors: hdat.lme = lme(logRT ~ LR + Alone + C1S + C2S + RC + C1F + C2F + WF + LRAlone + LRC1S + AloneC1S + LRC2S + AloneC2S + C1SC2S + LRRC + AloneRC + C1SRC + C2SRC + C1FC2F + LRAloneC1S + LRAloneC2S + LRC1SC2S + AloneC1SC2S + LRAloneRC + LRC1SRC + AloneC1SRC + LRC2SRC + AloneC2SRC + C1SC2SRC + LRAloneC1SC2S + LRAloneC1SRC + LRAloneC2SRC + LRC1SC2SRC + AloneC1SC2SRC + LRAloneC1SC2SRC, random=~1|Subject, data=hdat) # Unlike the lmer function (in lme4 package), the lme function (in nlme package) # lets you extract the coefficients table, so here it is: round(summary(hdat.lme)$tTable,4) # Now define the new dataframe that keeps everything constant except # the independent variable we're looking at # First define the number of observations # (This value is arbitrary; with linear models, it'll be straight anyway) nobs = 8 # Define fake data frame with fixed values: predictdata=data.frame( Subject = rep("99999", nobs), LR = rep(1, nobs), TD = rep(1, nobs), Alone = rep(1, nobs), RC = rep(1, nobs), CC = rep(1, nobs), EC = rep(1, nobs), C1S = rep(1, nobs), C2S = rep(1, nobs), C1F = rep(median(hdat$C1F), nobs), C2F = rep(median(hdat$C2F), nobs), WF = rep(median(hdat$WF), nobs), LRAlone = rep(1, nobs), LRC1S = rep(1, nobs), AloneC1S = rep(1, nobs), LRC2S = rep(1, nobs), AloneC2S = rep(1, nobs), C1SC2S = rep(1, nobs), LRRC = rep(1, nobs), AloneRC = rep(1, nobs), C1SRC = rep(1, nobs), C2SRC = rep(1, nobs), C1FC2F = rep(1, nobs), LRAloneC1S = rep(1, nobs), LRAloneC2S = rep(1, nobs), LRC1SC2S = rep(1, nobs), AloneC1SC2S = rep(1, nobs), LRAloneRC = rep(1, nobs), LRC1SRC = rep(1, nobs), AloneC1SRC = rep(1, nobs), LRC2SRC = rep(1, nobs), AloneC2SRC = rep(1, nobs), C1SC2SRC = rep(1, nobs), LRAloneC1SC2S = rep(1, nobs), LRAloneC1SRC = rep(1, nobs), LRAloneC2SRC = rep(1, nobs), LRC1SC2SRC = rep(1, nobs), AloneC1SC2SRC = rep(1, nobs), LRAloneC1SC2SRC = rep(1, nobs) ) # Default plotting range go across all RT values; # you can improve appearance of plot by adjusting range later ylimit = c(min(logRT), max(logRT)) # In this demo, I'm making four plots: C1F, C2F, C1F*C2F, WF # so let's arrange them in a nice 2*2 matrix, for easier comparison par(mfrow = c(2,2)) # C1F effect predictX = predictdata # Copy data for safe keeping predictX$C1F = seq(min(hdat$C1F), max(hdat$C1F), length = nobs) plot(as.numeric(predict(hdat.lme, predictX, level=0)) ~ predictX$C1F, type="l", ylim=ylimit, xlab="CF1", ylab="log RT", col = "red") # C2F effect predictX = predictdata predictX$C2F = seq(min(hdat$C2F), max(hdat$C2F), length = nobs) plot(as.numeric(predict(hdat.lme, predictX, level=0)) ~ predictX$C2F, type="l", ylim=ylimit, xlab="CF2", ylab="log RT", col = "red") # C1F*C2F interaction predictX = predictdata predictX$C1FC2F = seq(min(hdat$C1F*hdat$C2F), max(hdat$C1F*hdat$C2F), length = nobs) plot(as.numeric(predict(hdat.lme, predictX, level=0)) ~ predictX$C1FC2F, type="l", ylim=ylimit, xlab="CF1 * CF2", ylab="log RT", col = "red") # WF effect predictX = predictdata predictX$WF = seq(min(hdat$WF), max(hdat$WF), length = nobs) plot(as.numeric(predict(hdat.lme, predictX, level=0)) ~ predictX$WF, type="l", ylim=ylimit, xlab="WF", ylab="log RT", col = "red") # Close up shop detach(hdat)