R is a free software environment for statistical computing and graphics.
http://www.r-project.org/
Home
| Getting Started
| Schedule
| References
| FAQ
| Discussion | Tom's site
For more information, please contact Paul Geissler (Paul_Geissler@usgs.gov).
Michael Crawley, 2007,The R Book, Chapter 16.
See the text for descriptions. You can copy and paste the statements below into thr R Commander script window and execute them.
Anything after # on a line is a comment. I have added annotations as comments and shown the output as comments following each command.
Use when you know how many are in contrasting categories (alive/dead, male/female, infected/uninfected). With Poisson count date,
we know how many times an event occurred, but not how many times it did NOT occur.
rm(list = ls()) # removes previous variables d=read.table("http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/sexratio.txt",header=T); attach(d); d # density females males # sex ratio in insects # 1 1 1 0 # 2 4 3 1 # 3 10 7 3 # 4 22 18 4 # 5 55 22 33 # 6 121 41 80 # 7 210 52 158 # 8 444 79 365 par(mfrow=c(1,2)); p=males/(males+females) plot(density,p,ylab="proportion male") plot(log(density),p,ylab="proportion male"); par(mfrow=c(1,1)) y=cbind(males,females) m1=glm(y~density,binomial); summary(m1) # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 0.0807368 0.1550376 0.521 0.603 # density 0.0035101 0.0005116 6.862 6.81e-12 *** # (Dispersion parameter for binomial family taken to be 1) # Null deviance: 71.159 on 7 degrees of freedom # Residual deviance: 22.091 on 6 degrees of freedom # should equal df # AIC: 54.618 m2=glm(y~log(density),binomial); summary(m2) # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) -2.65927 0.48758 -5.454 4.92e-08 *** # log(density) 0.69410 0.09056 7.665 1.80e-14 *** # (Dispersion parameter for binomial family taken to be 1) # Null deviance: 71.1593 on 7 degrees of freedom # Residual deviance: 5.6739 on 6 degrees of freedom # should equal df # AIC: 38.201 # better model x=seq(0,6,01); plot(log(density),p,ylab="proportion male"); lines(x,predict(m2,list(density=exp(x)),type="response")) #### estimating LD50 and LD90 from bioassay date ########################### page 576 rm(list = ls()) # removes previous variables d=read.table("http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/bioassay.txt",header=T); attach(d); d # dose dead batch # size # 1 1 2 100 # 2 3 10 90 # 3 10 40 98 # 4 30 96 100 # 5 100 98 100 y=cbind(dead,batch-dead); m=glm(y~log(dose),binomial); dose.p(m,p=c(0.5,0.9,0.95)) # Dose SE # p = 0.50: 2.306981 0.07772065 # LD50 lethal dose 50% of population # p = 0.90: 3.425506 0.12362080 # LD90 # p = 0.95: 3.805885 0.15150043 # LD95 #### proportion data with categorical explanatory variables ################## page 577 rm(list = ls()) # removes previous variables d=read.table("http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/germination.txt",header=T); attach(d); d # Germanation of seeds of two genotypes of the parasitic plant Orobanche and two extracts from # host plants (bean and cucomber) that were used to stimulate germination. Two-way factorial analysis of deviance. # count sample Orobanche extract # 1 10 39 a75 bean # . . . y=cbind(count,sample-count); m1=glm(y~Orobanche*extract,binomial); summary(m1) # Residual deviance: 33.278 on 17 degrees of freedom # overdispersion - they should be equal m1=glm(y~Orobanche*extract,quasibinomial); summary(m1) # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -0.4122 0.2513 -1.640 0.1193 # Orobanche[T.a75] -0.1459 0.3045 -0.479 0.6379 # extract[T.cucumber] 0.5401 0.3409 1.584 0.1315 # Orobanche[T.a75]:extract[T.cucumber] 0.7781 0.4181 1.861 0.0801 . # (Dispersion parameter for quasibinomial family taken to be 1.861832) # Null deviance: 98.719 on 20 degrees of freedom # Residual deviance: 33.278 on 17 degrees of freedom # should equal df, use quasibinomial m2=glm(y~Orobanche+extract,quasibinomial); summary(m2) # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -0.7005 0.2199 -3.186 0.00512 ** # Orobanche[T.a75] 0.2705 0.2257 1.198 0.24635 # extract[T.cucumber] 1.0647 0.2104 5.061 8.14e-05 *** # (Dispersion parameter for quasibinomial family taken to be 2.128368) # Null deviance: 98.719 on 20 degrees of freedom # Residual deviance: 39.686 on 18 degrees of freedom anova(m1,m2,test="F") # use F test with quasibinomial # Analysis of Deviance Table # Model 1: y ~ Orobanche * extract # Model 2: y ~ Orobanche + extract # Resid. Df Resid. Dev Df Deviance F Pr(>F) # 1 17 33.278 # 2 18 39.686 -1 -6.408 3.4418 0.08099 . # interaction is not significant # No evidence of different responses to extracts. anova(m2,test="F") # Analysis of Deviance Table # Terms added sequentially (first to last) # Df Deviance Resid. Df Resid. Dev F Pr(>F) # NULL 20 98.719 # Orobanche 1 2.544 19 96.175 1.1954 0.2887 # extract 1 56.489 18 39.686 26.5412 6.692e-05 *** m3=glm(y~extract,quasibinomial); anova(m2,m3,test="F") # Analysis of Deviance Table # Model 1: y ~ Orobanche + extract # Model 2: y ~ extract # Resid. Df Resid. Dev Df Deviance F Pr(>F) # 1 18 39.686 # 2 19 42.751 -1 -3.065 1.4401 0.2457 summary(m3) # final model # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -0.5122 0.1531 -3.345 0.0034 ** # estimates on transformed scale # extract[T.cucumber] 1.0574 0.2118 4.992 8.1e-05 *** # (Dispersion parameter for quasibinomial family taken to be 2.169821) # Null deviance: 98.719 on 20 degrees of freedom # Residual deviance: 42.751 on 19 degrees of freedom tapply(predict(m3,type="response"),extract,mean) # bean cucumber # predicted proportions germinate # 0.3746835 0.6330275 tapply(count,extract,sum)/tapply(sample,extract,sum) bean cucumber # observed proportions (divide totals) 0.3746835 0.6330275 #### analysis of covariance with binomial data ########################## page 581 # Flowering in 5 varities of perennial plats, sprayed with growth promoters. rm(list = ls()) # removes previous variables d=read.table("http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/flowering.txt",header=T); attach(d); d # flowered number dose variety # 1 0 12 1 A # . . . y=cbind(flowered, number-flowered); pf=flowered/number; scatterplot(pf~dose|variety) m=glm(y~dose*variety,binomial); summary(m) # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) -4.59165 1.03215 -4.449 8.64e-06 *** # dose 0.41262 0.10033 4.113 3.91e-05 *** # variety[T.B] 3.06197 1.09317 2.801 0.005094 ** # variety[T.C] 1.23248 1.18812 1.037 0.299576 # variety[T.D] 3.17506 1.07516 2.953 0.003146 ** # variety[T.E] -0.71466 1.54849 -0.462 0.644426 # dose:variety[T.B] -0.34282 0.10239 -3.348 0.000813 *** # dose:variety[T.C] -0.23039 0.10698 -2.154 0.031274 * # dose:variety[T.D] -0.30481 0.10257 -2.972 0.002961 ** # dose:variety[T.E] -0.00649 0.13292 -0.049 0.961057 # (Dispersion parameter for binomial family taken to be 1) # Null deviance: 303.350 on 29 degrees of freedom # Residual deviance: 51.083 on 20 degrees of freedom # overdispersion may be due to lack of fit. ## run a model for each variety separately plt=function(v){ ty=y[variety==v,]; td=dose[variety==v]; mt=glm(ty~td,binomial) pd=seq(0,32,0.2); pp=predict(mt,list(td=pd),type="response") p=ty[,1]/(ty[,1]+ty[,2]); plot(p~td); lines(pd,pp) } par(mfrow=c(2,3)); plt("A");plt("B"); plt("C"); plt("D"); plt("E"); par(mfrow=c(1,1)) ## Some varieties do not fit the logistic model and need a "hump". Add I(td^2) plt=function(v){ ty=y[variety==v,]; td=dose[variety==v]; mt=glm(ty~td+I(td^2),binomial) pd=seq(0,32,0.2); pp=predict(mt,list(td=pd),type="response") p=ty[,1]/(ty[,1]+ty[,2]); plot(p~td); lines(pd,pp) } par(mfrow=c(2,3)); plt("A");plt("B"); plt("C"); plt("D"); plt("E"); par(mfrow=c(1,1)) m=glm(y~dose*variety+I(dose^2),binomial); summary(m) # Residual deviance: 33.641 on 19 degrees of freedom # overdispersion m1=glm(y~dose*variety+I(dose^2),quasibinomial); summary(m1) # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -5.081310 1.524669 -3.333 0.00350 ** # dose 0.539558 0.149004 3.621 0.00182 ** # variety[T.B] 2.787401 1.616284 1.725 0.10084 # variety[T.C] 0.345351 1.837732 0.188 0.85293 # variety[T.D] 2.761404 1.588723 1.738 0.09837 . # variety[T.E] -0.949808 2.233017 -0.425 0.67536 # I(dose^2) -0.006167 0.002211 -2.789 0.01171 * # dose:variety[T.B] -0.259977 0.146017 -1.780 0.09100 . # dose:variety[T.C] -0.141160 0.153381 -0.920 0.36894 # dose:variety[T.D] -0.228755 0.145574 -1.571 0.13259 # dose:variety[T.E] 0.012697 0.184766 0.069 0.94593 ## We can now proceed with model simplification. #### converting complex contegency tables to proportions ############################## page 584 rm(list = ls()) # removes previous variables d=read.table("http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/lizards.txt",header=T); attach(d); d # n sun height perch time species # Schoener's lizards # 1 20 Shade High Broad Morning opalinus # . . . ds=d[order(species,sun,height,perch,time),]; attach(ds); # must be in order to use cbind g=ds[species=="grahamii",]; o=ds[species=="opalinus",] liz=data.frame(sun=g$sun,height=g$height,perch=g$perch,time=g$time,ag=g$n,ao=o$n); attach(liz); liz # sum height perch time ag ao # 1 Shade High Broad Afternoon 4 4 # . . . y=cbind(ag,ao); m1=glm(y~sun*height*perch*time,binomial); m2=step(m1); summary(m2) # Start: AIC=102.82 # Df Deviance AIC # - sun:height:perch:time 1 2.180e-10 100.82 # <none> 3.582e-10 102.82 # Step: AIC=100.82 # Df Deviance AIC # - sun:height:time 2 0.442 97.266 # - sun:perch:time 2 0.810 97.634 # - height:perch:time 2 3.222 100.046 # <none> 2.18e-10 100.824 # - sun:height:perch 1 2.709 101.533 # Step: AIC=97.27 # Df Deviance AIC # - sun:perch:time 2 1.071 93.896 # <none> 0.442 97.266 # - height:perch:time 2 4.648 97.472 # - sun:height:perch 1 3.111 97.936 # Step: AIC=93.9 # Df Deviance AIC # - sun:time 2 3.340 92.165 # <none> 1.071 93.896 # - sun:height:perch 1 3.302 94.126 # - height:perch:time 2 5.791 94.615 # Step: AIC=92.16 # Df Deviance AIC # <none> 3.340 92.165 # - sun:height:perch 1 5.827 92.651 # - height:perch:time 2 8.542 93.366 # Call: # glm(formula = y ~ sun + height + perch + time + sun:height + # sun:perch + height:perch + height:time + perch:time + sun:height:perch + # height:perch:time, family = binomial) # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) -0.8297 0.5120 -1.620 0.1051 # sun[T.Sun] 0.4791 0.4744 1.010 0.3126 # height[T.Low] -20.9069 6551.9690 -0.003 0.9975 # perch[T.Narrow] 0.1751 0.7481 0.234 0.8149 # time[T.Mid.day] -0.9101 0.4272 -2.130 0.0331 * # time[T.Morning] -0.9314 0.4637 -2.009 0.0445 * # sun[T.Sun]:height[T.Low] 19.7912 6551.9690 0.003 0.9976 # sun[T.Sun]:perch[T.Narrow] 0.2416 0.6987 0.346 0.7295 # height[T.Low]:perch[T.Narrow] 21.1239 6551.9691 0.003 0.9974 # height[T.Low]:time[T.Mid.day] -0.2446 0.9279 -0.264 0.7920 # height[T.Low]:time[T.Morning] 0.5732 0.9260 0.619 0.5359 # perch[T.Narrow]:time[T.Mid.day] 0.2140 0.6486 0.330 0.7415 # perch[T.Narrow]:time[T.Morning] 0.7106 0.6980 1.018 0.3087 # sun[T.Sun]:height[T.Low]:perch[T.Narrow] -19.9665 6551.9691 -0.003 0.9976 # height[T.Low]:perch[T.Narrow]:time[T.Mid.day] -0.6021 1.3489 -0.446 0.6553 # height[T.Low]:perch[T.Narrow]:time[T.Morning] -3.2054 1.6106 -1.990 0.0466 * # (Dispersion parameter for binomial family taken to be 1) # Null deviance: 70.1018 on 22 degrees of freedom # Residual deviance: 3.3403 on 7 degrees of freedom # Crawley does not use quasibinomial # AIC: 92.165 m3=step(m1,k=log(length(sun))); summary(m3) # using BIC does not select an many parameters # Start: AIC=131.1 # Df Deviance AIC # - sun:height:perch:time 1 2.180e-10 127.92 # <none> 3.582e-10 131.10 # Step: AIC=127.92 # Df Deviance AIC # - sun:height:time 2 0.442 122.005 # - sun:perch:time 2 0.810 122.373 # - height:perch:time 2 3.222 124.785 # - sun:height:perch 1 2.709 127.450 # <none> 2.18e-10 127.919 # Step: AIC=122 # Df Deviance AIC # - sun:perch:time 2 1.071 116.279 # - height:perch:time 2 4.648 119.855 # - sun:height:perch 1 3.111 121.497 # <none> 0.442 122.005 # Step: AIC=116.28 # Df Deviance AIC # - sun:time 2 3.340 112.191 # - height:perch:time 2 5.791 114.642 # - sun:height:perch 1 3.302 115.331 # <none> 1.071 116.279 # Step: AIC=112.19 # Df Deviance AIC # - height:perch:time 2 8.542 111.037 # - sun:height:perch 1 5.827 111.500 # <none> 3.340 112.191 # Step: AIC=111.04 # Df Deviance AIC # - perch:time 2 8.551 104.690 # - height:time 2 9.483 105.622 # - sun:height:perch 1 10.903 110.220 # <none>gt; 8.542 111.037 # Step: AIC=104.69 # Df Deviance AIC # - height:time 2 9.491 99.274 # - sun:height:perch 1 10.909 103.870 # <none> 8.551 104.690 # Step: AIC=99.27 # Df Deviance AIC # - sun:height:perch 1 11.767 98.371 # <none> 9.491 99.274 # - time 2 21.278 104.704 # Step: AIC=98.37 # Df Deviance AIC # - sun:perch 1 11.783 95.210 # - height:perch 1 11.979 95.406 # - sun:height 1 13.848 97.275 # <none> 11.767 98.371 # - time 2 23.245 103.493 # Step: AIC=95.21 # Df Deviance AIC # - height:perch 1 11.984 92.233 # - sun:height 1 13.923 94.172 # <none> 11.783 95.210 # - time 2 23.385 100.456 # Step: AIC=92.23 # Df Deviance AIC # - sun:height 1 14.205 91.275 # <none> 11.984 92.233 # - time 2 23.714 97.606 # - perch 1 24.921 101.991 # Step: AIC=91.28 # Df Deviance AIC # <none> 14.205 91.275 # - sun 1 21.892 95.784 # - time 2 25.802 96.516 # - perch 1 27.335 101.227 # - height 1 36.271 110.163 # Call: # glm(formula = y ~ sun + height + perch + time, family = binomial) # Coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) -1.2079 0.3536 -3.416 0.000634 *** # sun[T.Sun] 0.8473 0.3224 2.628 0.008585 ** # height[T.Low] -1.1300 0.2571 -4.395 1.11e-05 *** # perch[T.Narrow] 0.7626 0.2113 3.610 0.000306 *** # time[T.Mid.day] -0.9639 0.2816 -3.423 0.000619 *** # time[T.Morning] -0.7368 0.2990 -2.464 0.013730 * # (Dispersion parameter for binomial family taken to be 1) # Null deviance: 70.102 on 22 degrees of freedom # Residual deviance: 14.205 on 17 degrees of freedom # AIC: 83.029 anova(m2,test="Chisq") # Analysis of Deviance Table # Model: binomial, link: logit # Response: y # Terms added sequentially (first to last) # Df Deviance Resid. Df Resid. Dev P(>|Chi|) # NULL 22 70.102 # sun 1 6.472 21 63.629 0.011 # height 1 26.134 20 37.495 3.185e-07 # perch 1 11.694 19 25.802 0.001 # time 2 11.597 17 14.205 0.003 # sun:height 1 2.220 16 11.984 0.136 # sun:perch 1 0.005 15 11.979 0.941 # height:perch 1 0.212 14 11.767 0.645 # height:time 2 0.858 12 10.909 0.651 # perch:time 2 0.006 10 10.903 0.997 # sun:height:perch 1 2.361 9 8.542 0.124 # height:perch:time 2 5.201 7 3.340 0.074 anova(m3,test="Chisq") # Analysis of Deviance Table # Model: binomial, link: logit # Response: y # Terms added sequentially (first to last) # Df Deviance Resid. Df Resid. Dev P(>|Chi|) # AIC and BIC give very different models # NULL 22 70.102 # sun 1 6.472 21 63.629 0.011 # height 1 26.134 20 37.495 3.185e-07 # perch 1 11.694 19 25.802 0.001 # time 2 11.597 17 14.205 0.003