# NRES 798 201501 Lab 5 # Objectives of todays labs # Implement and understand ANOVA analysis (fixed factor) # Perform pairwise evaluations beween multiple comparisons (Tukey's test) # Create publication suitable figures # Run an ANCOVA and evaluate model complexity # data in the package "faraway" will be analysed in today's lab install.packages("faraway") library(faraway) ######################################### # ANOVA: one-way ######################################### # Data on the time it takes blood from animials on different diets to coagulate. # From Box, Hunter and Hunter 1978. data(coagulation) # TODO: How many diet levels are examined? # TODO: How many replicates (animals) were sampled in each level? # TODO: Examine the data and determine if it is normaly distrubited and homoscedastic # TODO: Box-plot: are there potential problems with data from any of the levels? # TODO: Q-Q plot: what does the plot suggest about the normality, skewness and kurtosis of the data? # TODO: Residuals does the data appear to be homoscedastic? # Why are only three levels displayed # Hint: Use jitter() for the plotting of the fit values. What does this do? # TODO: Normality test: what does the w and p-value indicate about the normality? # Testing for Homogeneity of variances # Bartlett's test can be used to test if the variance is the same for three or more samples # e.g. bartlett.test(coag ~ diet, data=coagulation) # TODO: Perform an ANOVA and evaluate whether or not there are differences beween the diet levels. # TODO: How do the coefficient estimates for each level differ if a zero intercept is included or not? # i.e. coag ~ diet vs. coag ~ diet -1, # TODO: Determine which levels are statitically different from each other # Tukey's test (or the Honest Significant Difference test) is commonly used to where differences occur between multiple comparisons. # Method 1 install.packages("agricolae") # package agricolae contains the function HSD.test library(agricolae) # format HSD.test(responce,levels,DFerror,MSerror,group=TRUE) summary(aov(coag ~ diet,coagulation)) HSDresults <- HSD.test(coagulation$coag,coagulation$diet,20,5.6,group = TRUE) str(HSDresults) # determine the structure of the output HSDresults$groups # show where the differences occur # TODO: how do you interpret the "groups" differences? HSDresults # produce the full output # TODO: what is the exact p-value associated with each multiple comparison? # Method 2 TukeyHSD(aov(coag ~ diet, coagulation)) # TODO: How do you interpret the "p adj" for each pair? # TODO: What is the best way to plot one-way ANOVA data? # e.g. boxplot() or bar.group() (function in "agricolae") bar.group(HSDresults$groups,ylim=c(0,80),density=4,border="blue") # TODO: create a publication quality figure using boxplot() that includes meaningfull X and Y lables, and designations for where the differences between multiple comparisons occur. # Hint: use "xlab", "ylab", "ylim", "cex" and importantly "mtext" and/or "text" ######################################### # ANOVA two-way ######################################### data(rats) # data on the survial time of rats after consumeing 1 of 3 poisions (factor 1), and in 4 treatments (factor 2) # TODO: what happens when the plot function is used on a formula? # e.g. plot(time ~ treat + poison, data=rats) # TODO: use lm() to constuct a two-way ANOVA with an interaction term # Hint: the format for multiple regression also apples to multi-way ANOVA # TODO: how do you interpret the interaction coefficient estimates? (e.g. summary(lm())) # plotting interactions interaction.plot(rats$treat,rats$poison,rats$time) interaction.plot(rats$poison,rats$treat,rats$time) # TODO: Plot the residuals of the model and evaluate if the current model is sufficient. # TODO: How do Log and reciprocal (1/y) transformations influence the fit? # TODO: Based on the anova(lm()) output can the model be simplified, and should it be simiplified? # Hint look at both anova() and summary() # TODO: Run a "simpler" model and compare the output. # TODO: Creat a publication grade figure that includes the interaction (interaction.plot()) # TODO: modify the legend and use different colors for one factor and different line types for the second factor. ######################################### # Random block design ANOVA ######################################### data(penicillin) # Data examining the yield of penicillin produced from 4 differet processes (treat) # Large homogeneous batches of the raw material is difficult to produce so blocks (blends) are used. # TODO: Plot the model to graphically examine the treatment and the blocking factor # TODO; Does the treatment effect or the blocking effect appear to be larger? # TODO: Run the random block ANOVA and interpret the treatment and the blocking factor. # TODO: Use interaction.plot() to examine if and how treatment potentialy interacts with the blocking factor. # TODO: Should an interaction term ever be included in the model. ######################################### # Nested ANOVA ######################################### # Fomula for nested ANOVA: y ~ a/b/c , read a, with b nested in a, with c nested in b and a # Example data of stream DOC sampled from reaches nested wihtin streams dat <- data.frame(DOC = rnorm(20,mean = 5, sd = 1.3), reach = rep(letters[1:4], 5), stream = rep(c("Douglas","Speedy","Spey","Obrian","Willow"), each = 4)) # TODO: examine how the sum of squares is calculated in a factorial vs. nested structure # The correct model specification with the reach error nested in the stream error would look like nmod <- aov(DOC ~ stream + Error(stream/reach), dat) summary(nmod) # The function lmer() will be used later to calculate F and p-values ######################################### # ANCOVA ######################################### # Download the Gain.txt data and place in working directory (From Crawley) datpath <- "C:/Users/Che/UNBC_work/Courses/NRES-798/NRES-798-Labs/Gain.txt" dat <- read.table(datpath,header=TRUE) # Data on the weight gain of individuals as a function of Sex and Genotype with age as a potential covariate # TODO: examine the relationship between weight and the three factors # TODO: construct the full (maximum) model including both categorical factors and the continuous covariate # e.g. y ~ a*b*c # TODO evaluate if any of the higher order interactions can be removed.