library(ggplot2) #Exercise 1: standard error Tab10 <- read.table("./Table 1.10 Drennan.txt", header = T) # First calculate the mean and standard deviation of the lengths, and determine the number of rows in Tab10 Tab10 <- read.table("./Table 1.10 Drennan.txt", header = T) Length10 <- Tab10[,2] Length10_mean <- mean(Length10) Length10_sd <- sd(Length10) Tab10_nrow <- nrow(Tab10) # Then, use the equations given above to calculate the standard errors, and the confidence interval Length10_SE <- Length10_sd / sqrt(Tab10_nrow) Length10_CI95 <- 1.96 * Length10_SE # The paste() command can then be used to produce text output paste("The mean length is", round(Length10_mean, digits = 2), "±", round(Length10_CI95, digits = 2), "(CL = 0.95)") paste("The probability is 95% that the sample is from a population with a mean length between", round(Length10_mean - Length10_CI95, digits = 2), "and", round(Length10_mean + Length10_CI95, digits = 2)) #Exercise 2: error bar plots Tab35 <- read.table("./Table 3.5 Drennan.txt", header = T) EBA35 <- Tab35$AREA_HA[Tab35$PERIOD == "EBA"] LBA35 <- Tab35$AREA_HA[Tab35$PERIOD == "LBA"] EBA35_mean <- mean(EBA35) EBA35_sd <- sd(EBA35) EBA35_nrow <- NROW(EBA35) LBA35_mean <- mean(LBA35) LBA35_sd <- sd(LBA35) LBA35_nrow <- NROW(LBA35) EBA35_SE <- EBA35_sd / sqrt(EBA35_nrow) EBA35_CI95 <- 1.96 * EBA35_SE EBA35_CI95LO <- EBA35_mean - EBA35_CI95 / 2 EBA35_CI95HI <- EBA35_mean + EBA35_CI95 / 2 LBA35_SE <- LBA35_sd / sqrt(LBA35_nrow) LBA35_CI95 <- 1.96 * LBA35_SE LBA35_CI95LO <- LBA35_mean - LBA35_CI95 / 2 LBA35_CI95HI <- LBA35_mean + LBA35_CI95 / 2 output_df = data.frame( period = c('EBA', 'LBA'), mean = c(EBA35_mean, LBA35_mean), sd = c(EBA35_sd, LBA35_sd), n = c(EBA35_nrow, LBA35_nrow), SE = c(EBA35_SE, LBA35_SE), CI95 = c(EBA35_CI95, LBA35_CI95), CI95_lo = c(EBA35_CI95LO, LBA35_CI95LO), CI95_hi = c(EBA35_CI95HI, LBA35_CI95HI) ) print(output_df) write.table(output_df, file = "df_ErrorBars.txt") new_plot <- ggplot(data = output_df, aes(period, mean)) new_plot <- new_plot + geom_point() new_plot <- new_plot + geom_errorbar(aes(ymin = (CI95_lo), ymax = (CI95_hi)), width = 0.2) new_plot #Exercise 3: t-test Tab35 <- read.table("./Table 3.5 Drennan.txt", header = T) # we are going to depart from the calculations below for the mean, standard deviation # and standard error for EBA35 and LBA35 EBA35 <- Tab35$AREA_HA[Tab35$PERIOD == "EBA"] LBA35 <- Tab35$AREA_HA[Tab35$PERIOD == "LBA"] EBA35_mean <- mean(EBA35) EBA35_sd <- sd(EBA35) EBA35_nrow <- NROW(EBA35) LBA35_mean <- mean(LBA35) LBA35_sd <- sd(LBA35) LBA35_nrow <- NROW(LBA35) EBA35_SE <- EBA35_sd / sqrt(EBA35_nrow) # first, calculate the pooled standard deviation Sp <- sqrt( ( (EBA35_nrow - 1) * EBA35_sd * EBA35_sd + (LBA35_nrow - 1) * LBA35_sd * LBA35_sd) / (EBA35_nrow + LBA35_nrow - 2) ) # then, calculate the pooled standard error SEp <- Sp * sqrt(1 / EBA35_nrow + 1 / LBA35_nrow) # and finally, calculate the value of t t <- (EBA35_mean - LBA35_mean) / SEp # and compare this to the outcome of the t.test() command t.test(EBA35, LBA35) # Exercise 4: ANOVA # first, let's make some boxplots of the floor areas per site boxplot(Tab124$AREA ~ Tab124$SITE) # then, calculate the basic statistics needed for the comparison # mean, variance and number of observations per site # this can be done quickly using the tapply() command site_mean <- tapply(Tab124$AREA, Tab124$SITE, mean) site_var <- tapply(Tab124$AREA, Tab124$SITE, var) site_n <- tapply(Tab124$AREA, Tab124$SITE, NROW) # if you want to display things nicely, pull them together using the cbind() command and print aov_stats <- cbind(site_n, site_mean, site_var) print(aov_stats) total_n <- nrow(Tab124) m <- nrow(aov_stats) site_n_var <- ((site_n - 1) * site_var) site_m_var <- (site_mean - mean(Tab124$AREA)) ^ 2 varW <- sum(site_n_var) / (total_n - m) varB <- sum(site_n * site_m_var) / (m - 1) Fvalue <- varB / varW # the p-value for the calculated value of F can be obtained using the pf() command pvalue <- 1 - pf(q = Fvalue, m - 1, total_n - 1) aov_results <- cbind(total_n, m, varW, varB, Fvalue, pvalue) print(aov_results) # these outcomes match the result of the aov() command in R summary(aov(Tab124$AREA ~ Tab124$SITE)) # N.B. The `aov()` command will only provide full output when combined with the `summary()` command. # Exercise 5: Tukey HSD Tab124 <- read.table("./Table 12.4 Drennan.txt", header = T) # the trick is to run aov() on the floor area and site number, parse this into the TukeyHSD() command, # and store the results in list of values # this information can then be used directly to print and plot the outcomes tukeyhsd <- TukeyHSD(aov(Tab124$AREA ~ Tab124$SITE)) print(tukeyhsd) plot(tukeyhsd)