# Analysing new set of data for Biogeosciences paper # By Tatsuro Tanioka (tanio003@umn.edu) # started 200309, testing out for Fe experiments library(meta) library(Matrix) library(metafor) library(lattice) library(ggplot2) library(ggpubr) library(dplyr) library(gridExtra) library(lemon) library(readxl) library(data.table) library(gridExtra) # Setting the directory getwd() list.files() res_path<-paste(getwd(),"/results/scalc/res/",sep="") plot_path<-paste(getwd(),"/results/scalc/plots/",sep="") #dev.off() # clears figures # Load a function to calcualte s-factor #source("functions_scalc.R") source("functions_eff_logrr.R") #### 0. Read data from excel spreadsheet dataex <- read_excel("New_data_190211a.xlsx",sheet = "Fe") #dataex.N #head(dataex.P) #str(dataex.P) # function for conditional data replacement (modified from Kevin Ushey) mutate_when <- function(data, print = FALSE, ...) { dots <- eval(substitute(alist(...))) for (i in seq(1, length(dots), by = 2)) { condition <- eval(dots[[i]], envir = data) condition <- !is.na(condition) & condition == TRUE mutations <- eval(dots[[i + 1]], envir = data[condition, , drop = FALSE]) if (print[1] != FALSE & !all(condition == FALSE)) {print(names(mutations)) print(cbind(data[condition, c(print, names(mutations))], "substituted value" = mutations[[1]])) } data[condition, names(mutations)] <- mutations } return(data) } # Removing SDs exceeding the max theoretical value of N*sqrt(X) cat( '\nsubstituting SDs and means for cases reporting an SD exceeding their max theoretical value\n' ) dataex %<>% mutate_when( Sc > Yc * sqrt(Nc), list(Sc = NA), print = c("Study","Es_id","Nc","Yc") ) dataex %<>% mutate_when( St > Yt * sqrt(Nt), list(St = NA), print = c("Study","Es_id","Nt","Yt") ) # Converting C:N to N:C and C:P to P:C cat( 'converting SD_C:N and SD_C:P to SD_N:C and SD_P:C in permil' ) dataex %<>% mutate_when( !is.na(Yc) & Yc > 0, list(Sc = 1000.0/Yc * (Sc/Yc)), print = c("Study","Es_id","Nc","Sc") ) dataex %<>% mutate_when( !is.na(Yt) & Yt > 0, list(St = 1000.0/Yt * (St/Yt)), print = c("Study","Es_id","Nt","St") ) cat( 'converting C:N and C:P to N:C and P:C in permil' ) dataex %<>% mutate_when( !is.na(Yc) & Yc > 0, list(Yc = 1000.0/Yc), print = c("Study","Es_id","Nc","Yc") ) dataex %<>% mutate_when( !is.na(Yt) & Yt > 0, list(Yt = 1000.0/Yt), print = c("Study","Es_id","Nt","Yt") ) #### Calculating s-factor for individual experiemntal unit #### col_names<-c("Es_id","Study","Species","Phylum","PFT","Variable","Mode","Phase","Temp","LD","Ntype","logRR","var_logRR", "CIlow_logRR","CIup_logRR") res<-data.frame(matrix(NA,nrow=length(unique(dataex$Es_id)),ncol=length(col_names))) names(res)<-col_names a=0 for (i in sort(unique(dataex$Es_id))) { a=a+1 dataex[which(dataex$Es_id==i),]->tmp tmp$Es_id[1]->res$Es_id[a] as.vector(tmp$Study[1])->res$Study[a] as.vector(tmp$Species[1])->res$Species[a] as.vector(tmp$Phylum[1])->res$Phylum[a] as.vector(tmp$PFT[1])->res$PFT[a] as.vector(tmp$Variable[1])->res$Variable[a] as.vector(tmp$Mode[1])->res$Mode[a] as.vector(tmp$Phase[1])->res$Phase[a] as.vector(tmp$Temp[1])->res$Temp[a] as.vector(tmp$LD[1])->res$LD[a] as.vector(tmp$LD[1])->res$LD[a] as.vector(tmp$Ntype[1])->res$Ntype[a] # as.vector(tmp$conditions)->res2[a,5:8] # as.vector(tmp$Variable[1])->res$response_variable[a] #dat<-data.frame(Xt=tmp$Xt,Xc=tmp$Xc,Yt=tmp$Yt,Yc=tmp$Yc,St=tmp$St,Sc=tmp$Sc,Nt=tmp$Nt,Nc=tmp$Nc) dat2<-data.frame(Yt=tmp$Yt,Yc=tmp$Yc,Nt=tmp$Nt,Nc=tmp$Nc,St=tmp$St,Sc=tmp$Sc) #twopoint_sfactor(dat)->res[a,12:19] # calls a function (from functions_scalc.R) which calculates s-factor for individual experiment twopoint_logrr(dat2)->res[a,12:15] # calls a function (from functions_eff_logrr.R) which calculates ln(RR) for individual experiment } res # check the output # Export to CSV write.csv(res,paste(res_path,"res_all_190211a_Fe.csv",sep=""),sep="\t",row.names = T) #### Carrying out meta-analysis #### # Separate Variables based on CP and CN resSort <- split(res, list(res$Variable)) resSort_CP <- resSort$CP resSort_CN <- resSort$CN (total_CP_num <- nrow(resSort_CP)) (total_CN_num <- nrow(resSort_CN)) #### Carrying out meta-analysis mixed-effects model #### logrr_all_CP <- rma(data=resSort_CP, yi=logRR,vi = var_logRR, method = "REML") logrr_all_CN <- rma(data=resSort_CN, yi=logRR, vi = var_logRR, method = "REML") # Remove any data with a studentized residual value exceeding the aboslute value of 3 following Viechtbauer et al. (2010), Research Synthesis Methods,1(2), 112-125. resSort_CP %>% filter(abs(rstudent(logrr_all_CP)$resid) < 3.0) -> resSort_CP_select resSort_CN %>% filter(abs(rstudent(logrr_all_CN)$resid) < 3.0) -> resSort_CN_select # Total number of papers excluded (total_CP_num_removed <- nrow(resSort_CP) - nrow(resSort_CP_select)) (total_CN_num_removed <- nrow(resSort_CN) - nrow(resSort_CN_select)) # Redo meta-analysis with outliers removed (logrr_all_CP <- rma(data=resSort_CP_select, yi=logRR, vi = var_logRR,method = "REML")) (logrr_all_CN <- rma(data=resSort_CN_select, yi=logRR, vi = var_logRR, method = "REML")) logrr_all_CP logrr_all_CN #### Subgroup analyses using ln(RR)#### ##### 1. PFT (Coc,Dia,Din,Euk,Pro,Dz) #### # C:P (logrr_all_CP_PFT <- rma(data=resSort_CP_select, yi=logRR, vi = var_logRR, mods = ~ PFT, method = "REML")) # p = 0.72 summary(logrr_all_CP_PFT) resSort_CP_select %>% count(PFT) (logrr_all_CP_PFT_Dia <- logrr_all_CP_PFT$beta[1]) (logrr_all_CP_PFT_Dia_Cilb <- logrr_all_CP_PFT$ci.lb[1]) (logrr_all_CP_PFT_Dia_Ciub <- logrr_all_CP_PFT$ci.ub[1]) (logrr_all_CP_PFT_Euk <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$beta[3]) (logrr_all_CP_PFT_Euk_Cilb <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.lb[3]) (logrr_all_CP_PFT_Euk_Ciub <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.ub[3]) (logrr_all_CP_PFT_Pro <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$beta[4]) (logrr_all_CP_PFT_Pro_Cilb <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.lb[4]) (logrr_all_CP_PFT_Pro_Ciub <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.ub[4]) (logrr_all_CP_PFT_Dz <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$beta[2]) (logrr_all_CP_PFT_Dz_Cilb <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.lb[2]) (logrr_all_CP_PFT_Dz_Ciub <- logrr_all_CP_PFT$beta[1] + logrr_all_CP_PFT$ci.ub[2]) # C:N (logrr_all_CN_PFT <- rma(data=resSort_CN_select, yi=logRR, vi = var_logRR, mods = ~ PFT, method = "REML")) # p = 0.26 summary(logrr_all_CN_PFT) resSort_CN_select %>% count(PFT) (logrr_all_CN_PFT_Dia <- logrr_all_CN_PFT$beta[1]) (logrr_all_CN_PFT_Dia_Cilb <- logrr_all_CN_PFT$ci.lb[1]) (logrr_all_CN_PFT_Dia_Ciub <- logrr_all_CN_PFT$ci.ub[1]) (logrr_all_CN_PFT_Euk <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$beta[3]) (logrr_all_CN_PFT_Euk_Cilb <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.lb[3]) (logrr_all_CN_PFT_Euk_Ciub <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.ub[3]) (logrr_all_CN_PFT_Pro <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$beta[4]) (logrr_all_CN_PFT_Pro_Cilb <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.lb[4]) (logrr_all_CN_PFT_Pro_Ciub <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.ub[4]) (logrr_all_CN_PFT_Dz <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$beta[2]) (logrr_all_CN_PFT_Dz_Cilb <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.lb[2]) (logrr_all_CN_PFT_Dz_Ciub <- logrr_all_CN_PFT$beta[1] + logrr_all_CN_PFT$ci.ub[2]) ##### 2. PFT (Eukaryotes or Prokaryotes) #### # C:P resSort_CP_select$PFT_eukpro <- ifelse((resSort_CP_select$PFT == "Pro" |resSort_CP_select$PFT == "Dz"), c("Prokaryotes"), c("Eukaryotes")) (logrr_all_CP_PFT_eukpro <- rma(data=resSort_CP_select, yi=logRR, vi = var_logRR, mods = ~ PFT_eukpro, method = "REML")) # p = 0.43 resSort_CP_select %>% count(PFT_eukpro) (logrr_all_CP_PFT_Eukaryotes <- logrr_all_CP_PFT_eukpro$beta[1]) (logrr_all_CP_PFT_Eukaryotes_Cilb <- logrr_all_CP_PFT_eukpro$ci.lb[1]) (logrr_all_CP_PFT_Eukaryotes_Ciub <- logrr_all_CP_PFT_eukpro$ci.ub[1]) (logrr_all_CP_Prokaryotes <- logrr_all_CP_PFT_eukpro$beta[1] + logrr_all_CP_PFT_eukpro$beta[2]) (logrr_all_CP_Prokaryotes_Cilb <- logrr_all_CP_PFT_eukpro$beta[1] + logrr_all_CP_PFT_eukpro$ci.lb[2]) (logrr_all_CP_Prokaryotes_Ciub <- logrr_all_CP_PFT_eukpro$beta[1] + logrr_all_CP_PFT_eukpro$ci.ub[2]) # C:N resSort_CN_select$PFT_eukpro <- ifelse((resSort_CN_select$PFT == "Pro" |resSort_CN_select$PFT == "Dz"), c("Prokaryotes"), c("Eukaryotes")) (logrr_all_CN_PFT_eukpro <- rma(data=resSort_CN_select, yi=logRR, vi = var_logRR, mods = ~ PFT_eukpro, method = "REML")) # p = 0.14 resSort_CN_select %>% count(PFT_eukpro) (logrr_all_CN_PFT_Eukaryotes <- logrr_all_CN_PFT_eukpro$beta[1]) (logrr_all_CN_PFT_Eukaryotes_Cilb <- logrr_all_CN_PFT_eukpro$ci.lb[1]) (logrr_all_CN_PFT_Eukaryotes_Ciub <- logrr_all_CN_PFT_eukpro$ci.ub[1]) (logrr_all_CN_Prokaryotes <- logrr_all_CN_PFT_eukpro$beta[1] + logrr_all_CN_PFT_eukpro$beta[2]) (logrr_all_CN_Prokaryotes_Cilb <- logrr_all_CN_PFT_eukpro$beta[1] + logrr_all_CN_PFT_eukpro$ci.lb[2]) (logrr_all_CN_Prokaryotes_Ciub <- logrr_all_CN_PFT_eukpro$beta[1] + logrr_all_CN_PFT_eukpro$ci.ub[2]) ##### 3. PFT (Temperate or cold-water species) #### # Use temperature of 10 degC as a cut-off # C:P resSort_CP_select$PFT_Temp <- ifelse(((resSort_CP_select$Temp >= 10) | is.na(resSort_CP_select$Temp)), c("Temperate"), c("Cold")) (logrr_all_CP_PFT_Temp<- rma(data=resSort_CP_select, yi=logRR, vi = var_logRR, mods = ~ PFT_Temp, method = "REML")) # p = 0.67 resSort_CP_select %>% count(PFT_Temp) (logrr_all_CP_PFT_Cold <- logrr_all_CP_PFT_Temp$beta[1]) (logrr_all_CP_PFT_Cold_Cilb <- logrr_all_CP_PFT_Temp$ci.lb[1]) (logrr_all_CP_PFT_Cold_Ciub <- logrr_all_CP_PFT_Temp$ci.ub[1]) (logrr_all_CP_Hot <- logrr_all_CP_PFT_Temp$beta[1] + logrr_all_CP_PFT_Temp$beta[2]) (logrr_all_CP_Hot_Cilb <- logrr_all_CP_PFT_Temp$beta[1] + logrr_all_CP_PFT_Temp$ci.lb[2]) (logrr_all_CP_Hot_Ciub <- logrr_all_CP_PFT_Temp$beta[1] + logrr_all_CP_PFT_Temp$ci.ub[2]) # C:N resSort_CN_select$PFT_Temp <- ifelse(((resSort_CN_select$Temp >= 10) | is.na(resSort_CN_select$Temp)), c("Temperate"), c("Cold")) (logrr_all_CN_PFT_Temp<- rma(data=resSort_CN_select, yi=logRR, vi = var_logRR, mods = ~ PFT_Temp, method = "REML")) # p = 0.83 resSort_CN_select %>% count(PFT_Temp) (logrr_all_CN_PFT_Cold <- logrr_all_CN_PFT_Temp$beta[1]) (logrr_all_CN_PFT_Cold_Cilb <- logrr_all_CN_PFT_Temp$ci.lb[1]) (logrr_all_CN_PFT_Cold_Ciub <- logrr_all_CN_PFT_Temp$ci.ub[1]) (logrr_all_CN_Hot <- logrr_all_CN_PFT_Temp$beta[1] + logrr_all_CN_PFT_Temp$beta[2]) (logrr_all_CN_Hot_Cilb <- logrr_all_CN_PFT_Temp$beta[1] + logrr_all_CN_PFT_Temp$ci.lb[2]) (logrr_all_CN_Hot_Ciub <- logrr_all_CN_PFT_Temp$beta[1] + logrr_all_CN_PFT_Temp$ci.ub[2]) ##### 3. Culture Mode (Batch, semi-continuous, chemostat, turbidostat) #### # C:P (logrr_all_CP_mode <- rma(data=resSort_CP_select, yi=logRR, vi =var_logRR, mods = ~ Mode, method = "REML")) summary(logrr_all_CP_mode) resSort_CP_select %>% count(Mode) (logrr_all_CP_mode_Batch <- logrr_all_CP_mode$beta[1]) (logrr_all_CP_mode_Batch_Cilb <- logrr_all_CP_mode$ci.lb[1]) (logrr_all_CP_mode_Batch_Ciub <- logrr_all_CP_mode$ci.ub[1]) (logrr_all_CP_mode_Semi <- logrr_all_CP_mode$beta[1] + logrr_all_CP_mode$beta[2]) (logrr_all_CP_mode_Semi_Cilb <- logrr_all_CP_mode$beta[1] + logrr_all_CP_mode$ci.lb[2]) (logrr_all_CP_mode_Semi_Ciub <- logrr_all_CP_mode$beta[1] + logrr_all_CP_mode$ci.ub[2]) # C:N (logrr_all_CN_mode <- rma(data=resSort_CN_select, yi=logRR, vi =var_logRR, mods = ~ Mode, method = "REML")) summary(logrr_all_CN_mode) resSort_CN_select %>% count(Mode) (logrr_all_CN_mode_Batch <- logrr_all_CN_mode$beta[1]) (logrr_all_CN_mode_Batch_Cilb <- logrr_all_CN_mode$ci.lb[1]) (logrr_all_CN_mode_Batch_Ciub <- logrr_all_CN_mode$ci.ub[1]) (logrr_all_CN_mode_Semi <- logrr_all_CN_mode$beta[1] + logrr_all_CN_mode$beta[2]) (logrr_all_CN_mode_Semi_Cilb <- logrr_all_CN_mode$beta[1] + logrr_all_CN_mode$ci.lb[2]) (logrr_all_CN_mode_Semi_Ciub <- logrr_all_CN_mode$beta[1] + logrr_all_CN_mode$ci.ub[2]) ##### 4. Growth Phase (Lag, Exponential, Stationary, Unspecified) #### # C:P (logrr_all_CP_phase <- rma(data=resSort_CP_select, yi=logRR, vi =var_logRR, mods = ~ Phase, method = "REML")) summary(logrr_all_CP_phase) resSort_CP_select %>% count(Phase) (logrr_all_CP_phase_Expo <- logrr_all_CP_phase$beta[1]) (logrr_all_CP_phase_Expo_Cilb <- logrr_all_CP_phase$ci.lb[1]) (logrr_all_CP_phase_Expo_Ciub <- logrr_all_CP_phase$ci.ub[1]) (logrr_all_CP_phase_Stat <- logrr_all_CP_phase$beta[1] + logrr_all_CP_phase$beta[2]) (logrr_all_CP_phase_Stat_Cilb <- logrr_all_CP_phase$beta[1] + logrr_all_CP_phase$ci.lb[2]) (logrr_all_CP_phase_Stat_Ciub <- logrr_all_CP_phase$beta[1] + logrr_all_CP_phase$ci.ub[2]) # C:N (logrr_all_CN_phase <- rma(data=resSort_CN_select, yi=logRR, vi =var_logRR, mods = ~ Phase, method = "REML")) summary(logrr_all_CN_phase) resSort_CN_select %>% count(Phase) (logrr_all_CN_phase_Expo <- logrr_all_CN_phase$beta[1]) (logrr_all_CN_phase_Expo_Cilb <- logrr_all_CN_phase$ci.lb[1]) (logrr_all_CN_phase_Expo_Ciub <- logrr_all_CN_phase$ci.ub[1]) (logrr_all_CN_phase_Stat <- logrr_all_CN_phase$beta[1] + logrr_all_CN_phase$beta[2]) (logrr_all_CN_phase_Stat_Cilb <- logrr_all_CN_phase$beta[1] + logrr_all_CN_phase$ci.lb[2]) (logrr_all_CN_phase_Stat_Ciub <- logrr_all_CN_phase$beta[1] + logrr_all_CN_phase$ci.ub[2])