# R SCRIPT USED FOR DATA ANALYSIS IN THE PAPER ENTITLED
# "LED light gradient as a screening tool for light quality responses in model plant species"
# FROM
# Pierre LEJEUNE, Anthony FRATAMICO, Frédéric BOUCHÉ, Samuel HUERGA-FERNÁNDEZ, Pierre TOCQUIN, Claire PÉRILLEUX
# 
# Written by Anthony FRATAMICO, PhD at University of Liège (Belgium)
# Laboratory of Plant Physiology
# 
# Last revision: 2020-10-06
#
# =================================================================================================
# LIBRARIES
# -------------------------------------------------------------------------------------------------
library(data.table)
library(ggplot2)
# =================================================================================================
# OPTIONS
# -------------------------------------------------------------------------------------------------
# Output results?
write_result <- TRUE # Write result or not
write_graphs <- FALSE # Write graphs or not
# Folders
output_dir <- file.path(getwd(), "output")
graphParamBySp_dir <- file.path(output_dir, "graphParamBySp") # see section "GRAPHS: REGRESSIONS BY SPECIES, TIME AND PARAMETERS"
graphPvalVsTime_dir <- file.path(output_dir, "graphPvalVsTime")
graphRVsTime_dir <- file.path(output_dir, "graphRVsTime")
graphSlopeVsTime_dir <- file.path(output_dir, "graphSlopeVsTime")
# Create output directories if necessary
if(write_result | write_graphs){
  dir.create(output_dir, showWarnings = FALSE)
}
if(write_graphs){
  dir.create(graphParamBySp_dir, showWarnings = FALSE)
  dir.create(graphPvalVsTime_dir, showWarnings = FALSE)
  dir.create(graphRVsTime_dir, showWarnings = FALSE)
  dir.create(graphSlopeVsTime_dir, showWarnings = FALSE)
}
# =================================================================================================
# IMPORT DATA SET
# -------------------------------------------------------------------------------------------------
data <- fread("selected_data.csv")
# Red/Blue ratio
data[, log_rb := log10(pfd_red/pfd_blue)]
# =================================================================================================
# REGRESSIONS
# -------------------------------------------------------------------------------------------------
# Function
makeReg <- function(x, y){
  # Data
  dt <- na.omit(data.table(x,y))
  npts <- nrow(dt)
  # Output list
  result <- list("npts" = npts,
                 "slope" = NA,
                 "intercept" = NA,
                 "rsq" = NA,
                 "pval_slope" = NA,
                 "pval_intercept" = NA)

  if(npts<2){ # minimum of two points for regression
    return(result)
  }
  # Store regression'parameters
  reg <- lm(y ~ x, dt)
  regCoef <- coef(reg)
  regSum <- summary(reg)
  # Fill results in list
  result$slope = as.numeric(regCoef[2])
  result$intercept = as.numeric(regCoef[1])
  if("r.squared" %in% names(regSum)){
    result$rsq <- as.numeric(regSum$r.squared)
  }
  if(nrow(regSum$coefficients) > 0){
    result$pval_intercept <- as.numeric(regSum$coefficients[1,4])
  }
  if(nrow(regSum$coefficients) > 1){
    result$pval_slope <- as.numeric(regSum$coefficients[2,4])
  }
  return(result)
}
# -------------------------------------------------------------------------------------------------
# Apply function
result <- data[treatment != "White", # Remove "White" treatment because no regression possible (length of unique 'x' = 1)
               lapply(makeReg(x = log_rb, y = value), as.numeric),
               by=list(species, treatment, das, dag, parameter)]
# -------------------------------------------------------------------------------------------------
# Categories of significance
result[pval_slope > 0.05, signif := "NS"] # Not Significant
result[pval_slope <= 0.05 & pval_slope > 0.01, signif := "LS"] # Low Significant
result[pval_slope <= 0.01 & pval_slope > 0.001, signif := "MS"] # Mean Significant
result[pval_slope <= 0.001, signif := "HS"] # High Significant
# -------------------------------------------------------------------------------------------------
# Write table
if(write_result){
  write.csv(x=result, file=file.path(output_dir, "processed_data.csv"), row.names = F)
}
# =================================================================================================
# GRAPHS: REGRESSIONS BY SPECIES, TIME AND PARAMETERS 
# -------------------------------------------------------------------------------------------------
if(write_graphs){
  for(sp in unique(result$species)){
    for(param in unique(result$parameter)){
      # Data
      dataGradient <- data[species==sp & parameter == param & treatment == "Gradient"]
      dataWhite <- data[species==sp & parameter == param & treatment == "White"]
      dataReg <- result[species==sp & parameter == param]
      nbCol <- length(unique(dataGradient$dag))
      # Plot
      gReg <- ggplot(data=dataGradient,
                     mapping=aes(x=log_rb, y=value, shape=treatment))+
        # points
        geom_jitter(data=dataWhite,
                    mapping=aes(x=log_rb, y=value, shape=treatment),
                    size=1, width=0.1)+
        geom_jitter(size=2, width=0.1)+
        scale_shape_manual(values=c(1,19)) +
        # Regression
        stat_smooth(aes(x=log_rb, y=value), method = "lm", size = 1.5, color="white") +
        geom_abline(data=dataReg, # Check makeReg vs stat_smooth
                    aes(slope=slope, intercept=intercept, color=signif),
                    lwd=1)+
        # Significance
        scale_color_manual(name="",
                           values=c('#BBBBBB','#CCBB44','#228833','#4477AA'),
                           breaks=c("NS", "LS", "MS", "HS"),
                           labels=c("NS", expression("p"<=0.05), expression("p"<=0.01), expression("p"<=0.001))) +
        # Facet
        facet_wrap(.~dag, scales="free_y", ncol = nbCol)+
        # Labs
        labs(title=paste("Specie:", sp, "\tParameter:", param),
             x=expression(Log[10]~(PFD[Red]/PFD[Blue])),
             y=param,
             color="p-value",
             shape="Treatment")
      
      # Output
      png(filename = file.path(graphParamBySp_dir, paste0(gsub(x=sp, pattern=" ", replacement="-"), "_", param, ".png")),
          res=300, width=500*nbCol+250, height=500+250)
      print(gReg)
      dev.off()
    }
  }
}
# =================================================================================================
# GRAPHS: REGRESSION PARAMETERS VS TIME BY SPECIE AND PARAMETER
# -------------------------------------------------------------------------------------------------
if(write_graphs){
  # Dimensions of facet and PNG file
  nbParam <- length(unique(result$parameter))
  nbCol <- ceiling(nbParam^0.5)
  nbRow <- ceiling(nbParam/nbCol)
  # -----------------------------------------------------------------------------------------------
  # P-VALUE
  if(TRUE){ # Enable/Disable
    for(sp in unique(result$species)){
      # Plot
      gPval <- ggplot(data=result[species == sp],
                      mapping=aes(x=dag, y=pval_slope, color=signif))+
        # V-Line = Day of the end of gradient and return to white light
        geom_vline(aes(xintercept=30), lwd=0.75, col="gray40")+
        geom_vline(aes(xintercept=30), lwd=0.5, lty="dashed", col="white")+
        # Points
        geom_point(shape=19)+
        # Significance
        scale_color_manual(name="p-value",
                           values=c('#BBBBBB','#CCBB44','#228833','#4477AA'),
                           breaks=c("NS", "LS", "MS", "HS"),
                           labels=c("NS", expression("p"<=0.05), expression("p"<=0.01), expression("p"<=0.001))) +
        # Facet
        facet_wrap(.~parameter, scales = "free_y", ncol = nbCol)+
        # Labs
        scale_y_log10()+
        labs(title=paste("Specie:", sp, "\t Regression p-value"),
             x="Time (days)",
             y="p-value of slope")
      # Output
      png(filename = file.path(graphPvalVsTime_dir, paste0(gsub(x=sp, pattern=" ", replacement="-"), "_PvalVsTime.png")),
          res=300, width=500*nbCol+250, height=500*nbRow)
      print(gPval)
      dev.off()
    }
  }
  # -----------------------------------------------------------------------------------------------
  # R²
  if(TRUE){ # Enable/Disable
    for(sp in unique(result$species)){
      # Plot
      gPval <- ggplot(data=result[species == sp],
                      mapping=aes(x=dag, y=rsq, color=signif))+
        # V-Line = Day of the end of gradient and return to white light
        geom_vline(aes(xintercept=30), lwd=0.75, col="gray40")+
        geom_vline(aes(xintercept=30), lwd=0.5, lty="dashed", col="white")+
        # Points
        geom_point(shape=19)+
        # Significance
        scale_color_manual(name="p-value",
                           values=c('#BBBBBB','#CCBB44','#228833','#4477AA'),
                           breaks=c("NS", "LS", "MS", "HS"),
                           labels=c("NS", expression("p"<=0.05), expression("p"<=0.01), expression("p"<=0.001))) +
        # Facet
        facet_wrap(.~parameter, scales = "free_y", ncol = nbCol)+
        # Labs
        labs(title=paste("Specie:", sp, "\t Regression R²"),
             x="Time (days)",
             y="R²")
      # Output
      png(filename = file.path(graphRVsTime_dir, paste0(gsub(x=sp, pattern=" ", replacement="-"), "_RVsTime.png")),
          res=300, width=500*nbCol+250, height=500*nbRow)
      print(gPval)
      dev.off()
    }
  }
  # -----------------------------------------------------------------------------------------------
  # SLOPE
  if(TRUE){ # Enable/Disable
    for(sp in unique(result$species)){
      # Plot
      gPval <- ggplot(data=result[species == sp],
                      mapping=aes(x=dag, y=slope, color=signif))+
        # V-Line = Day of the end of gradient and return to white light
        geom_vline(aes(xintercept=30), lwd=0.75, col="gray40")+
        geom_vline(aes(xintercept=30), lwd=0.5, lty="dashed", col="white")+
        # Points
        geom_point(shape=19)+
        # Significance
        scale_color_manual(name="p-value",
                           values=c('#BBBBBB','#CCBB44','#228833','#4477AA'),
                           breaks=c("NS", "LS", "MS", "HS"),
                           labels=c("NS", expression("p"<=0.05), expression("p"<=0.01), expression("p"<=0.001))) +
        # Facet
        facet_wrap(.~parameter, scales = "free_y", ncol = nbCol)+
        # Labs
        labs(title=paste("Specie:", sp, "\t Regression slope"),
             x="Time (days)",
             y="Slope")
      # Output
      png(filename = file.path(graphSlopeVsTime_dir, paste0(gsub(x=sp, pattern=" ", replacement="-"), "_SlopeVsTime.png")),
          res=300, width=500*nbCol+250, height=500*nbRow)
      print(gPval)
      dev.off()
    }
  }
}