# CO2 cumulation ###################################################
# Merge all the slope data files into one big data frame. Rows with a low R²
# are discarded further down, once the measurement day has been added.
folder_path <- "C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/Picarro/Slopes"
files <- list.files(path = folder_path, full.names = TRUE)
stopifnot("No slope files found in folder_path" = length(files) > 0)
# Read every file in the folder (CSV format) into a list of data frames.
all_slopefiles <- lapply(files, read.csv)
# At least one export carries more columns than the others. Instead of patching
# a single file by its position in the list (which shifts as soon as files are
# added or renamed), restrict every file to the columns of the first file so
# that rbind() always sees matching columns.
reference_columns <- colnames(all_slopefiles[[1]])
all_slopefiles <- lapply(
  all_slopefiles,
  function(slopefile) slopefile[, reference_columns, drop = FALSE]
)
all_slopefiles_df <- do.call(rbind, all_slopefiles)
# No measurement took place on 19/01/2023: the file labelled that way belongs
# to 16/01/2023 (day 55 after amendment), so relabel it here.
all_slopefiles_df$Date <- gsub("19_01_2023", "16_01_2023", all_slopefiles_df$Date)

# TODO (original note): replace the mislabelled file and run the two sorting
# scripts again.

# The files were merged above; now add the time in days after amendment and
# keep only the measurements with a good fit.
library(lubridate)
measurement_dates <- dmy(all_slopefiles_df$Date)
# The amendment date (22/11/2022) is subtracted to get days since amendment.
all_slopefiles_df$day <- as.numeric(measurement_dates - dmy("22/11/2022"))
# Keep a row only when both CO2 fits have R² > 0.9; rows where the comparison
# is NA are dropped as well.
r2_ok <- all_slopefiles_df$R2_CO2_12 > 0.9 & all_slopefiles_df$R2_CO2_13 > 0.9
all_slopefiles_df <- all_slopefiles_df[!is.na(r2_ok) & r2_ok, ]
# 1046 / 1182, approx. 88% of the data is kept

# plot (all_slopefiles_df$day,all_slopefiles_df$dCH4_ppm_s, ylim = c(0,1))
# points(all_slopefiles_df[which(all_slopefiles_df$R2_CH4<0.9),]$day, all_slopefiles_df[which(all_slopefiles_df$R2_CH4<0.9),]$dCH4_ppm_s, col = "red")

# Fill data gaps for temperature from another file; the temperature is needed
# further down to calculate the gas density.
library(readxl)
manual_co2_data <- read_excel(
  "C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/CO2_CH4_dataframe.xlsx",
  sheet = "CO2dataframe"
)
# Rows whose temperature is missing or recorded as 0 (about 600 points).
indices_NA_temp <- which(
  is.na(all_slopefiles_df$T_.C) | all_slopefiles_df$T_.C == 0
)

# Temperatures entered by hand for measurement dates that fall after the last
# day covered by the manual data file (keyed by the Date string of the row).
late_temperatures <- c(
  "24_10_2023" = 18,
  "25_10_2023" = 19,
  "9_11_2023" = 21,
  "23_11_2023" = 20.5,
  "13_12_2023" = 19
)
last_manual_day <- max(manual_co2_data$Day, na.rm = TRUE)

# Iterating over the indices directly is safe when there is nothing to fill
# (1:length(x) would iterate over c(1, 0) for an empty vector).
for (index in indices_NA_temp) {
  DAY <- all_slopefiles_df$day[index]
  Date <- as.character(all_slopefiles_df$Date[index])
  filled_temp <- NA_real_

  if (!is.na(DAY) && DAY > last_manual_day) {
    # Beyond the manual file: use the hand-entered temperature for this date.
    if (Date %in% names(late_temperatures)) {
      filled_temp <- late_temperatures[[Date]]
    }
  } else if (!is.na(DAY)) {
    # Within the manual file: look up the temperature of the same pot and day.
    pot <- all_slopefiles_df$PotNo[index]
    manual_temp <- manual_co2_data$temperature[
      which(manual_co2_data$Day == DAY & manual_co2_data$potNo == pot)
    ]
    # Only an unambiguous match is used; zero or several matches are reported.
    if (length(manual_temp) == 1) {
      filled_temp <- manual_temp
    }
  }

  if (is.na(filled_temp)) {
    warning(
      "No temperature found for row ", index, " (date ", Date, ", day ", DAY,
      "); T_.C set to NA",
      call. = FALSE
    )
  }
  # A 0 placeholder that could not be filled becomes NA so that it cannot be
  # mistaken for a real 0 degree reading in the density calculation.
  all_slopefiles_df$T_.C[index] <- filled_temp
}


# TODO (original note): do an identical gap-filling loop for SWC. Some soil
# moisture data points were recorded as SWC (0.xx) and some as %, filter these
# out! (Needed for the statistics of slope and SWC.)

# Gas density from pressure * molar mass / (R * T), divided by 1000. Pressure
# is fixed at 101325 and the temperature column is offset by 273.
# NOTE(review): the offset is 273, not 273.15 - confirm this is intended.
chamber_temp_K <- 273 + all_slopefiles_df$T_.C
all_slopefiles_df$CH4density <- 101325 * 16 / (8.314 * chamber_temp_K) / 1000
all_slopefiles_df$CO2density <- 101325 * 44 / (8.314 * chamber_temp_K) / 1000

# Convert to µmol GHG/m2/s.
# Unit check: µL gas/L air/s * g gas/L gas * L air in chamber / chamber area /
# (g gas/mol gas) = µmol gas/m2/s, identical for CO2 and CH4.
all_slopefiles_df$µmolCH4m2s <-
  all_slopefiles_df$CH4slope * all_slopefiles_df$CH4density * 0.98125 / 0.008171 / 16
all_slopefiles_df$µmolCO2m2s <-
  all_slopefiles_df$CO2slope * all_slopefiles_df$CO2density * 0.98125 / 0.008171 / 44

data <- all_slopefiles_df


# Add a treatment column; pots outside every range below keep NA.
data$Treatment <- NA

# Inclusive PotNo range (first pot, last pot) for each treatment label.
treatment_pot_ranges <- list(
  C3 = c(1, 5),
  FB3 = c(6, 10),
  CB3 = c(11, 15),
  CD3 = c(16, 20),
  MD3 = c(21, 25),
  FD3 = c(26, 30),
  C1 = c(31, 35),
  FB1 = c(41, 45)
)
for (treatment_label in names(treatment_pot_ranges)) {
  pot_bounds <- treatment_pot_ranges[[treatment_label]]
  pots_in_range <- which(
    data$PotNo >= pot_bounds[1] & data$PotNo <= pot_bounds[2]
  )
  data$Treatment[pots_in_range] <- treatment_label
}
# Capitalised copy of the day column (the manual data file uses `Day`).
data$Day <- data$day

# Merge the manual CO2 data (EGM data, start of the experiment).
manual_co2_data$PotNo <- manual_co2_data$potNo
colnames(data)
colnames(manual_co2_data)

# Only the columns present in both tables can be stacked.
common_columns <- intersect(colnames(data), colnames(manual_co2_data))
manual_co2_selected <- manual_co2_data[, common_columns]
manual_co2_selected$Date <- as.Date(manual_co2_selected$Date, format = "%d_%m_%Y")
# Keep only the manual measurements taken before day 17.
manual_co2_selected <- manual_co2_selected[which(manual_co2_selected$Day < 17), ]
data$Date <- as.Date(data$Date, format = "%d_%m_%Y")
data_selected <- data[, common_columns]
data_selected <- rbind(data_selected, manual_co2_selected)
# Drop the two "Bac" treatments. A logical mask is used instead of
# `-which(...)`: when no row matches, `-which()` yields an empty index and
# would silently remove every row. Rows with an NA treatment are kept, as before.
data_selected <- data_selected[!(data_selected$Treatment %in% c("C1Bac", "FB1Bac")), ]
#### Cumulation
# Molar masses in g/mol
CH4molarmass <- 16
CO2molarmass <- 44

#### Integrate the CO2 flux over time for each pot
# Only rows that have a CO2 flux can contribute to the cumulative emission.
data_selected <- data_selected[complete.cases(data_selected$µmolCO2m2s), ]
Treatmentvector <- unique(data_selected$PotNo) # one entry per pot
big_df <- list()
dayvector <- sort(unique(data_selected$Day))
# Drop day 0 with a logical mask: `-which(dayvector == 0)` would empty the
# whole vector whenever there is no day 0 in the data.
dayvector <- dayvector[dayvector != 0]

a <- length(dayvector)
# Row i + 1 holds the interval dayvector[i] -> dayvector[i + 1]; row 1 is the
# zero starting point. One column per pot, in the order of Treatmentvector.
CO2 <- matrix(0, nrow = a + 1, ncol = length(Treatmentvector))
CO2avgCumul <- matrix(0, nrow = a + 1, ncol = length(Treatmentvector))
# Most recent interval mean flux, used to bridge intervals without any data.
# NOTE(review): this value is shared across pots, so a gap in one pot is
# filled with the last flux computed for whichever pot was processed just
# before it - confirm that this is intended.
previous_meanCO2 <- NA

for (i in seq_len(max(a - 1, 0))) {
  firstday <- dayvector[i]
  lastday <- dayvector[i + 1]
  Daydiff_s <- (lastday - firstday) * 3600 * 24 # interval length in seconds

  for (j in seq_along(Treatmentvector)) {
    potno <- Treatmentvector[j]
    is_pot <- data_selected$PotNo == potno
    firstCO2 <- data_selected$µmolCO2m2s[which(is_pot & data_selected$Day == firstday)]
    lastCO2 <- data_selected$µmolCO2m2s[which(is_pot & data_selected$Day == lastday)]

    # One flux per available end point of the interval (replicates on the same
    # day are averaged first); a missing end point contributes nothing.
    endpoint_fluxes <- c(
      if (length(firstCO2) > 0) mean(firstCO2),
      if (length(lastCO2) > 0) mean(lastCO2)
    )

    if (length(endpoint_fluxes) > 0) {
      # Average of the two end points, or the single available one.
      # (`mean(firstCO2, lastCO2)` would pass lastCO2 as the `trim` argument
      # and never average the two values.)
      meanCO2 <- mean(endpoint_fluxes)
    } else if (!is.na(previous_meanCO2)) {
      meanCO2 <- previous_meanCO2 # no data at either end: reuse the last mean
    } else {
      # Nothing to integrate yet: carry the cumulative value forward unchanged.
      CO2avgCumul[i + 1, j] <- CO2avgCumul[i, j]
      next
    }
    # Store the current meanCO2 for future use
    previous_meanCO2 <- meanCO2

    # µmol/m2/s * s = µmol/m2; * g/mol = µg/m2; * 10000 m2/ha = µg/ha;
    # / 10^12 µg/ton = ton CO2/ha
    CO2gain <- meanCO2 * Daydiff_s
    CO2[i + 1, j] <- CO2gain * CO2molarmass * 10000 / 10^12
    CO2avgCumul[i + 1, j] <- CO2avgCumul[i, j] + CO2[i + 1, j] # ton CO2/ha
  }
}

# Cumulative emission per pot taken from row 31 of the cumulative matrix and
# converted from ton CO2/ha to ton C/ha (factor 12/44).
# NOTE(review): row 31 is hard-coded; presumably it is the row of day 389 -
# confirm it still matches dayvector when measurement dates are added.
TonC_havector_389d <- CO2avgCumul[31, ] * 12 / 44
# Look up the treatment of every pot from the data itself. A hand-written
# vector of labels only lines up while unique(PotNo) happens to come out in
# the assumed order and with the assumed number of pots per treatment.
rows_with_treatment <- data_selected[!is.na(data_selected$Treatment), ]
vector_treatments <- as.character(
  rows_with_treatment$Treatment[match(Treatmentvector, rows_with_treatment$PotNo)]
)
df <- data.frame(
  potnr = Treatmentvector,
  TonC_havector_389d = TonC_havector_389d,
  Treatment = vector_treatments
)

# Add the experimental factors to df, derived from the treatment label.
# Comparisons use `==` so that an NA treatment yields NA in every factor.
trt_label <- df$Treatment
is_basalt_trt <- trt_label == "FB1" | trt_label == "FB3" | trt_label == "CB3"
is_dunite_trt <- trt_label == "FD3" | trt_label == "CD3" | trt_label == "MD3"
is_control_trt <- trt_label == "C1" | trt_label == "C3"

# SOC level: 1 for the "1" treatments (C1, FB1), 3 for all others.
df$SOC <- as.numeric(ifelse(trt_label == "C1" | trt_label == "FB1", 1, 3))

# 0/1 indicators for the rock type that was applied.
df$Basalt <- as.numeric(ifelse(is_basalt_trt, 1, 0))
df$Dunite <- as.numeric(ifelse(is_dunite_trt, 1, 0))

# Rock type as a label; everything that is neither basalt nor control is
# labelled "Dunite".
df$rock <- ifelse(
  is_basalt_trt, "Basalt",
  ifelse(is_control_trt, "aNone", "Dunite")
)

# Size value per dunite treatment, 0 for every other treatment.
df$sizedunite <- as.numeric(
  ifelse(
    trt_label == "FD3", 71,
    ifelse(trt_label == "MD3", 278, ifelse(trt_label == "CD3", 1030, 0))
  )
)

# Size value per basalt treatment, 0 for every other treatment.
df$sizebasalt <- as.numeric(
  ifelse(
    is_dunite_trt, 0,
    ifelse(
      trt_label == "FB1" | trt_label == "FB3", 292,
      ifelse(trt_label == "CB3", 670, 0)
    )
  )
)


# Split the dataset
dfCO2 <- df[complete.cases(df$TonC_havector_389d), ]
# Controls and fine basalt at both SOC levels.
dfCO2FB_C <- dfCO2[dfCO2$Treatment %in% c("C1", "C3", "FB3", "FB1"), ]
# Everything except the "1" treatments. A logical mask is used because
# `-which(...)` would drop every row if neither C1 nor FB1 were present.
dfCO2D_B_size <- dfCO2[!(dfCO2$Treatment %in% c("C1", "FB1")), ]

# Stats
# Full factorial basalt. TODO (original note): 1) normalize for T & SWC.
# There is one cumulative value per pot, so an ordinary linear model is fitted.
# The `(1 | potnr)` term was removed: that is mixed-model (lme4) syntax, which
# lm() evaluates as a constant logical term rather than as a random effect.
modelCO2 <- lm(TonC_havector_389d ~ rock + SOC, data = dfCO2FB_C)
print(summary(modelCO2)) # print() so the table also shows under source()

# Rock type and size effects (this overwrites modelCO2 from above).
modelCO2 <- lm(
  TonC_havector_389d ~ rock + sizedunite + sizebasalt,
  data = dfCO2D_B_size
)
print(summary(modelCO2))

library(dplyr)
library(ggplot2) # ggplot() is used below, so the package must be attached
# Mean and standard error of the cumulative emission per treatment
summary_df <- dfCO2 %>%
  group_by(Treatment) %>%
  summarise(
    mean_TonC = mean(TonC_havector_389d),
    se_TonC = sd(TonC_havector_389d) / sqrt(n())
  )

# Bar plot of the treatment means with mean ± SE error bars. The plot is
# printed explicitly so that it is also drawn when the script is source()d.
treatment_plot <- ggplot(summary_df, aes(x = Treatment, y = mean_TonC)) +
  geom_col(fill = "skyblue") +
  geom_errorbar(
    aes(ymin = mean_TonC - se_TonC, ymax = mean_TonC + se_TonC),
    width = 0.2
  ) +
  labs(
    title = "Mean ± SE of TonC_havector_389d by Treatment",
    x = "Treatment",
    y = "TonC_havector_389d"
  ) +
  theme_minimal()
print(treatment_plot)