# #load experimental data Tessier
# #SIC calculation
# setwd ("C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/PAPER_")
# library (readxl)
# data <- read_excel("SIC_calculationdata.xlsx", sheet = "Carbonate", col_names = T)
# data <- data [-which (data$Treatment=="FB1Bac"|data$Treatment=="C1Bac"),]
# blankdata <- data[which (data$Treatment=="BLANK"),]
# elements <- c("Fe","K","Mg","Na")
# data <- data[-which (data$Treatment=="BLANK"),]
# 
# #Blank normalization
# data$Fe_fraction2  <- data$Fe_fraction2 - mean (blankdata$Fe_fraction2)
# data$K_fraction2  <- data$K_fraction2 - mean (blankdata$Fe_fraction2)
# data$Mg_fraction2 <- data$Mg_fraction2 - mean (blankdata$Fe_fraction2)
# data$Ca_fraction2  <- data$Ca_fraction2 - mean (blankdata$Fe_fraction2)
# 
# data$Fe <- data$Fe_fraction2* data$V_mL_fraction2/data$sampleMass_g #µg/g
# data$K <- data$K_fraction2* data$V_mL_fraction2/data$sampleMass_g #µg/g
# data$Mg<- data$Mg_fraction2* data$V_mL_fraction2/data$sampleMass_g #µg/g
# data$Ca<- data$Ca_fraction2* data$V_mL_fraction2/data$sampleMass_g #µg/g
# 
# Puresilicatesdata <- data[is.na(data$soilmass), ]
# data <- data[!is.na(data$soilmass), ]
# Treatments <- unique (data$Treatment)
# Treatments_letters <- unique(gsub("[0-9]", "", Treatments))
# Treatments <- Treatments_letters [-which (Treatments_letters=="C")]
# 
# #rock/soil ratio? 
# combined_data <- data.frame()
# 
# for (i in (1: length (Treatments))) {
#   data$ Dunite_soil <- 0.11 / data$soilmass #g/g
#   data$Basalt_soil <- 1.1 / data$soilmass #g/g
#   treatment <- Treatments[i]
#   
#   corrected_data <- data[grep(paste0("^",treatment), data$Treatment), ]
#   
#   #to do add if to use dunite or basalt/soil ratio if the rock is dunite or basalt 
#   
#   #correction for all elements:
#   if (treatment == "FB" |treatment == "CB" ){  
#     corrected_data$Ca <- corrected_data$Ca - corrected_data$Basalt_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Ca)
#     corrected_data$Fe <- corrected_data$Fe - corrected_data$Basalt_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Fe)
#     corrected_data$K <- corrected_data$K - corrected_data$Basalt_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$K)
#     corrected_data$Mg <- corrected_data$Mg - corrected_data$Basalt_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Mg)}
#   
#   if (treatment == "CD" |treatment == "MD" |treatment == "FD" ){ 
#     corrected_data$Ca <- corrected_data$Ca - corrected_data$Dunite_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Ca)
#     corrected_data$Fe <- corrected_data$Fe - corrected_data$Dunite_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Fe)
#     corrected_data$K <- corrected_data$K - corrected_data$Dunite_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$K)
#     corrected_data$Mg <- corrected_data$Mg - corrected_data$Dunite_soil * mean (Puresilicatesdata [which (Puresilicatesdata$Treatment==treatment),]$Mg)}
#   
#   combined_data <- rbind(combined_data, corrected_data)
# }
# data_control <- data[which (data$Treatment=="C3"|data$Treatment=="C1" ),]
# combined_data <- rbind(combined_data, data_control)
# #elements in combined_data are in µg/g --> µmol/g
# combined_data$Fe_mol <- combined_data$Fe / 55.845
# combined_data$K_mol <- combined_data$K / 39.098
# combined_data$Ca_mol <- combined_data$Ca / 40.078
# combined_data$Mg_mol <- combined_data$Mg / 24.305
# #µmol C 
# combined_data$molC_g <- 10^(-6)*((combined_data$Fe_mol+combined_data$Mg_mol+combined_data$Ca_mol)*2+combined_data$K_mol)
# combined_data$SICpercent <- 100*combined_data$molC_g*12 
# library (Rmisc)
# sum = summarySE(  combined_data     , measurevar = "SICpercent", groupvar = "Treatment" )
# library (ggplot2)
# 
# treatment_order <- c("C1", "FB1", "C3","CB3","FB3","CD3","MD3","FD3")
# # Convert Treatment column to factor with the specified order
# sum$Treatment <- factor(sum$Treatment, levels = treatment_order)
# #normalize for control soil
# calculate_difference <- function(SIC1, SE1, SIC2, SE2) {
#   diff <- SIC1 - SIC2
#   propagated_se <- sqrt(SE1^2 + SE2^2)
#   return(c(diff, propagated_se))
# }
# # Initialize results list for the comparisons
# results_list <- list()
# # FB1 - C1 comparison
# results_list[["FB1 - C1"]] <- calculate_difference(
#   sum$SICpercent[sum$Treatment == "FB1"], 
#   sum$se[sum$Treatment == "FB1"], 
#   sum$SICpercent[sum$Treatment == "C1"], 
#   sum$se[sum$Treatment == "C1"]
# )
# 
# # Loop through treatments ending with '3' and subtract from C3
# for (treatment in sum$Treatment[grepl("3$", sum$Treatment) & sum$Treatment != "C3"]) {
#   comparison_name <- paste(treatment, "- C3")
#   results_list[[comparison_name]] <- calculate_difference(
#     sum$SICpercent[sum$Treatment == treatment], 
#     sum$se[sum$Treatment == treatment], 
#     sum$SICpercent[sum$Treatment == "C3"], 
#     sum$se[sum$Treatment == "C3"]
#   )
# }
# 
# # Convert the results list into a data frame
# results <- do.call(rbind, lapply(names(results_list), function(name) {
#   cbind(Comparison = name, as.data.frame(t(results_list[[name]])))
# }))
# colnames(results) <- c("Comparison", "Difference", "Propagated_SE")
# 
# results$Treatment <- c("FB1","CB3","CD3","FB3","FD3","MD3")
# treatment_order <- c( "FB1", "CB3","FB3","CD3","MD3","FD3")
# # Convert Treatment column to factor with the specified order
# results$Treatment <- factor(results$Treatment, levels = treatment_order)
# results$SICpercent <- results$Difference
# results$se <- results$Propagated_SE 
# results$soilmass <- c (31.65, 34.40 ,33.41 , 34.40,33.41 ,33.41)
# #ton C/ha
# results$tonC_ha <- (results$SICpercent/100)*results$soilmass*10000/0.22/1000
# results$tonC_ha_se <- (results$se/100)*results$soilmass*10000/0.22/1000
# results$se <- results$Propagated_SE


#SIC calculation
# NOTE(review): absolute, machine-specific working directory; every relative
# file name used below (the Excel inputs) is resolved against it.
setwd("C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/PAPER_")
library(readxl)
library(Rmisc)
library(dplyr)


# Read the "Carbonate" sheet; the first row holds the column names.
data <- read_excel("SIC_calculationdata.xlsx", sheet = "Carbonate", col_names = TRUE)

# Drop the "FB1Bac" and "C1Bac" treatments. A logical mask is used instead of
# `data[-which(cond), ]`, because `-which()` yields an empty index (and
# therefore ZERO rows) when no row matches the condition.
data <- data[!(data$Treatment %in% c("FB1Bac", "C1Bac")), ]

# Keep the BLANK rows apart (used for the blank normalization) and remove
# them from the working data set.
blankdata <- data[which(data$Treatment == "BLANK"), ]
# NOTE(review): `elements` is not referenced by the code below, and it lists
# "Na" whereas the calculation further down uses Ca -- confirm before use.
elements <- c("Fe", "K", "Mg", "Na")
data <- data[!(data$Treatment %in% "BLANK"), ]

# Blank normalization and conversion to a per-gram basis.
#
# For every element the mean of the BLANK samples OF THAT SAME ELEMENT is
# subtracted from the measured value. (The previous version subtracted the
# mean Fe blank from K, Mg and Ca as well, which looks like a copy-paste
# slip; this changes the numeric results for K, Mg and Ca.)
# The blank-corrected value is then multiplied by the extract volume and
# divided by the sample mass.
for (element in c("Fe", "K", "Mg", "Ca")) {
  fraction_col <- paste0(element, "_fraction2")
  data[[fraction_col]] <- data[[fraction_col]] - mean(blankdata[[fraction_col]])
  data[[element]] <- data[[fraction_col]] * data$V_mL_fraction2 / data$sampleMass_g #µg/g
}

# Rows without a soil mass are kept apart as the pure-silicate measurements;
# the remaining rows form the working data set.
Puresilicatesdata <- data[is.na(data$soilmass), ]
data <- data[!is.na(data$soilmass), ]

# Treatment letter codes: the treatment names with all digits stripped
# (e.g. "FB1" and "FB3" both become "FB"), without the control code "C".
Treatments <- unique(data$Treatment)
Treatments_letters <- unique(gsub("[0-9]", "", Treatments))
# A logical mask is used instead of `x[-which(x == "C")]`, which would return
# an empty vector if "C" were not present.
Treatments <- Treatments_letters[Treatments_letters != "C"]

# Correct the treated-soil values for the contribution of the applied rock:
# for every treatment letter code the mean pure-silicate value of that code,
# scaled by the rock/soil mass ratio, is subtracted from Ca, Fe, K and Mg.

# Rock/soil mass ratios. They do not depend on the treatment, so they are
# computed once instead of on every loop iteration. The control rows selected
# at the end carry these two columns too, which keeps the column sets equal
# for the final row-bind.
data$Dunite_soil <- 0.11 / data$soilmass #g/g
data$Basalt_soil <- 1.1 / data$soilmass #g/g

basalt_treatments <- c("FB", "CB")
dunite_treatments <- c("CD", "MD", "FD")
corrected_elements <- c("Ca", "Fe", "K", "Mg")

# One corrected data frame per treatment code; bound together after the loop
# rather than growing a data frame with rbind() on every iteration.
corrected_list <- vector("list", length(Treatments))

# seq_along() performs zero iterations for an empty vector, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(Treatments)) {
  treatment <- Treatments[i]

  # All rows whose treatment name starts with this letter code
  corrected_data <- data[grep(paste0("^", treatment), data$Treatment), ]

  # Pick the rock/soil ratio that belongs to the rock type of this code;
  # codes in neither group are passed on uncorrected (as before).
  if (treatment %in% basalt_treatments) {
    rock_soil_ratio <- corrected_data$Basalt_soil
  } else if (treatment %in% dunite_treatments) {
    rock_soil_ratio <- corrected_data$Dunite_soil
  } else {
    rock_soil_ratio <- NULL
  }

  if (!is.null(rock_soil_ratio)) {
    pure_rock <- Puresilicatesdata[which(Puresilicatesdata$Treatment == treatment), ]
    if (nrow(pure_rock) == 0) {
      # mean() of an empty column is NaN, which would silently propagate
      warning("No pure-silicate rows found for treatment '", treatment,
              "'; corrected values will be NaN.", call. = FALSE)
    }
    for (element in corrected_elements) {
      corrected_data[[element]] <- corrected_data[[element]] -
        rock_soil_ratio * mean(pure_rock[[element]])
    }
  }

  corrected_list[[i]] <- corrected_data
}

# The controls are appended uncorrected, after all treated rows.
data_control <- data[data$Treatment %in% c("C3", "C1"), ]
combined_data <- do.call(rbind, c(corrected_list, list(data_control)))

# Convert the element contents to molar amounts, derive the carbon estimate
# and the share of each element in it.
#   *_mol       element content divided by its atomic mass (µg/g --> µmol/g)
#   molC_g      10^(-6) * (2 * (Fe + Mg + Ca) + K), on the molar columns
#   SICpercent  100 * molC_g * 12
#   fraction_*  contribution of each element to molC_g (factor 2 for Fe, Ca
#               and Mg, factor 1 for K)
# NOTE(review): molC_g sums charge equivalents (2 per Fe/Mg/Ca, 1 per K);
# confirm that this is the intended stoichiometry for mol C.
combined_data <- combined_data %>%
  dplyr::mutate(
    Fe_mol = Fe / 55.845,
    K_mol = K / 39.098,
    Ca_mol = Ca / 40.078,
    Mg_mol = Mg / 24.305,
    molC_g = 10^(-6) * ((Fe_mol + Mg_mol + Ca_mol) * 2 + K_mol),
    SICpercent = 100 * molC_g * 12,
    fraction_Fe = Fe_mol * 2 * 10^(-6) / molC_g,
    fraction_Ca = Ca_mol * 2 * 10^(-6) / molC_g,
    fraction_Mg = Mg_mol * 2 * 10^(-6) / molC_g,
    fraction_K = K_mol * 10^(-6) / molC_g
  )


# Per-treatment summary (summarySE: N, mean, sd, se, ci) of each element's
# fraction. The grouping argument is spelled out as `groupvars`; the previous
# `groupvar` only worked through partial argument matching.
sumCa <- summarySE(combined_data, measurevar = "fraction_Ca", groupvars = "Treatment")
sumFe <- summarySE(combined_data, measurevar = "fraction_Fe", groupvars = "Treatment")
sumMg <- summarySE(combined_data, measurevar = "fraction_Mg", groupvars = "Treatment")
sumK <- summarySE(combined_data, measurevar = "fraction_K", groupvars = "Treatment")

# merged_data <- merge(sumCa, sumK, by = "Treatment", suffixes = c("_Ca", "_K"))
# merged_data <- merge(merged_data, sumFe, by = "Treatment", suffixes = c("", "_Fe"))
# merged_data <- merge(merged_data, sumMg, by = "Treatment", suffixes = c("", "_Mg"))

# Tag every summary with the element it describes
sumCa$Element <- "Ca"
sumK$Element <- "K"
sumFe$Element <- "Fe"
sumMg$Element <- "Mg"

# Stack the four summaries. Each keeps its own fraction_<El> / se_<El>
# columns, so the columns belonging to the other elements are NA in the
# stacked rows.
combined_data2 <- bind_rows(
  sumCa %>% select(Treatment, fraction_Ca, se_Ca = se, Element),
  sumK %>% select(Treatment, fraction_K, se_K = se, Element),
  sumFe %>% select(Treatment, fraction_Fe, se_Fe = se, Element),
  sumMg %>% select(Treatment, fraction_Mg, se_Mg = se, Element)
)

# # Reshape combined_data into long format for plotting
# library (tidyr)
# long_data <- pivot_longer(combined_data2, 
#                           cols = starts_with("fraction_"), 
#                           names_to = "Fraction_Type", 
#                           values_to = "Fraction_Value")
# 
# # Plot using ggplot
# fractionplot <- ggplot(long_data, aes(x = Treatment, y = Fraction_Value, color = Element)) +
#   geom_point(position = position_dodge(width = 0.5), size = 3) +  # Points for different datasets
#   labs( x = "Treatment",
#        y = "Fraction of carbonate charges") +
#   scale_color_manual(values = c("blue", "red", "green", "purple")) +  # Custom colors for datasets
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))+
#   theme_bw() +
#   theme(axis.title.y = element_text(size= 10), axis.title.x = element_text(size= 10), axis.title = element_text(face = "bold")) +
#   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour= "black"))
#   
# output_path <- "C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/PAPER_/fractionplot.jpeg"
# ggsave(filename = output_path, plot = fractionplot, width = 6, height = 6, units = "in", dpi = 600)




library(Rmisc)
# Per-treatment summary of SICpercent. `groupvars` is spelled out in full
# (the previous `groupvar` relied on partial argument matching).
# NOTE(review): the name `sum` shadows base::sum for variable look-ups; it is
# kept because the code further down refers to this object by that name.
sum <- summarySE(combined_data, measurevar = "SICpercent", groupvars = "Treatment")
library(ggplot2)

treatment_order <- c("C1", "FB1", "C3", "CB3", "FB3", "CD3", "MD3", "FD3")
# Convert Treatment column to factor with the specified order
sum$Treatment <- factor(sum$Treatment, levels = treatment_order)
#normalize for control soil
# Difference between two values together with its propagated standard error.
#
# SIC1, SE1: value and standard error of the first group.
# SIC2, SE2: value and standard error of the second group.
# Returns an unnamed numeric vector: the difference SIC1 - SIC2 followed by
# sqrt(SE1^2 + SE2^2).
calculate_difference <- function(SIC1, SE1, SIC2, SE2) {
  c(SIC1 - SIC2, sqrt(SE1^2 + SE2^2))
}
# Difference (and propagated SE) in mean SICpercent between a treated group
# and its control, both looked up by treatment name in the summary table.
difference_vs_control <- function(treated, control) {
  calculate_difference(
    sum$SICpercent[sum$Treatment == treated],
    sum$se[sum$Treatment == treated],
    sum$SICpercent[sum$Treatment == control],
    sum$se[sum$Treatment == control]
  )
}

# One entry per comparison, named "<treated> - <control>"
results_list <- list()

# FB1 is compared with the control C1
results_list[["FB1 - C1"]] <- difference_vs_control("FB1", "C1")

# Every other treatment whose name ends in "3" is compared with the control C3
for (treatment in sum$Treatment[grepl("3$", sum$Treatment) & sum$Treatment != "C3"]) {
  results_list[[paste(treatment, "- C3")]] <- difference_vs_control(treatment, "C3")
}

# Convert the results list into a data frame: one row per comparison, holding
# the difference and its propagated standard error.
results <- do.call(rbind, lapply(names(results_list), function(name) {
  cbind(Comparison = name, as.data.frame(t(results_list[[name]])))
}))
colnames(results) <- c("Comparison", "Difference", "Propagated_SE")

# The treated group is the part of the comparison name before " - ". Deriving
# it from the name (instead of a hard-coded vector) keeps every label attached
# to its own row regardless of the order in which the comparisons were added.
results$Treatment <- sub(" - .*$", "", results$Comparison)
treatment_order <- c("FB1", "CB3", "FB3", "CD3", "MD3", "FD3")
# Convert Treatment column to factor with the specified order
results$Treatment <- factor(results$Treatment, levels = treatment_order)
results$SICpercent <- results$Difference
results$se <- results$Propagated_SE

# Soil mass per treatment, looked up by name rather than by row position.
# NOTE(review): the unit of these masses is not stated in this script --
# confirm it matches the ton C/ha conversion below.
soilmass_by_treatment <- c(
  FB1 = 31.65, CB3 = 34.40, CD3 = 33.41,
  FB3 = 34.40, FD3 = 33.41, MD3 = 33.41
)
results$soilmass <- unname(soilmass_by_treatment[as.character(results$Treatment)])

#ton C/ha
results$tonC_ha <- (results$SICpercent / 100) * results$soilmass * 10000 / 0.22 / 1000
results$tonC_ha_se <- (results$se / 100) * results$soilmass * 10000 / 0.22 / 1000

#################


# Base plot: experimental SIC difference per treatment (ton C/ha) as points
# with +/- one propagated standard error as error bars.
# (`plot` is extended with the simulated points further down.)
plot <- ggplot(results, aes(x = Treatment, y = tonC_ha)) +
  # experimental differences
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  # error bars
  geom_errorbar(
    aes(ymin = tonC_ha - tonC_ha_se, ymax = tonC_ha + tonC_ha_se),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  labs(
    x = "Treatment",
    y = expression(Delta ~ SIC ~ "(Ton C ha"^"-1" * ")")
  ) +
  # black-and-white theme, base font size 12
  theme_bw(base_size = 12) +
  theme(
    # bold, larger axis titles
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # no grid, no panel background, black axis lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    # no legend on this plot
    legend.position = "none"
  )



# Simulated carbonate gain (ton C/ha) of the "rock" run relative to the
# "control" run on a given day, read from one simulation workbook.
#
# path: Excel file with a sheet "Sheet1" holding the columns days, type,
#       calcite and Nesquehonite.
# day:  value of `days` to evaluate (default 130).
# Returns (calcite + Nesquehonite difference, in mol) * 12 * 10000 / 0.22 / 10^6.
simulated_delta_tonC_ha <- function(path, day = 130) {
  sim <- read_excel(path, sheet = "Sheet1", col_names = TRUE)
  sim_day <- sim[which(sim$days == day), ]
  rock <- sim_day[which(sim_day$type == "rock"), ]
  control <- sim_day[which(sim_day$type == "control"), ]

  delta_CaCO3 <- rock$calcite - control$calcite
  delta_MgCO3 <- rock$Nesquehonite - control$Nesquehonite
  delta_molCO3 <- delta_MgCO3 + delta_CaCO3 #mol
  delta_molCO3 * 12 * 10000 / 0.22 / 10^(6) # ton C/ha
}

# Overlay the simulated values on the experimental plot. Reading happens
# inside the helper, so the experimental `data` object is no longer
# overwritten by the simulation tables.
treatmentvector <- c("FB1", "FB3", "CB3", "CD3", "MD3", "FD3")
for (treatment in treatmentvector) {
  #SIc = 1 (file "Simulationdata_<treatment>.xlsx")
  new_point <- data.frame(
    treatment = treatment,
    delta_tonC_ha = simulated_delta_tonC_ha(paste0("Simulationdata_", treatment, ".xlsx")),
    SI = 1
  )

  #SIc = 0 (file "Simulationdata_<treatment>Sic.xlsx")
  new_point2 <- data.frame(
    treatment = treatment,
    delta_tonC_ha = simulated_delta_tonC_ha(paste0("Simulationdata_", treatment, "Sic.xlsx")),
    SI = 0
  )

  # Add both points to the plot: shape 8 for SIc = 1, shape 9 for SIc = 0
  plot <- plot +
    geom_point(data = new_point, aes(x = treatment, y = delta_tonC_ha), size = 4, shape = 8) +
    geom_point(data = new_point2, aes(x = treatment, y = delta_tonC_ha), size = 4, shape = 9)
}

# The main plot has no legend of its own (its points are added layer by
# layer with fixed shapes). A throw-away plot on mock data is built only to
# obtain a legend with the three point shapes; none of the mock values are
# drawn in the final figure.

# Create mock data
treatments <- c("A", "B", "C", "D", "E")
difference_experimental <- c(1.5, 2.1, 1.8, 2.3, 1.9)
difference_simulated <- c(1.7, 2.0, 2.1, 2.2, 2.0)
errors_experimental <- c(0.2, 0.1, 0.15, 0.25, 0.2)
errors_simulated <- c(0.1, 0.2, 0.1, 0.2, 0.15)

# Create data frames for experimental and simulated data
experimental_data <- data.frame(Treatment = treatments,
                                Difference = difference_experimental,
                                se = errors_experimental,
                                Type = "Experimental")
simulated_data <- data.frame(Treatment = treatments,
                             Difference = difference_simulated,
                             se = errors_simulated,
                             Type = "Simulated (SIc=1)")

# Combine both mock datasets. They are stored in `legend_data` so that the
# real `combined_data` computed earlier is not overwritten by mock values.
legend_data <- rbind(experimental_data, simulated_data)
# Relabel one row so that the third legend entry exists as well
legend_data$Type[1] <- "Simulated (SIc=0)"

# Create the plot, mock data for legend
forlegend <- ggplot(legend_data, aes(x = Treatment, y = Difference, shape = Type)) +
  geom_point(size = 4) +  # Plot points
  geom_errorbar(aes(ymin = Difference - se, ymax = Difference + se), width = 0.2) +  # Add error bars
  theme_bw() +
  labs(x = "Treatment", y = "Difference (Ton CO2 ha-1)", title = "Experimental vs Simulated Data") +
  # Same shapes as in the main plot: 16 = experimental, 8 = SIc=1, 9 = SIc=0
  scale_shape_manual(values = c("Experimental" = 16, "Simulated (SIc=1)" = 8, "Simulated (SIc=0)" = 9)) +
  theme(legend.position = "right")

library(cowplot)
# Extract the legend from the mock plot and place it to the right of the
# real plot, then write the combined figure to disk.
legend <- get_legend(forlegend)
final_plot <- plot_grid(plot, legend, rel_widths = c(1, 0.3))  # Adjust widths if needed
output_path <- "C:/Users/AVienne/OneDrive - Universiteit Antwerpen/Labexp 3 (start Oct 2022)/PAPER_/SIC_sim_exp.jpeg"
ggsave(filename = output_path, plot = final_plot, width = 6, height = 4, units = "in", dpi = 600)


