#clear environment
rm(list = ls())


library(knitr) #used to print the results
library(tidyverse) #used for everything
library(progress) #for progress bar


# import data -------------------------------------------------------------

#load raw combined data
load("Rdata/isotopes.Rdata")

wide.df <- iso.df %>%
  select(sample.id,Name,Species,Facility,Type,Matrix,isotope.raw,isotope.expected) %>%
  arrange(Species,Facility,Name) %>%
  drop_na(Type) %>%
  pivot_wider(names_from="Species",values_from=c("isotope.raw","isotope.expected"))

#export the raw data as a csv for external assessment
write.csv(wide.df,"export/wide_data.csv",row.names=FALSE)

#do some quick rearranging
iso.df <- iso.df %>%
  select(Name,Species,Facility,Type,Matrix,isotope.raw,isotope.expected) %>%
  arrange(Species,Facility,Name)

#export the raw data as a csv for external assessment
write.csv(iso.df,"export/raw_data.csv",row.names=FALSE)

# figure out all of the possible normalization combinations ---------------

#create a list of all possible standards
name.list <- iso.df %>%
  filter(Type!="Linearity") %>% #we don't want the working standards
  pull(Name) %>%
  unique() %>%
  as.character()

#calculate every 2-standard combination. these are our quality controls
qc.list <- combn(name.list,2) 

#make an empty list to store the standard/QC combinations
combos.list <- list()

#iterate over every QC combination
for (col in 1:ncol(qc.list)){
  qc <- qc.list[,col] #return a list of QC names
  norm.list <- name.list[name.list!=qc[1] & name.list!=qc[2]] #return a list of standards
  
  onepoint.list <- list(standards=combn(norm.list,1),quality.controls=qc)
  twopoint.list <- list(standards=combn(norm.list,2),quality.controls=qc)
  threepoint.list <- list(standards=combn(norm.list,3),quality.controls=qc)
  fourpoint.list <- list(standards=combn(norm.list,4),quality.controls=qc)
  
  new.list <- list(one.point=onepoint.list,
                   two.point=twopoint.list,
                   three.point=threepoint.list,
                   four.point=fourpoint.list)
  
  combos.list <- append(combos.list,new.list)
}


# perform a normalization for each combination ----------------------------

method.list <- unique(names(combos.list))

facility.list <- iso.df %>%
  pull(Facility) %>%
  unique()

name_counter <- 0

for (combo in combos.list){
  for (col in 1:ncol(combo$standards)){
    name_counter <- name_counter+1
  }
}

n_iter <- name_counter

results.list <- list()

method_counter <- 0
norm_counter <- 0

pb <- progress_bar$new(format = "(:spin) [:bar] :percent [:elapsed || :eta]",
                       total = n_iter,complete = "=",incomplete = "-",current = ">",
                       clear = FALSE, width = 100, show_after=0)
pb$tick(0)

for (combo in combos.list){
  method_counter <- method_counter+1
  for (col in 1:ncol(combo$standards)){
    pb$tick()
    norm_counter <- norm_counter+1
    for (facility in facility.list){
      for (myspecies in c("N","C")){
        
        standards <- combo$standards[,col]
        quality.controls <- combo$quality.controls
        
        y <- iso.df %>%
          filter(Name %in% standards) %>%
          filter(Species==myspecies)%>%
          filter(Facility==facility) %>%
          pull(isotope.expected)
        
        x <- iso.df %>%
          filter(Name %in% standards) %>%
          filter(Species==myspecies)%>%
          filter(Facility==facility) %>%
          pull(isotope.raw)
        
        if (length(standards)==1){
          slope <- 1
          intercept <- mean(y)-mean(x)
        } else {
          lm <- lm(y~x,model=FALSE,qr=FALSE)
          slope <- as.numeric(coef(lm)[2])
          intercept <- as.numeric(coef(lm)[1])
        }
        
        method <- names(combos.list)[method_counter]
        
        new.list <- list(Species=myspecies,
                         Method=method,
                         Facility=facility,
                         Normalization=norm_counter,
                         Slope=slope,Intercept=intercept,
                         iso.range=max(y)-min(y),
                         standards=paste(standards,collapse = ' '),
                         quality.controls=paste(quality.controls,collapse = ' ')
                         )
        
        new.list <- list(normalization=new.list)
        
        results.list <- append(results.list,new.list)
      }
    } 
  }
}

pb$terminate()

results.df <- as.data.frame(do.call(rbind, results.list)) %>%
  mutate_all(as.character) %>%
  mutate(across(all_of(c("Slope","Intercept","iso.range","Normalization")),as.numeric))

results.df <- full_join(iso.df,results.df) %>%
  ungroup() %>%
  mutate(isotope.norm=isotope.raw*Slope+Intercept,
         isotope.deviation=isotope.norm-isotope.expected)

results.df$QC_logical <- mapply(grepl, results.df$Name, results.df$quality.controls)

results.df$SRM_logical <- mapply(grepl, results.df$Name, results.df$standards)

results.df <- results.df %>%
  mutate(Type=ifelse(QC_logical,"QC",
                     ifelse(SRM_logical,"SRM",NA))) %>%
  select(-c("SRM_logical","QC_logical"))

intermediate.df <- results.df %>%
  filter(Type=="QC")

# assess matrix -----------------------------------------------------------

norm.list <- results.df %>%
  pull(Normalization) %>%
  unique()

QC_matrix.df <- results.df %>%
  filter(Type=="QC") %>%
  select(Matrix,Method,Normalization) %>%
  unique() %>%
  arrange(Normalization)

results.list <- list()

for (norm in norm.list){
  temp.df <- QC_matrix.df %>%
    filter(Normalization==norm)
  
  if (nrow(temp.df)==1){
    temp.df$qc.matrix <- temp.df$Matrix
  } else {
    temp.df$qc.matrix <- "Mixed"
  }
  
  results.list <- append(results.list,list(temp.df))
}

QC_matrix.df <- bind_rows(results.list) %>%
  select(-Matrix)

QC_matrix.df %>%
  count(qc.matrix)

normalizations.df <- left_join(results.df,QC_matrix.df)

SRM_matrix.df <- results.df %>%
  filter(Type=="SRM") %>%
  select(Matrix,Method,Normalization) %>%
  unique() %>%
  arrange(Normalization)

results.list <- list()

for (norm in norm.list){
  temp.df <- SRM_matrix.df %>%
    filter(Normalization==norm)
  
  if (nrow(temp.df)==1){
    temp.df$srm.matrix <- temp.df$Matrix
  } else {
    temp.df$srm.matrix <- "Mixed"
  }
  
  results.list <- append(results.list,list(temp.df))
}

SRM_matrix.df <- bind_rows(results.list) %>%
  select(-Matrix)

SRM_matrix.df %>%
  count(srm.matrix)

normalizations.df <- left_join(normalizations.df,SRM_matrix.df)

normalizations.df <- normalizations.df %>%
  mutate(Matrix=ifelse(qc.matrix=="High Organic"&srm.matrix=="High Organic","Matched",
                       ifelse(qc.matrix=="Plant"&srm.matrix=="Plant","Matched",
                              ifelse(qc.matrix=="Mixed"&srm.matrix=="Mixed","Both Mixed",
                                     "Mixed"))))

normalizations.df %>%
  count(Matrix)

# determine extrapolation -------------------------------------------------

extrapolation.df <- results.df %>%
  filter(!is.na(Type)) %>%
  group_by(Normalization,Species,Type) %>%
  mutate(isotope.min=min(isotope.expected),
         isotope.max=max(isotope.expected)) %>%
  select(Normalization,Species,Type,isotope.min,isotope.max) %>%
  ungroup() %>%
  unique() %>%
  pivot_wider(names_from=Type,values_from=c(isotope.min,isotope.max)) %>%
  mutate(Extrapolation=ifelse(isotope.max_SRM<isotope.min_QC | isotope.min_SRM>isotope.max_QC,
                              "Extrapolated","Bounded")) %>%
  select(Normalization,Extrapolation)

extrapolation.df %>%
  count(Extrapolation)

normalizations.df <- left_join(normalizations.df,extrapolation.df)

# save the normalizations -------------------------------------------------


save(normalizations.df,file="Rdata/normalizations_03202023.Rdata")

# summarize the results ---------------------------------------------------

load("Rdata/normalizations_03202023.Rdata")

summary.df <- normalizations.df %>%
  filter(Type=="QC") %>%
  select(Species,Facility,Method,Matrix,Extrapolation,standards,quality.controls,isotope.deviation,Slope,Intercept) %>%
  group_by(Species,Method,Facility,Matrix,Extrapolation,standards,quality.controls) %>%
  summarize_all(mean) %>%
  mutate_if(is.numeric,round,digits=3) %>%
  ungroup() %>%
  mutate(Method=factor(Method,levels=c("one.point","two.point","three.point","four.point"),
                       labels=c("One-Point","Two-Point","Three-Point","Four-Point"))) %>%
  arrange(Species,Facility,Method,standards,quality.controls) %>%
  mutate(isotope.deviation=ifelse(standards=="IAEA600 USGS91",NA,isotope.deviation),
         Slope=ifelse(standards=="IAEA600 USGS91",NA,Slope),
         Intercept=ifelse(standards=="IAEA600 USGS91",NA,Intercept),
         Comments=ifelse(standards=="IAEA600 USGS91","Not computed. See methods",NA))

colnames(summary.df) <- c("Species","Method","Facility","Matrix",
                          "Extrapolation","Standards","Quality Controls","QC Deviation (permil)", "m","b", "Comments")

write.csv(summary.df,"export/all_normalizations.csv",row.names=FALSE)
