require(tidyverse) #for everything
require(readxl) #for importing data

#clear environment
rm(list = ls())

# import data -------------------------------------------------------------

N_groups.df <- read_excel('raw/WeekOneGroups_height.xlsx',sheet=1) %>%
  mutate(across(c("Identifier 1","Identifier 2","Group","Comment","Preparation","Method"),as.factor)) %>%
  mutate(across("Time Code",as.POSIXct)) %>%
  mutate_if(is.character,as.numeric)

C_groups.df <- read_excel('raw/WeekOneGroups_height.xlsx',sheet=2) %>%
  mutate(across(c("Identifier 1","Identifier 2","Group","Comment","Preparation","Method"),as.factor)) %>%
  mutate(across("Time Code",as.POSIXct)) %>%
  mutate_if(is.character,as.numeric)

badnews.df <- read_excel('raw/ratios_LBJ.xlsx',sheet="CN_Raw_all_peaks_w_ratios.wke") %>%
  mutate(across(c("Identifier 1","Identifier 2","Comment","Preparation","Method"),as.factor)) %>%
  mutate(across("Time Code",as.POSIXct)) %>%
  mutate_if(is.character,as.numeric) %>%
  filter()

std.df <- read_excel('raw/RunData_06252022.xlsx',sheet="Initialization") %>%
  mutate_if(is.character,as.factor)

csi_refgas.df <- read_excel('raw/RefGasLinearity.xlsx',sheet=1) %>%
  mutate(across(c("Type","Identifier 1","Identifier 2","Comment","Preparation","Method"),as.factor)) %>%
  mutate(across("Time Code",as.POSIXct)) %>%
  mutate_if(is.character,as.numeric)

norm.df <- read_excel('raw/multipoint.xlsx',sheet=1) %>%
  mutate(across(c("name","method"),as.factor)) %>%
  mutate_if(is.character,as.numeric)

# organize data -----------------------------------------------------------

#combine our data with the expected isotopic values in std.df
badnews.df <- left_join(badnews.df,std.df[c("name","type","matrix")],by=c("Identifier 1"="name"))

#subset the columns we actually care about, rename them, and put them in a new dataframe
mydata.df <- badnews.df[c("Row","Identifier 1","Peak Nr","type","Ampl  28","d 15N/14N","Ampl  44","d 13C/12C")]
colnames(mydata.df) <- c("id","name","peak","type","Nitrogen","d15N","Carbon","d13C")

#subset the columns we care about and only return linearity measurements
csi_refgas.df <- csi_refgas.df[c("Peak Nr","Type","Ampl  28","d 15N/14N")] %>%
  filter(Type=="Linearity")
colnames(csi_refgas.df) <- c("peak","type","beam.mV","raw.iso")

#create two new columns to facilitate a join between refgas2.df and mydata.df
csi_refgas.df$name <- "Reference Gas"
csi_refgas.df$species <- "Nitrogen"

#subset mydata.df to only include peaks of interest and to pivot longer
mydata.df <- mydata.df %>%
  filter(peak==4 | peak==5) %>%
  filter(type=="Linearity") %>%
  pivot_longer(c("Nitrogen","Carbon"),names_to="species",values_to="beam.mV") %>%
  pivot_longer(c("d15N","d13C"),names_to="isotope",values_to="raw.iso") %>%
  drop_na() %>%
  select(-isotope)%>% #remove isotope column
  select(-id) #remove ID column

#join mydata.df with refgas2.df
mydata.df <- full_join(mydata.df,csi_refgas.df)

#calculate medians of isotopes ny name and species
mymedian.df <- mydata.df %>%
  group_by(name,species) %>%
  summarize(median.iso=median(raw.iso))

#join medians to mydata.df
mydata.df <- left_join(mydata.df,mymedian.df)

#calculate difference between raw and median values (aka linearity effect)
mydata.df$norm.iso <- mydata.df$raw.iso-mydata.df$median.iso

mydata.df$beam.V <- mydata.df$beam.mV/1000

mydata.df$species <- factor(mydata.df$species,levels=c("Nitrogen","Carbon"))


# calculate linearity -----------------------------------------------------

#subset nitrogen data because carbon is linear and doesn't require a correction
N.df <- mydata.df %>%
  filter(species=="Nitrogen") %>%
  filter(name!="Reference Gas")

#compute polynomial coefficients
N.lm <- lm(norm.iso ~ poly(beam.V,3,raw=TRUE),data=N.df)$coefficients
N.lm #return coefficients

#calculate linearity correction from polynomial
N.df$lin.corr <- (N.lm[4]*N.df$beam.V^3)+(N.lm[3]*N.df$beam.V^2)+(N.lm[2]*N.df$beam.V)+N.lm[1]

#join linearity correction data to mydata.df
linearity.df <- left_join(mydata.df,N.df)


# export data -------------------------------------------------------------

save(linearity.df,file="Rdata/linearity.Rdata")
