# 3. Standardize data for meta analysis

### This script standardizes the water and nutrient datasets prior to the meta-analysis

# source setup file

source('1_Setup.R')


## The standard deviation is computed for each population-by-treatment combination,
# then pooled within ranges,
# then pooled across ranges

#########################
#### NUTRIENT DATASET ###
#########################

#transform to long dataset

# Build the long-format nutrient table in three explicit steps:
#   1. put the size traits on the log10 scale,
#   2. stack every non-identifier column into trait / value pairs,
#   3. store the trait label as a character vector.
Ndata_long <- mutate(
  Ndata,
  Stretch_height = log10(Stretch_height),
  Area = log10(Area),
  Internode_length = log10(Internode_length),
  Total_Biomass = log10(Total_Biomass)
)

# identifier columns are kept on every row; all other columns become traits
Ndata_long <- melt(
  Ndata_long,
  id.vars = c("Maternal_line", "Treatment", "Range", "Population"),
  variable.name = "trait",
  value.name = "value"
)

Ndata_long <- mutate(Ndata_long, trait = as.character(trait))

## calculate pooled standard deviation for each trait

# Pooled SD per trait for the nutrient dataset, built up in three stages:
#   1. SD of the values within each trait x Range x Population x Treatment cell
#   2. cell SDs pooled within each Range (root mean square of the cell SDs)
#   3. Range-level SDs pooled across Ranges (root mean square again)
# The pooling is unweighted: each cell, and then each Range, counts equally.
# Result: one row per trait with the column SD_pooled_total.
N_pool_SD <- Ndata_long %>%
  group_by(trait, Range, Population, Treatment) %>%
  # SD for each population and treatment
  summarise(SD_trait_pop_treat = sd(value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(trait, Range) %>%
  # SD for each Range
  # mean(..., na.rm = TRUE) divides by the number of non-missing cell SDs, so a
  # cell whose SD is NA (e.g. fewer than two non-missing values) is left out of
  # both the sum and the count. Dividing the NA-stripped sum by n() would keep
  # such cells in the denominator only and shrink the pooled SD.
  summarise(SD_trait_pooled_range =
              sqrt(mean(SD_trait_pop_treat^2, na.rm = TRUE))
  ) %>%
  ungroup() %>%
  group_by(trait) %>%
  # total pooled SD; missing Range-level SDs are handled the same way
  summarise(SD_pooled_total =
              sqrt(mean(SD_trait_pooled_range^2, na.rm = TRUE))
  )


# Standardise every nutrient trait by dividing it by its pooled SD.
# Size traits are log10-transformed first, the same transformation applied
# before the pooled SDs in N_pool_SD were computed.
Ndata_standardised <- Ndata %>%
  # log size traits
  mutate(Stretch_height = log10(Stretch_height),
         Area = log10(Area),
         Internode_length = log10(Internode_length),
         Total_Biomass = log10(Total_Biomass)) %>%
  # constant key so the one-row table of pooled SDs attaches to every row
  mutate(column_bind = "bind") %>%
  # join pooled standard deviations
  left_join(
    N_pool_SD %>%
      mutate(column_bind = "bind") %>%
      mutate(trait = paste0(trait, "_SD_pooled")) %>%
      # reshape to a single row with one <trait>_SD_pooled column per trait
      dcast(column_bind ~ trait, value.var = "SD_pooled_total"),
    # name the key explicitly instead of relying on whichever column names
    # the two tables happen to share
    by = "column_bind"
  ) %>%
  # standardise traits
  mutate(
    Stretch_height = Stretch_height / Stretch_height_SD_pooled,
    SLA = SLA / SLA_SD_pooled,
    Area = Area / Area_SD_pooled,
    Shape = Shape / Shape_SD_pooled,
    Internode_length = Internode_length / Internode_length_SD_pooled,
    percent_dead_leaves = percent_dead_leaves / percent_dead_leaves_SD_pooled,
    Leaf_percent_N = Leaf_percent_N / Leaf_percent_N_SD_pooled,
    Fv_Fm = Fv_Fm / Fv_Fm_SD_pooled,
    D13c = D13c / D13c_SD_pooled,
    LMF = LMF / LMF_SD_pooled,
    RMF = RMF / RMF_SD_pooled,
    SSD = SSD / SSD_SD_pooled,
    Total_Biomass = Total_Biomass / Total_Biomass_SD_pooled
  ) %>%
  # keep identifiers and standardised traits; the join key and the
  # *_SD_pooled helper columns are dropped here
  select("Maternal_line", "Treatment", "Range", "Population",
         "Stretch_height", "SLA", "Area", "Shape", "Internode_length",
         "percent_dead_leaves", "Leaf_percent_N", "Fv_Fm", "D13c",
         "LMF", "RMF", "SSD", "Total_Biomass"
  )



#########################
#### WATER DATASET ###
#########################

#transform to long dataset

# Build the long-format water table in three explicit steps:
#   1. put the size traits on the log10 scale,
#   2. stack every non-identifier column into trait / value pairs,
#   3. store the trait label as a character vector.
Wdata_long <- mutate(
  Wdata,
  Stretch_height = log10(Stretch_height),
  Area = log10(Area),
  Internode_length = log10(Internode_length),
  Total_Biomass = log10(Total_Biomass)
)

# identifier columns are kept on every row; all other columns become traits
Wdata_long <- melt(
  Wdata_long,
  id.vars = c("Maternal_line", "Treatment", "Range", "Population"),
  variable.name = "trait",
  value.name = "value"
)

Wdata_long <- mutate(Wdata_long, trait = as.character(trait))

## calculate pooled standard deviation for each trait

# Pooled SD per trait for the water dataset, built up in three stages:
#   1. SD of the values within each trait x Range x Population x Treatment cell
#   2. cell SDs pooled within each Range (root mean square of the cell SDs)
#   3. Range-level SDs pooled across Ranges (root mean square again)
# The pooling is unweighted: each cell, and then each Range, counts equally.
# Result: one row per trait with the column SD_pooled_total.
W_pool_SD <- Wdata_long %>%
  group_by(trait, Range, Population, Treatment) %>%
  # SD for each population and treatment
  summarise(SD_trait_pop_treat = sd(value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(trait, Range) %>%
  # SD for each Range
  # mean(..., na.rm = TRUE) divides by the number of non-missing cell SDs, so a
  # cell whose SD is NA (e.g. fewer than two non-missing values) is left out of
  # both the sum and the count. Dividing the NA-stripped sum by n() would keep
  # such cells in the denominator only and shrink the pooled SD.
  summarise(SD_trait_pooled_range =
              sqrt(mean(SD_trait_pop_treat^2, na.rm = TRUE))
  ) %>%
  ungroup() %>%
  group_by(trait) %>%
  # total pooled SD; missing Range-level SDs are handled the same way
  summarise(SD_pooled_total =
              sqrt(mean(SD_trait_pooled_range^2, na.rm = TRUE))
  )

# Standardise every water trait by dividing it by its pooled SD.
# Size traits are log10-transformed first, the same transformation applied
# before the pooled SDs in W_pool_SD were computed.
Wdata_standardised <- Wdata %>%
  # log size traits
  mutate(Stretch_height = log10(Stretch_height),
         Area = log10(Area),
         Internode_length = log10(Internode_length),
         Total_Biomass = log10(Total_Biomass)) %>%
  # constant key so the one-row table of pooled SDs attaches to every row
  mutate(column_bind = "bind") %>%
  # join pooled standard deviations
  left_join(
    W_pool_SD %>%
      mutate(column_bind = "bind") %>%
      mutate(trait = paste0(trait, "_SD_pooled")) %>%
      # reshape to a single row with one <trait>_SD_pooled column per trait
      dcast(column_bind ~ trait, value.var = "SD_pooled_total"),
    # name the key explicitly instead of relying on whichever column names
    # the two tables happen to share
    by = "column_bind"
  ) %>%
  # standardise traits
  mutate(
    Stretch_height = Stretch_height / Stretch_height_SD_pooled,
    SLA = SLA / SLA_SD_pooled,
    Area = Area / Area_SD_pooled,
    Shape = Shape / Shape_SD_pooled,
    Internode_length = Internode_length / Internode_length_SD_pooled,
    percent_dead_leaves = percent_dead_leaves / percent_dead_leaves_SD_pooled,
    Leaf_percent_N = Leaf_percent_N / Leaf_percent_N_SD_pooled,
    Fv_Fm = Fv_Fm / Fv_Fm_SD_pooled,
    D13c = D13c / D13c_SD_pooled,
    LMF = LMF / LMF_SD_pooled,
    RMF = RMF / RMF_SD_pooled,
    SSD = SSD / SSD_SD_pooled,
    Total_Biomass = Total_Biomass / Total_Biomass_SD_pooled
  ) %>%
  # keep identifiers and standardised traits; the join key and the
  # *_SD_pooled helper columns are dropped here
  select("Maternal_line", "Treatment", "Range", "Population",
         "Stretch_height", "SLA", "Area", "Shape", "Internode_length",
         "percent_dead_leaves", "Leaf_percent_N", "Fv_Fm", "D13c",
         "LMF", "RMF", "SSD", "Total_Biomass"
  )
