###############################################################.
#Lucia Combrink 
#Combine Dataframes 
#Updated: 3/23/22
################################################################.
library(tidyverse)
library(readxl)
setwd("~/Desktop/WRProjectData/") #Set working directory where all rawdata is stored. 


##Steps: 
#1) Download all relevant dataframes. 
#2) Combine all data (field,hatchery,lake,prey,isotope,gillraker)
#4) Write dataframes out. 

##1) Download (and clean) all dataframes: ####

WR_FieldData <- read_excel("WR_FieldData.xlsx") #All field data 2018-2020
HS_HatchData <- read_excel("HS_HatchData.xlsx") #All hatchery data 2020
WR_LakeData <- read_excel("WR_LakeData.xlsx") #Variables associated with each lake. 
WR_DietsumData <-read_csv("WR_DietsumData.csv") #Dataframe with % and count of diet items for each fish. 
WR_GillRData <- read_csv("WR_GRData_corrected.combined.csv") #Dataframe with all cleaned and size-corrected GR data
WR_DietSpecData <- read.csv("WR_DietSpecData.csv") #Diet data for each lake. 
WR_ZoopData <- read_excel("WR_ZoopData.xlsx", na = "NA") %>%  #Selecting relevant zooplankton data. 
  dplyr::select(lake_abrv, H_shoshone, L_minutus,D_mendatoe,D_pulex, H_shoshone_mm,L_minutus_mm,D_mendatoe_mm,D_pulex_mm, H_shoshone_biomass,L_minutus_biomass,D_mendatoe_biomass,D_pulex_biomass) %>% 
  filter(lake_abrv %in% c("Darren", "Lake37", "Lake38", "LIce", "Windy", "Kagevah", "NoNameW", "CuttNFK", "Spider", "UJean", "LJean", "LBlackJoe"))
WR_ZoopData[is.na(WR_ZoopData)] <- 0 #Replace NAs with 0 (since 0 species were observed)

#Calcualte average zooplankton length within each lake. See Combrink et al., 2022. 
WR_ZoopData <- WR_ZoopData %>% 
  mutate(avg_zoop_length = ((H_shoshone*H_shoshone_mm) + (L_minutus*L_minutus_mm) + (D_mendatoe*D_mendatoe_mm) + (D_pulex*D_pulex_mm)) / (H_shoshone + L_minutus + D_mendatoe + D_pulex))


## 2) Combine field,hatchery,isotope,prey proportion,gillraker,& lake data ####

WR_alldata <- rbind(WR_FieldData, HS_HatchData) %>% #890 obs
  full_join(WR_GillRData, by= "full_ID") %>% #891 One line added, unsure where it came from(likely an error). 
  full_join(WR_DietsumData, by="full_ID") %>% #891 
  full_join(dplyr::select(WR_LakeData,"lake", "lake_abrv", "elevation","area","depth_max","CPUE","stocked_date",
                   "t_stocking","lake_catagory","dominant_spc", "hectare", "NDVI_mean", "NDVI_SD"), by = "lake") %>% #One row added, unsure why
  left_join(WR_DietSpecData, by = "lake_abrv") %>% 
  left_join(WR_ZoopData, by="lake_abrv") %>% 
  select(-net_set_date, -net_pull_date, -flesh_col, -'field notes', -"gillraker notes",-age_months, -"...1", -eggs, -I_M, -ID, -num_raker_missing,-post_raker_notes) #Remove variables we don't need for downstream analyses. 

View(WR_alldata)

## 3) Write dataframes out ####

write.csv(WR_alldata,"WR_alldata.csv")