# Remove all (non-hidden) objects from the environment the script runs in,
# so the script starts from a clean state.
# NOTE(review): when this file is source()d into an interactive session this
# also deletes the user's own objects.
rm(list = ls())
##############################################################################################
#  Script by Femke Lutz (12.2019) for:                                                      ##
# "The importance of management information and soil moisture representation for            ##
#  simulating tillage effects on N$_2$O emissions in LPJmL5.0-tillage"                      ##
#                                                                                           ##
#  Reads in LPJmL output, Daycent outputs and observations to create                        ##
#  one combined dataframe which includes:                                                   ##
#  -  Runs (LPJmL+ Daycent) and observations of all experimental sites for years where      ##
#     maize is grown and observations are available.                                        ##
#    *N2O emissions                                                                         ##
#    *Experimental set up (e.g. LPJmL.G, LPJmL.D, LPJmL.D-F etc.)                           ##
##############################################################################################

# GENERAL SETTINGS
# Experimental sites; all input and output paths of a site are built as
# paste0(working.path, <site>, "/", ...).
sites <- c("nebraska", "colorado", "boigneville", "michigan")
# Number of cells requested when reading an LPJmL output file
NCELLS <- 1
# Path to the directory holding the output runs and the observations of the
# experiments. It is concatenated with paste0(), so a non-empty value has to
# end with a path separator.
working.path <- ""

# General settings for reading in the LPJmL scenarios tested
# (e.g. Detailed vs Global settings)

# Output directories of the tillage ("ct") runs
scens_ct_lpj <- c(
  "output_till_detail", "output_till_fert", "output_till_irr",
  "output_till_gs", "output_till_pz", "output_till_till",
  "output_till_glob"
)
# Output directories of the no-tillage ("nt") runs, in the same order
scens_nt_lpj <- c(
  "output_notill_detail", "output_notill_fert", "output_notill_irr",
  "output_notill_gs", "output_notill_pz", "output_notill_till",
  "output_notill_glob"
)
# The same scenarios, read from the directories carrying the "_adjusted" suffix
scens_ct_lpj_adj <- paste0(scens_ct_lpj, "_adjusted")
scens_nt_lpj_adj <- paste0(scens_nt_lpj, "_adjusted")

# Names of per-scenario data frames
# NOTE(review): not referenced anywhere else in this script - confirm it is
# still needed.
all <- c(
  "df_ct_lpjml_detail", "df_nt_lpjml_detail",
  "df_ct_lpjml_default", "df_nt_lpjml_default",
  "df_ct_lpjml_irr", "df_nt_lpjml_irr",
  "df_ct_lpjml_fert", "df_nt_lpjml_fert",
  "df_ct_lpjml_gs", "df_nt_lpjml_gs",
  "df_ct_lpjml_pz", "df_nt_lpjml_pz"
)

# Daily LPJmL output files read for every scenario; the file name without
# ".bin" becomes the column name in the resulting data frame.
daily_files <- c(
  "d_npp.bin", "d_gpp.bin", "d_rh.bin", "d_trans.bin", "d_interc.bin",
  "d_wevap.bin", "d_runoff.bin", "d_nleaf.bin", "d_leaching.bin",
  "d_nsoil_slow.bin", "d_nsoil_fast.bin", "d_growingday.bin", "d_fphu.bin",
  "d_no3.bin", "d_nh4.bin", "d_n2o_denit.bin", "d_n2o_nit.bin",
  "d_nmin.bin", "d_nimmo.bin", "d_fw0.bin", "d_fw1.bin", "d_fwevap.bin",
  "d_w0.bin", "d_w1.bin", "d_cso.bin", "d_cleaf.bin", "d_cpool.bin",
  "d_litter_ag.bin", "d_litter_agsub.bin", "d_litter_cover.bin",
  "d_litter_agtemp.bin", "d_litter_agmoist.bin", "d_litter_agdecomp.bin",
  "d_whc.bin", "d_wsat.bin", "d_fc.bin", "d_wp.bin", "d_litter_agevap.bin"
)

# Scenario labels; matched by position to the entries of the scens_* vectors
scen <- c(
  "Detail", "Detail-Fert.", "Detail-Irr.", "Detail-GS",
  "Detail-Pool.S", "Detail-Till.T", "Global"
)


#FUNCTIONS#############################################################

# Read a raw binary output file into a numeric vector.
#
# Arguments:
#   path      directory of the file; concatenated directly with `filename`,
#             so it has to end with a path separator
#   filename  name of the binary file inside `path`
#   bands, nyears, ncells
#             their product is the number of values requested from the file
#             (bands defaults to 365)
#   datsize   size in bytes of one stored value (default 4)
#
# Returns the values read, as a numeric vector with at most
# bands * ncells * nyears elements (fewer if the file is shorter).
get.data <-
  function(path,
           filename,
           bands = 365,
           nyears,
           ncells,
           datsize = 4) {
    f.var <- file(paste0(path, filename), "rb")
    # Close the connection even if readBin() fails, so no handle is leaked
    on.exit(close(f.var), add = TRUE)
    readBin(f.var,
            double(),
            size = datsize,
            n = bands * ncells * nyears)
  }

# Parse date string(s) and write them out again in the requested layout.
#   date_chr    character date(s), parsed with as.Date.character()
#   format_chr  output format handed to format(); defaults to "%Y-%m-%d"
# Returns a character vector.
formatDate <- function(date_chr = "",
                       format_chr = "%Y-%m-%d") {
  parsed_date <- as.Date.character(date_chr)
  format(parsed_date, format_chr)
}

# Turn day-of-year number(s) and a year into date-times.
#   doy   day of the year
#   year  calendar year
# Returns the result of strptime() on "<year> <doy>" (a POSIXlt object).
doy2day <-
  function(doy, year) {
    year_and_doy <- paste(year, doy)
    strptime(year_and_doy, format = "%Y %j")
  }
# Convert an amount per area from g/m2 to kg/ha (factor 10).
# The result is returned visibly, so it also prints when the function is
# called at top level.
g.m2TOkg.ha <- function(g_m2) {
  g_m2 * 10
}
# Convert carbon to dry-matter (DM) biomass.
# The result is returned visibly, so it also prints when the function is
# called at top level.
carbon2biomass <-
  function(carbon) {
    # 0.45 conversion factor from DM biomass to carbon
    carbon / 0.45
  }

#### read in LPJmL .bin files

# Read all daily LPJmL outputs of the given scenario directories for the
# current site and stack them into one data frame with date columns.
#
# Argument:
#   scens  character vector of scenario directory names
#          (e.g. scens_ct_lpj or scens_nt_lpj)
#
# Relies on objects defined outside the function: working.path, site,
# NCELLS, years, daily_files, scen, get.data() and the per-day vectors
# doy, day, month, year and date, which are bound column-wise to the rows
# of each scenario.
#
# Returns a data frame holding the date columns, one column per daily file
# (file name without ".bin"), run_ct_nt (the scenario directory) and run
# (the label taken from `scen` at the same position as the scenario in
# `scens`). Returns NULL when `scens` is empty.
lpj_reads <- function(scens) {
  d_df <-
    data.frame(matrix(
      NA,
      nrow = 365 * length(years),
      ncol = length(daily_files)
    ))
  # fixed = TRUE strips the literal ".bin" instead of "any character + bin"
  colnames(d_df) <- sub(".bin", "", daily_files, fixed = TRUE)
  all_data <- NULL
  # seq_along() so that an empty `scens` skips the loop instead of
  # iterating over c(1, 0)
  for (ff in seq_along(scens)) {
    for (i in seq_along(daily_files)) {
      dvar <-
        get.data(
          paste0(working.path, site, "/", scens[ff], "/"),
          daily_files[i],
          365,
          length(years),
          NCELLS,
          4
        )
      d_df[, i] <- dvar
    } # end daily files
    # Tag the rows once per scenario (identical for every daily file)
    d_df$run_ct_nt <- scens[ff]
    d_df$run <- scen[ff]
    all_data <- rbind(all_data, d_df)
  } # end of scens
  # add dates: the same date columns are attached to the rows of each scenario
  dated_data <- NULL
  for (ff in seq_along(scens)) {
    scen_rows <- subset(all_data, all_data$run_ct_nt == scens[ff])
    scen_rows <- cbind(data.frame(doy, day, month, year, date), scen_rows)
    dated_data <- rbind(dated_data, scen_rows)
  }
  dated_data
} # end of function

###################################################################################

# Main loop: for every site, read observations, Daycent and LPJmL results,
# harmonize them into one data frame and save
#   <site>/LPJML_entire.RData  (all LPJmL variables, all scenarios) and
#   <site>/full_data_oy.RData  (N2O of all runs for the observation years).
for (pp in seq_along(sites)) {
  site <- sites[pp]
  #SETTINGS of experiments #############
  # Per site: the complete run period of the experiment (FIRSTYEAR/LASTYEAR)
  # and the years for which N2O observations are used (NYEARS.obs).
  if (site == "nebraska") {
    FIRSTYEAR <- 2001
    LASTYEAR <- 2015
    FIRSTYEAR.obs <- 2011
    LASTYEAR.obs <- 2015
    NYEARS.obs <- FIRSTYEAR.obs:LASTYEAR.obs
  } else if (site == "colorado") {
    FIRSTYEAR <- 1999
    LASTYEAR <- 2006
    FIRSTYEAR.obs <- 2002
    LASTYEAR.obs <- 2006
    NYEARS.obs <- FIRSTYEAR.obs:LASTYEAR.obs
  } else if (site == "michigan") {
    FIRSTYEAR <- 1989
    LASTYEAR <- 2010
    FIRSTYEAR.obs <- 1991
    LASTYEAR.obs <- 2010
    #select only years with maize grown (there are crop rotations at this exp.)
    NYEARS.obs <- c(1991, 1993, 1996, 1999, 2002, 2005, 2008)
  } else if (site == "boigneville") {
    FIRSTYEAR <- 1971
    LASTYEAR <- 2004
    FIRSTYEAR.obs <- 2003
    LASTYEAR.obs <- 2003
    #select only years with maize grown (there are crop rotations at this exp.)
    NYEARS.obs <- 2003
  } else {
    # Fail early instead of silently reusing the previous site's settings
    stop("no settings defined for site: ", site)
  }
  # Complete runs of the experiments (vector of all simulated years)
  NYEARS <- years <- FIRSTYEAR:LASTYEAR
  # Set for every site so it never keeps the value of the previous site
  years.obs <- NYEARS.obs
  
  ####READ IN DATA##############################################################
  #OBSERVATIONS####
  #Load observed emissions and add dates, tillage ("ct") and no-tillage ("nt")
  obs_ct_ghg <-
    get(load(paste0(
      working.path, site, "/", "ghg_ct_obs", ".RData"
    )))
  obs_ct_ghg$date <- as.Date(as.character(obs_ct_ghg$Date))
  obs_nt_ghg <-
    get(load(paste0(
      working.path, site, "/", "ghg_nt_obs", ".RData"
    )))
  obs_nt_ghg$date <- as.Date(as.character(obs_nt_ghg$Date))
  
  #add column with names to harmonize df with daycent and lpjml
  #(the second column of the observation tables is renamed to "n2o_tot")
  colnames(obs_ct_ghg)[2] <- "n2o_tot"
  obs_ct_ghg$run <- "Observed_CT"
  colnames(obs_nt_ghg)[2] <- "n2o_tot"
  obs_nt_ghg$run <- "Observed_NT"
  
  #Remove NA's from observations
  obs_ct_ghg <- obs_ct_ghg[!is.na(obs_ct_ghg$n2o_tot), ]
  obs_nt_ghg <- obs_nt_ghg[!is.na(obs_nt_ghg$n2o_tot), ]
  
  #combine tillage and no-tillage df's  of observed emissions
  obs_ghg_all <- rbind(obs_ct_ghg, obs_nt_ghg)
  obs_ghg <- subset(obs_ghg_all, select = c(year, date, run, n2o_tot))
  
  #DAYCENT####
  #Load simulated emissions by Daycent, tillage ("ct") and no-tillage ("nt")
  ghg_day_ct <-
    get(load(
      paste0(working.path, site, "/", "summary_ct_day", ".RData")
    ))
  ghg_day_nt <-
    get(load(
      paste0(working.path, site, "/", "summary_nt_day", ".RData")
    ))
  
  #add column with names to harmonize df with observations and lpjml
  #(column 12 of the Daycent tables is renamed to "year")
  colnames(ghg_day_ct)[12] <- "year"
  ghg_day_ct$run <- "Daycent_CT"
  
  colnames(ghg_day_nt)[12] <- "year"
  ghg_day_nt$run <- "Daycent_NT"
  
  #Combine tillage ("ct") and no-tillage ("nt") simulated emissions for daycent.
  ghg_day_all <- rbind(ghg_day_ct, ghg_day_nt)
  
  #Subset year, date, run and emissions
  ghg_day <- subset(ghg_day_all, select = c(year, date, run, n2o_tot))
  
  #LPJML####
  # Dates, months, years for dataframe. Every year is given 365 days; day and
  # month are taken from 2003 (a non-leap year) and repeated for each year.
  # These vectors are read by lpj_reads() from the global environment.
  doy <- rep(1:365, length(years))
  day <-
    rep(as.numeric(format(doy2day(1:365, 2003), "%d")), length(years))
  month <-
    rep(as.numeric(format(doy2day(1:365, 2003), "%m")), length(years))
  year <- rep(years, each = 365)
  # YYYY-MM-DD; as.Date, otherwise returns factor class (problem for merge func.)
  date <-
    as.Date(formatDate(paste(year, month, day, sep = "-")))
  
  #Read in LPJmL Data ####
  
  ct_lpj <- lpj_reads(scens_ct_lpj)
  ct_lpj_adj <- lpj_reads(scens_ct_lpj_adj)
  nt_lpj <- lpj_reads(scens_nt_lpj)
  nt_lpj_adj <- lpj_reads(scens_nt_lpj_adj)
  
  #calculate N2O from nitrif+denitr and convert to g/ha
  ct_lpj$n2o_tot <-
    (ct_lpj$d_n2o_denit + ct_lpj$d_n2o_nit) * 10000 # convert m2 to ha
  nt_lpj$n2o_tot <-
    (nt_lpj$d_n2o_denit + nt_lpj$d_n2o_nit) * 10000 # convert m2 to ha
  ct_lpj_adj$n2o_tot <-
    (ct_lpj_adj$d_n2o_denit + ct_lpj_adj$d_n2o_nit) * 10000 # convert m2 to ha
  nt_lpj_adj$n2o_tot <-
    (nt_lpj_adj$d_n2o_denit + nt_lpj_adj$d_n2o_nit) * 10000 # convert m2 to ha
  
  #combine simulated emissions tillage ("ct") and no-tillage ("nt") of lpjml to one dataframe
  lpj_full <- rbind(ct_lpj, nt_lpj, ct_lpj_adj, nt_lpj_adj)
  
  #remove the zeroes that appear once (1 day after harvest), and copy previous value
  #for those variables where this occurs  only.
  # NOTE(review): n2o_tot was computed above from the uncorrected d_n2o_denit
  # and is not recomputed after this correction - confirm this is intended.
  
  zeroes <- which(lpj_full$d_wevap == 0 & lpj_full$d_fphu == 1)
  # Row 1 has no previous row; dropping it keeps both sides of the
  # assignment below the same length.
  zeroes <- zeroes[zeroes > 1]
  nn <- which(
    colnames(lpj_full) %in% c(
      "d_n2o_denit",
      "d_litter_ag",
      "d_wevap",
      "d_w0",
      "d_w1",
      "d_litter_agsub",
      "d_litter_cover",
      "d_wsat",
      "d_fc",
      "d_wp"
    )
  )
  
  if (length(zeroes) > 0) {
    lpj_full[zeroes, nn] <- lpj_full[zeroes - 1, nn]
  }
  
  #Save LPJML entire dataframe
  save(lpj_full,
       file = paste0(working.path, site, "/", "LPJML_entire", ".RData"))
  
  #Subset dataframe with variables of interest
  lpj_df <- subset(lpj_full, select = c(year, date, run_ct_nt, n2o_tot))
  
  #rename run_ct_nt to harmonize columnnames with df of daycent and observations
  colnames(lpj_df)[which(colnames(lpj_df) == "run_ct_nt")] <- "run"
  
  #COMBINE LPJmL, Daycent and Observations
  full_data <- rbind(lpj_df, ghg_day, obs_ghg)
  full_data$n2o_tot <- as.numeric(full_data$n2o_tot)
  
  #Add site information
  full_data$site <- site
  
  #FINAL DATAFRAME####################################################################
  # Includes: years and dates where maize is grown and observations are available for
  # all sites, runs (daycent, experimental runs(e.g. detail vs global), and N2o emissions)
  
  
  #FULL_DATA_OY
  # Rows are collected year by year, in the order given by NYEARS.obs
  full_data_oy <- NULL
  for (kk in seq_along(NYEARS.obs)) {
    year_rows <- subset(full_data, full_data$year == NYEARS.obs[kk])
    full_data_oy <- rbind(full_data_oy, year_rows)
  }
  
  #Save data
  save(full_data_oy,
       file = paste0(working.path, site, "/", "full_data_oy", ".RData"))
  print(paste0("dataframe is saved for site:", sites[pp]))
  
  
}#end of site loop
