# NOTE(review): removes every visible object from the environment the script
# is run in, so sourcing this file from an interactive session also wipes the
# user's own workspace objects. Prefer running the script in a fresh R session.
rm(list = ls())
##############################################################################################
#  Script by Femke Lutz (12.2019) for:                                                      ##
# "The importance of management information and soil moisture representation for            ##
#  simulating tillage effects on N$_2$O emissions in LPJmL5.0-tillage"                      ##
#                                                                                           ##
#  preprocesses data to create figures as submitted in the paper, uses the dataframe        ##
#   created in read.runs.R                                                                  ##
#                                                                                           ##
##############################################################################################

#PACKAGES

#Install packages, load libraries
#install.packages("ggplot2")
#install.packages("gridExtra")
library(ggplot2)   #for plotting
library(gridExtra) #having multiple panels in 1 PDF

#GENERAL SETTINGS
# Prefix pasted directly in front of every input and output file name. Leave it
# empty to work relative to the current directory; otherwise end it with "/".
working.path <- ""
# Experimental sites. Each name is also the folder that holds the site's
# "full_data_oy.RData" file.
sites <- c("colorado", "nebraska", "boigneville", "michigan")

# output directories of lpjml tillage runs
# The vector also carries the Daycent and observed tillage series and the
# "_adjusted" counterparts of the LPJmL runs.
CT_vect <- c(
  "output_till_detail", "output_till_fert", "output_till_irr",
  "output_till_gs", "output_till_pz", "output_till_till",
  "output_till_glob",
  "Daycent_CT", "Observed_CT",
  "output_till_detail_adjusted", "output_till_fert_adjusted",
  "output_till_irr_adjusted", "output_till_gs_adjusted",
  "output_till_pz_adjusted", "output_till_till_adjusted",
  "output_till_glob_adjusted"
)

# output directories of lpjml no-tillage runs
# Same layout as CT_vect; membership in this vector is what marks a run as
# "No tillage" further down.
NT_vect <- c(
  "output_notill_detail", "output_notill_fert", "output_notill_irr",
  "output_notill_gs", "output_notill_pz", "output_notill_glob",
  "output_notill_till",
  "Daycent_NT", "Observed_NT",
  "output_notill_detail_adjusted", "output_notill_fert_adjusted",
  "output_notill_irr_adjusted", "output_notill_gs_adjusted",
  "output_notill_pz_adjusted", "output_notill_glob_adjusted",
  "output_notill_till_adjusted"
)

# Original (unadjusted) LPJmL runs, listed as till / no-till pairs.
run_lpjml <- c(
  "output_till_detail", "output_notill_detail",
  "output_till_fert", "output_notill_fert",
  "output_till_irr", "output_notill_irr",
  "output_till_gs", "output_notill_gs",
  "output_till_pz", "output_notill_pz",
  "output_till_till", "output_notill_till",
  "output_till_glob", "output_notill_glob"
)
# The modified LPJmL runs share the names of the original ones plus a suffix.
runs_adjusted <- paste0(run_lpjml, "_adjusted")

# Daycent series (tillage, no tillage).
run_daycent <- c("Daycent_CT", "Daycent_NT")

#Defining line type for field capacity and wilting point
linet <- c(
  "Field Capacity" = "twodash",
  "Wilting Point" = "longdash"
)
# Display names of all runs; the order is used as the factor level order of
# run.agg (and therefore the x-axis order) in the box plots.
run.vect <- c(
  "Observed",
  "LPJmL.D", "LPJmL.D-F", "LPJmL.D-I", "LPJmL.D-GS", "LPJmL.D-PS", "LPJmL.D-T",
  "LPJmL.G",
  "Daycent"
)

#Define colors in graphs

# One colour per entry of run.vect. "LPJmL.D-F" was missing from this map even
# though it is one of the plotted runs; a manual scale built from the map would
# have left that run without a colour.
# NOTE(review): "#D39200" is assumed to be the matching entry of ggplot2's
# default 9-colour hue palette, which the neighbouring values appear to come
# from. Adjust if another colour is preferred.
cols.all.run <-
  c(
    "LPJmL.D" = "#F8766D",
    "LPJmL.D-F" = "#D39200",
    "LPJmL.D-I" = "#93AA00",
    "LPJmL.D-GS" = "#00BA38",
    "LPJmL.D-PS" = "#00C19F",
    "LPJmL.D-T" = "#00B9E3",
    "LPJmL.G" = "#DB72FB",
    "Daycent" = "#00BFC4",
    "Observed" = "#619CFF"
  )

# Colours for the reduced set of runs (detailed, global, Daycent, observed).
cols.sub.run <- c(
  "LPJmL.D" = "#F8766D",
  "LPJmL.G" = "#DB72FB",
  "Daycent" = "#00BFC4",
  "Observed" = "#619CFF"
)

# Outline colours keyed by adj.type: only the original LPJmL runs stand out.
cols.adj.new <-
  c(
    "Observed" = "black",
    "LPJmL.Orig." = "#FF33FF",
    "LPJmL.Mod." = "black",
    "Daycent" = "black"
  )

# Line colours of the soil-water figure. Note the key "LPJmL.Orig" has no
# trailing dot here, unlike the "LPJmL.Orig." key in cols.adj.new.
cols.water <-
  c(
    "LPJmL.Orig" = "grey",
    "LPJmL.Mod." = "#00BFC4",
    "Daycent" = "#F8766D",
    "Observed" = "black"
  )
#FUNCTIONS#######################################################################

#absolute differences between tillage types (no-tillage "nt" and tillage "ct")
# Returns n2o_nt - n2o_ct, element by element.
#   n2o_nt  vector of no-tillage values; its length sets the result length
#   n2o_ct  vector of tillage values, read at the positions of n2o_nt
#   rat     optional vector whose leading elements are overwritten with the
#           differences (kept for backward compatibility, default NULL)
# Empty input gives an empty result. The former 1:length() loop ran over
# c(1, 0) in that case and returned a single NA.
abso <- function(n2o_nt, n2o_ct, rat = NULL) {
  idx <- seq_along(n2o_nt)
  differences <- unname(n2o_nt[idx] - n2o_ct[idx])
  if (is.null(rat)) {
    return(differences)
  }
  rat[idx] <- differences
  rat
}

######################################################
########Preprocessing             ####################
######################################################

#Read in data frame from read.runs.R, combine dataframes of all experimental sites
# Every site folder holds a "full_data_oy.RData" file. load() returns the name
# of the stored object, which get() then fetches. Loading happens inside the
# helper function so the stored object does not leak into the global
# environment, and the per-site frames are stacked in one rbind() call instead
# of growing a data frame inside a 1:length() loop.
full_data_year <- do.call(rbind, lapply(sites, function(site_name) {
  rdata_file <- paste0(
    working.path, site_name, "/", "full_data_oy", ".RData"
  ) #add path
  get(load(rdata_file, envir = environment()))
}))

#Subset data to separate runs and observations into different dataframes
is_observed <- full_data_year$run %in% c("Observed_CT", "Observed_NT")
# rows without a run id end up in neither frame
data_runs_year <-
  full_data_year[!is_observed & !is.na(full_data_year$run), , drop = FALSE]
data_obs_year <- full_data_year[is_observed, , drop = FALSE]
data_obs_year$adj.type <- "Observed"
data_obs_year$run.agg <- "Observed"

#Add tillage type information to runs and observations dataframes
# a run counts as "No tillage" when its id is listed in NT_vect
tillage_labels <- c("Conv. tillage", "No tillage")
data_runs_year$till.type <-
  tillage_labels[(data_runs_year$run %in% NT_vect) + 1L]
data_obs_year$till.type <-
  tillage_labels[(data_obs_year$run %in% NT_vect) + 1L]

#Add information to type of run (e.g. Daycent, LPJmL original or modified)
in_adjusted <- data_runs_year$run %in% runs_adjusted
in_daycent <- data_runs_year$run %in% run_daycent
in_original <- data_runs_year$run %in% run_lpjml

# anything that is in none of the three run lists is labelled "Observed"
model_type <- rep("Observed", nrow(data_runs_year))
model_type[in_original] <- "LPJmL.Orig."
model_type[in_daycent] <- "Daycent"
model_type[in_adjusted] <- "LPJmL.Mod."
data_runs_year$adj.type <- model_type

# Strip the "output_<till|notill>_" prefix. The pattern is greedy, so
# "output_till_till_adjusted" is reduced to "adjusted", not "till_adjusted".
run_stem <- sub("output_.*till_", "", data_runs_year$run)
run_label <- rep("Observed", nrow(data_runs_year))
run_label[in_adjusted] <- run_stem[in_adjusted]
run_label[in_daycent] <- "Daycent"
run_label[in_original] <- run_stem[in_original]

#Add information to type of LPJmL run (e.g. detailed vs global input information)
stem_to_label <- c(
  detail = "LPJmL.D", detail_adjusted = "LPJmL.D",
  fert = "LPJmL.D-F", fert_adjusted = "LPJmL.D-F",
  irr = "LPJmL.D-I", irr_adjusted = "LPJmL.D-I",
  gs = "LPJmL.D-GS", gs_adjusted = "LPJmL.D-GS",
  pz = "LPJmL.D-PS", pz_adjusted = "LPJmL.D-PS",
  till = "LPJmL.D-T", adjusted = "LPJmL.D-T",
  glob = "LPJmL.G", glob_adjusted = "LPJmL.G"
)
has_label <- run_label %in% names(stem_to_label)
run_label[has_label] <- unname(stem_to_label[run_label[has_label]])
data_runs_year$run.agg <- run_label

rm(is_observed, tillage_labels, in_adjusted, in_daycent, in_original,
   model_type, run_stem, run_label, stem_to_label, has_label)

#Subset data to separate tillage and no-tillage into different dataframes of runs and observations
#in order to calculate absolute differences between tillage types

data_runs_year.till <-
  subset(data_runs_year, data_runs_year$till.type == "Conv. tillage")
data_runs_year.notill <-
  subset(data_runs_year, data_runs_year$till.type == "No tillage")
data_obs_year.till <-
  subset(data_obs_year, data_obs_year$till.type == "Conv. tillage")
data_obs_year.notill <-
  subset(data_obs_year, data_obs_year$till.type == "No tillage")

#having till and no till data next to each other for calculating differences, runs
# the remaining columns get the suffix ".till" or ".notill"
runs.till.type <-
  merge(
    data_runs_year.till,
    data_runs_year.notill,
    by = c("date", "site", "run.agg", "adj.type"),
    suffixes = c(".till", ".notill")
  )

#having till and no till data next to each other for calculating differences, obs
obs.till.type <-
  merge(
    data_obs_year.till,
    data_obs_year.notill,
    by = c("date", "site", "run.agg", "adj.type"),
    suffixes = c(".till", ".notill")
  )

# calculate absolute differences between tillage types; observations
obs.till.type$abs.obs <-
  abso(obs.till.type$n2o_tot.notill, obs.till.type$n2o_tot.till)

##Merging observation and runs df's after calculating absolute differences
# Left join: every run row is kept, with NA in the ".obs" columns where no
# observation exists for that date and site. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
runs.obs.year <-
  merge(
    runs.till.type,
    obs.till.type,
    by = c("date", "site"),
    suffixes = c(".runs", ".obs"),
    all.x = TRUE,
    all.y = FALSE
  )
# calculate absolute differences between tillage types; runs
runs.obs.year$abs.runs <-
  abso(runs.obs.year$n2o_tot.notill.runs,
       runs.obs.year$n2o_tot.till.runs)

#Yearly averages for Fig 1-4####
#Calculate averages over tillage type (not the differences), site and year; runs
runs.agg.year <-
  aggregate(
    n2o_tot ~ year + site + till.type + adj.type + run.agg,
    data = data_runs_year,
    FUN = mean
  )

# subset Daycent, lpjml global and lpjml detailed run
runs.agg.year <-
  subset(
    runs.agg.year,
    runs.agg.year$run.agg %in% c("Daycent", "LPJmL.D", "LPJmL.G")
  )

#Calculate averages over tillage type (not the differences), site and year; observations
obs.agg.year <-
  aggregate(
    n2o_tot ~ year + site + run.agg + till.type,
    data = data_obs_year,
    FUN = mean
  )
# The columns shared by both frames (run.agg, n2o_tot) get the suffixes ".run"
# and ".obs". The argument is spelled out as `suffixes`; `suffix` only worked
# through partial argument matching.
yearly.agg.all <-
  merge(
    runs.agg.year,
    obs.agg.year,
    by = c("year", "site", "till.type"),
    suffixes = c(".run", ".obs")
  )

#Subset LPJmL runs: "orig." and "global" run (Fig 1a)
yearly.agg.g <-
  subset(
    yearly.agg.all,
    yearly.agg.all$adj.type != "LPJmL.Mod." &
      yearly.agg.all$run.agg.run != "LPJmL.D"
  )

#Subset LPJmL runs: "orig." and "detail" run (Fig 1b)
yearly.agg.d <-
  subset(
    yearly.agg.all,
    yearly.agg.all$adj.type != "LPJmL.Mod." &
      yearly.agg.all$run.agg.run != "LPJmL.G"
  )

#Subset LPJmL runs: "modified" and "detail" run (Fig 5)
yearly.agg.d.mod <-
  subset(
    yearly.agg.all,
    yearly.agg.all$adj.type != "LPJmL.Orig." &
      yearly.agg.all$run.agg.run != "LPJmL.G"
  )


#FINAL DATASET FOR YEAR, OBS-RUNS below each other, for boxplots####
#Subset data for observations and runs, harmonize structure, and rbind them

# observation side: drop every column that belongs to the runs
obs_abs <-
  subset(
    runs.obs.year,
    select = -c(
      adj.type.runs,
      run.agg.runs,
      year.till.runs,
      run.till.runs,
      n2o_tot.till.runs,
      till.type.till.runs,
      year.notill.runs,
      run.notill.runs,
      n2o_tot.notill.runs,
      till.type.notill.runs,
      abs.runs
    )
  )

# Keep only rows that have an observed difference. A logical mask is used
# because the former obs_abs[-which(is.na(...)), ] removed ALL rows whenever
# there was no NA at all (negative indexing with an empty index).
obs_abs <- obs_abs[!is.na(obs_abs$abs.obs), ]

#remove the duplicated data, that was needed for merge, to have for all runs an observation
# one row per date within each site, sites stacked in the order of `sites`
obs_abs <- do.call(rbind, lapply(sites, function(site_name) {
  site_rows <- subset(obs_abs, obs_abs$site == site_name)
  site_rows[!duplicated(site_rows$date), ]
}))

# Strip the merge suffix so both halves share column names. The pattern is
# escaped and anchored so that only a trailing ".obs" is removed.
colnames(obs_abs) <- sub("\\.obs$", "", colnames(obs_abs))

# run side: drop every column that belongs to the observations
run_abs <-
  subset(
    runs.obs.year,
    select = -c(
      adj.type.obs,
      run.agg.obs,
      year.till.obs,
      run.till.obs,
      n2o_tot.till.obs,
      till.type.till.obs,
      year.notill.obs,
      run.notill.obs,
      n2o_tot.notill.obs,
      till.type.notill.obs,
      abs.obs
    )
  )
colnames(run_abs) <- sub("\\.runs$", "", colnames(run_abs))

runs.obs.year.box <- rbind(obs_abs, run_abs)

#Factorize adjusted types for ggplot
# the level order fixes the plotting order

runs.obs.year.box$run.agg <-
  factor(runs.obs.year.box$run.agg, levels = run.vect)
runs.obs.year.box$adj.type <-
  factor(
    runs.obs.year.box$adj.type,
    levels = c("Observed", "LPJmL.Orig.", "LPJmL.Mod.", "Daycent")
  )
rm(run_abs, obs_abs)


####################################################
##################PLOTTING##########################
####################################################

########################################################
#Figures 3, Appendix A1 and A2
########################################################

# subset the "lpjml.orig." runs
# (everything except the modified LPJmL runs)
runs.obs.year.box_orig <-
  subset(runs.obs.year.box, runs.obs.year.box$adj.type != "LPJmL.Mod.")

#Statistics for boxplots to be shown in figure; original runs
# `means` holds the median difference (2 decimals) and the number of values
# for each run and site
means <- aggregate(
  abs ~ run.agg + site,
  data = runs.obs.year.box_orig,
  FUN = median
)
means$abs <- round(means$abs, digits = 2)
counts <- aggregate(
  abs ~ run.agg + site,
  data = runs.obs.year.box_orig,
  FUN = length
)
# same formula and data as `means`, so the rows line up
means$count <- counts[["abs"]]

#Statistics for boxplots to be shown in figure; modified runs
# same statistics, additionally split by adj.type and computed on all runs
means_ad <- aggregate(
  abs ~ run.agg + site + adj.type,
  data = runs.obs.year.box,
  FUN = median
)
sums_ad <- aggregate(
  abs ~ run.agg + site + adj.type,
  data = runs.obs.year.box,
  FUN = summary
)
means_ad$abs <- round(means_ad$abs, digits = 2)
counts_ad <- aggregate(
  abs ~ run.agg + site + adj.type,
  data = runs.obs.year.box,
  FUN = length
)
means_ad$counts_ad <- counts_ad[["abs"]]

#APPENDIX A1####
# Box plots of the no-till minus tillage N2O difference (`abs`) per run, one
# panel row per site, for everything except the modified LPJmL runs. The
# median is printed at y = 30 and the sample size at y = -20.
s1 <-
  ggplot(runs.obs.year.box_orig, aes(x = run.agg, y = abs, fill = run.agg)) +
  geom_boxplot(notch = FALSE,
               outlier.colour = "grey",
               outlier.alpha = 0.9) +
  # NOTE(review): no layer maps `colour`, so this scale has no visible effect;
  # the boxes are filled with the default palette.
  scale_colour_manual(values = cols.all.run) +
  ylab(expression(paste(N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"))) +
  guides(fill = "none") + # "none" replaces the deprecated `FALSE`
  labs(colour = "Legend") + #legend title
  ylim(-30, 40) +
  theme(plot.caption = element_text(hjust = 0)) + #figure number left
  geom_text(
    data = means,
    show.legend = FALSE,
    aes(label = abs, y = 30),
    size = 3.5,
    position = position_dodge(width = 0.8)
  ) + #position dodge allows it to have
  # numbers above each boxplot
  geom_text(data = means, aes(label = paste("n=", "", count), y = -20), size =
              3.5) +
  # an earlier theme(axis.text.x = element_blank()) was dropped here because
  # this call replaced it anyway
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 45),
    axis.title.x = element_blank()
  ) +
  scale_x_discrete(
    labels = c(
      "Observed" = "Observed",
      "LPJmL.D" = "LPJmL.D.Orig",
      "LPJmL.D-F" = "LPJmL.D-F.Orig",
      "LPJmL.D-I" = "LPJmL.D-I.Orig",
      "LPJmL.D-GS" = "LPJmL.D-GS.Orig",
      "LPJmL.D-PS" = "LPJmL.D-PS.Orig",
      "LPJmL.D-T" = "LPJmL.D-T.Orig",
      "LPJmL.G" = "LPJmL.G.Orig",
      "Daycent" = "Daycent"
    )
  ) +
  labs(caption = "A1") +
  facet_grid(site ~ .)

#Figure 3
# Same box plot as appendix figure A1, but the x axis is limited to the
# observations, the detailed and global LPJmL runs and Daycent. Rows of other
# runs fall outside the axis limits and are not drawn.
fig3 <-
  ggplot(runs.obs.year.box_orig, aes(x = run.agg, y = abs, fill = run.agg)) +
  geom_boxplot(notch = FALSE,
               outlier.colour = "grey",
               outlier.alpha = 0.9) +
  # NOTE(review): no layer maps `colour`, so this scale has no visible effect.
  scale_colour_manual(values = cols.sub.run) +
  ylab(expression(paste(N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"))) +
  guides(fill = "none") + # "none" replaces the deprecated `FALSE`
  labs(colour = "Legend") + #legend title
  ylim(-30, 40) +
  theme(plot.caption = element_text(hjust = 0)) + #figure number left
  geom_text(
    data = means,
    show.legend = FALSE,
    aes(label = abs, y = 30),
    size = 3,
    position = position_dodge(width = 0.8)
  ) + #position dodge allows it to have
  # numbers above each boxplot
  geom_text(data = means, aes(label = paste("n=", "", count), y = -20), size =
              3) +
  # an earlier theme(axis.text.x = element_blank()) was dropped here because
  # this call replaced it anyway
  theme(
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 45),
    axis.title.x = element_blank()
  ) +
  scale_x_discrete(
    limits = c("Observed", "LPJmL.D", "LPJmL.G", "Daycent"),
    labels = c(
      "Observed" = "Observed",
      "LPJmL.D" = "LPJmL.D.Orig",
      "LPJmL.G" = "LPJmL.G.Orig",
      "Daycent" = "Daycent"
    )
  ) +
  labs(caption = "Fig. 3") +
  facet_grid(site ~ .)

#APPENDIX A3
# Box plots of the tillage difference for original and modified LPJmL runs
# next to observations and Daycent; the box outline colour encodes adj.type.
# theme_bw() is a complete theme and replaces every theme setting added before
# it. It is therefore applied first and the individual tweaks (text size,
# rotated x labels, no x title, left-aligned caption) follow it. Previously
# the tweaks came first and were silently discarded.
s3 <-
  ggplot(runs.obs.year.box,
         aes(
           x = run.agg,
           y = abs,
           fill = run.agg,
           colour = adj.type
         )) +
  geom_boxplot(notch = FALSE,
               outlier.colour = "grey",
               outlier.alpha = 0.1) +
  scale_colour_manual(
    values = cols.adj.new,
    labels = c("Observed", "LPJmL.*.Orig", "LPJmL.*.Mod", "Daycent")
  ) +
  ylab(expression(paste(N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"))) +
  # "none" replaces the deprecated `FALSE`
  guides(colour = "none", fill = "none") +
  labs(colour = "Legend") + #legend title
  ylim(-30, 40) +
  geom_text(
    data = means_ad,
    show.legend = FALSE,
    aes(label = abs, y = 30),
    size = 3,
    position = position_dodge(width = 0.8)
  ) + #position dodge allows it to have
  # numbers above each boxplot
  geom_text(
    data = means_ad,
    show.legend = FALSE,
    aes(label = paste("n is", "", counts_ad),
        y = -15),
    size = 3
  ) +
  scale_x_discrete(limits = c("Observed", "LPJmL.D", "LPJmL.G", "Daycent")) +
  labs(caption = "A3") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45),
    axis.title.x = element_blank(),
    plot.caption = element_text(hjust = 0) #figure number left
  ) +
  facet_grid(site ~ .)

########################################################
#Figures 1A-2B, Figure 5-6
########################################################
#calculate differences between tillage types of the aggregated results,
#for different runs (e.g. global, detail, modified, daycent)

n2o.average.g <- yearly.agg.g
n2o.average.d <- yearly.agg.d
n2o.average.adj <- yearly.agg.d.mod

# split each set of yearly means by tillage type
n2o.average.g.till <-
  subset(n2o.average.g, n2o.average.g$till.type == "Conv. tillage")
n2o.average.g.notill <-
  subset(n2o.average.g, n2o.average.g$till.type == "No tillage")
n2o.average.d.till <-
  subset(n2o.average.d, n2o.average.d$till.type == "Conv. tillage")
n2o.average.d.notill <-
  subset(n2o.average.d, n2o.average.d$till.type == "No tillage")
n2o.average.adj.till <-
  subset(n2o.average.adj, n2o.average.adj$till.type == "Conv. tillage")
n2o.average.adj.notill <-
  subset(n2o.average.adj, n2o.average.adj$till.type == "No tillage")

#Combine Till and no-till, to have it next to each other for calculating differences,
# yearly aggregation.
# `suffixes` is spelled out in full; the former `suffix =` only worked through
# partial argument matching.
n2o.average.g.abs <-
  merge(
    n2o.average.g.till,
    n2o.average.g.notill,
    by = c("year", "site", "run.agg.run"),
    suffixes = c(".till", ".notill")
  )
n2o.average.d.abs <-
  merge(
    n2o.average.d.till,
    n2o.average.d.notill,
    by = c("year", "site", "run.agg.run"),
    suffixes = c(".till", ".notill")
  )
n2o.average.adj.abs <-
  merge(
    n2o.average.adj.till,
    n2o.average.adj.notill,
    by = c("year", "site", "run.agg.run"),
    suffixes = c(".till", ".notill")
  )

# simulated difference: no tillage minus tillage
n2o.average.g.abs$diff.runs <-
  n2o.average.g.abs$n2o_tot.run.notill -
  n2o.average.g.abs$n2o_tot.run.till
n2o.average.d.abs$diff.runs <-
  n2o.average.d.abs$n2o_tot.run.notill -
  n2o.average.d.abs$n2o_tot.run.till
n2o.average.adj.abs$diff.runs <-
  n2o.average.adj.abs$n2o_tot.run.notill -
  n2o.average.adj.abs$n2o_tot.run.till

# observed difference: no tillage minus tillage
n2o.average.g.abs$diff.obs <-
  n2o.average.g.abs$n2o_tot.obs.notill -
  n2o.average.g.abs$n2o_tot.obs.till
n2o.average.d.abs$diff.obs <-
  n2o.average.d.abs$n2o_tot.obs.notill -
  n2o.average.d.abs$n2o_tot.obs.till
n2o.average.adj.abs$diff.obs <-
  n2o.average.adj.abs$n2o_tot.obs.notill -
  n2o.average.adj.abs$n2o_tot.obs.till

#FIGURE 1####
# Observed against simulated yearly mean N2O, coloured by tillage type, with
# the point shape marking the model and a 1:1 reference line.

# axis titles shared by both panels
axis_title_observed <- expression(paste(
  "Observed", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))
axis_title_simulated <- expression(paste(
  "Simulated", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))

# panel (A): Daycent and the global LPJmL run
fig1a <-
  ggplot(n2o.average.g,
         aes(x = n2o_tot.run, y = n2o_tot.obs, colour = till.type)) +
  geom_point(aes(shape = run.agg.run)) +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0.5, size = 12), #caption centered
    text = element_text(size = 10) #text size
  ) +
  scale_shape_discrete(name = "Model",
                       labels = c("Daycent", "LPJmL.G.Orig")) +
  coord_fixed(ratio = 1,
              xlim = c(0, 80),
              ylim = c(0, 80)) +
  labs(
    caption = "(A)",
    colour = "Tillage type",
    shape = "Model",
    x = axis_title_simulated,
    y = axis_title_observed
  )

# panel (B): Daycent and the detailed LPJmL run; the model legend is placed
# before the tillage legend
fig1b <-
  ggplot(n2o.average.d,
         aes(x = n2o_tot.run, y = n2o_tot.obs, colour = till.type)) +
  geom_point(aes(shape = run.agg.run)) +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0.5, size = 12), #caption centered
    text = element_text(size = 10)
  ) +
  scale_shape_discrete(name = "Model",
                       labels = c("Daycent", "LPJmL.D.Orig")) +
  coord_fixed(ratio = 1,
              xlim = c(0, 80),
              ylim = c(0, 80)) +
  guides(colour = guide_legend(order = 2),
         shape = guide_legend(order = 1)) +
  labs(
    caption = "(B)",
    colour = "Tillage type",
    shape = "Model",
    x = axis_title_simulated,
    y = axis_title_observed
  )

#FIGURE 2 ####
# Observed against simulated tillage effect (no tillage minus tillage) of the
# yearly means, coloured by model, with a 1:1 reference line.

# axis titles shared by both panels
axis_title_observed <- expression(paste(
  "Observed", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))
axis_title_simulated <- expression(paste(
  "Simulated", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))

# panel (A): Daycent and the global LPJmL run
fig2a <-
  ggplot(n2o.average.g.abs,
         aes(x = diff.runs, y = diff.obs, colour = run.agg.run)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0.5, size = 12), #caption centered
    text = element_text(size = 10)
  ) +
  scale_color_discrete(name = "Model",
                       labels = c("Daycent", "LPJmL.G.Orig")) +
  coord_fixed(ratio = 1,
              xlim = c(-20, 40),
              ylim = c(-20, 40)) +
  labs(
    colour = "Model",
    caption = "(A)",
    x = axis_title_simulated,
    y = axis_title_observed
  )

# panel (B): Daycent and the detailed LPJmL run
fig2b <-
  ggplot(n2o.average.d.abs,
         aes(x = diff.runs, y = diff.obs, colour = run.agg.run)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0.5, size = 12), #caption centered
    text = element_text(size = 10)
  ) +
  scale_color_discrete(name = "Model",
                       labels = c("Daycent", "LPJmL.D.Orig")) +
  coord_fixed(ratio = 1,
              xlim = c(-20, 40),
              ylim = c(-20, 40)) +
  labs(
    colour = "Model",
    caption = "(B)",
    x = axis_title_simulated,
    y = axis_title_observed
  )

# axis titles shared by figures 5 and 6
axis_title_observed <- expression(paste(
  "Observed", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))
axis_title_simulated <- expression(paste(
  "Simulated", " ", N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^ -1, "]"
))

#FIGURE 6 ####
# Observed against simulated tillage effect. The first point layer draws the
# runs of n2o.average.d.abs, the second the runs of n2o.average.adj.abs; both
# inherit x, y and colour from the plot-level mapping.
fig6 <-
  ggplot(n2o.average.adj.abs,
         aes(x = diff.runs, y = diff.obs, colour = adj.type.till)) +
  geom_point(data = n2o.average.d.abs) + #alpha=0.5 ,show.legend=F
  geom_point() +
  scale_colour_manual(
    values = c("#F8766D", "#00BFC4", "grey"),
    name = "Model",
    labels = c("Daycent", "LPJmL.D.Mod", "LPJmL.D.Orig")
  ) +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0), #figure number left
    text = element_text(size = 10)
  ) +
  coord_fixed(ratio = 1,
              xlim = c(-20, 40),
              ylim = c(-20, 40)) +
  labs(
    caption = "Fig. 6",
    colour = "Model",
    x = axis_title_simulated,
    y = axis_title_observed
  )

#FIGURE 5
# Observed against simulated yearly mean N2O. The runs of n2o.average.d are
# drawn in grey underneath the runs of n2o.average.adj, which are coloured by
# tillage type; the point shape marks the model.
fig5 <-
  ggplot(n2o.average.adj,
         aes(x = n2o_tot.run, y = n2o_tot.obs, colour = till.type)) +
  geom_point(
    data = n2o.average.d,
    aes(shape = adj.type),
    colour = "grey"
  ) + #alpha=0.5 ,show.legend=F
  geom_point(aes(shape = adj.type)) +
  scale_shape_discrete(name = "Model",
                       labels = c("Daycent", "LPJmL.D.Mod", "LPJmL.D.Orig")) +
  geom_abline(intercept = 0, slope = 1) +
  theme(
    plot.caption = element_text(hjust = 0), #figure number left
    text = element_text(size = 10)
  ) +
  coord_fixed(ratio = 1,
              xlim = c(0, 80),
              ylim = c(0, 80)) +
  labs(
    colour = "Tillage type",
    shape = "Model",
    caption = "Fig. 5",
    x = axis_title_simulated,
    y = axis_title_observed
  )

########################################################
#Figure 4
########################################################
# Load dataframe water (created in read.runs.R)

water.frame <-
  get(load(paste0(working.path, "waterframe", ".RData"))) #add path
# rename factor levels of adj.type to the labels used in this script
# NOTE(review): this relies on adj.type being a factor in the stored data
# frame - confirm against read.runs.R.
levels(water.frame$adj.type)[levels(water.frame$adj.type) == "Obs."] <-
  "Observed"
levels(water.frame$adj.type)[levels(water.frame$adj.type) == "LPJmL.modified."] <-
  "LPJmL.Mod."
levels(water.frame$adj.type)[levels(water.frame$adj.type) == "Daycent.D"] <-
  "Daycent"

#Only read data water for experimental site in Nebraska
site.oi <- "nebraska"
# no-tillage series of the site of interest
water.site <-
  subset(water.frame,
         water.frame$site == site.oi &
           water.frame$till.type == "No tillage")
# same, without the modified LPJmL run
water.orig <-
  subset(water.site, water.site$adj.type != "LPJmL.Mod.")
water.all <-
  subset(water.frame,
         water.frame$site == site.oi & water.frame$run == "LPJmL.D")

if (site.oi == "nebraska") {
  water.orig <- subset(water.orig, water.orig$year >= 2011)
  # Map every date onto the same day of year in 2000 so that the years can be
  # averaged day by day.
  water.orig$day <-
    as.Date(paste0("2000-", format(water.orig$date, "%j")), "%Y-%j")

  #aggregate water information over all years
  water.orig.agg <-
    aggregate(first_layer ~ day + run + till.type,
              data = water.orig,
              FUN = mean)
  water.prop <-
    aggregate(Property ~ day + run + till.type + Hydr.prop,
              data = water.orig,
              FUN = mean)
  # Full outer join: rows without a partner are kept and filled with NA.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  water.tog <-
    merge(
      water.orig.agg,
      water.prop,
      by = c("day", "run", "till.type"),
      all = TRUE
    )
  water.orig.run <- subset(water.tog, water.tog$run != "Observed")
  water.orig.obs <- subset(water.tog, water.tog$run == "Observed")

  # same aggregation including the modified run, additionally split by adj.type
  water.site <- subset(water.site, water.site$year >= 2011)
  water.site$day <-
    as.Date(paste0("2000-", format(water.site$date, "%j")), "%Y-%j")
  water.site.agg <-
    aggregate(first_layer ~ day + run + till.type + adj.type,
              data = water.site,
              FUN = mean)
  water.prop.agg <-
    aggregate(Property ~ day + run + till.type + Hydr.prop + adj.type,
              data = water.site,
              FUN = mean)
  water.tog.adj <-
    merge(
      water.site.agg,
      water.prop.agg,
      by = c("day", "run", "till.type", "adj.type"),
      all = TRUE
    )
  water.adj.run <-
    subset(water.tog.adj, water.tog.adj$run != "Observed")
  water.adj.obs <-
    subset(water.tog.adj, water.tog.adj$run == "Observed")
}

#FIGURE 4 ####
# Day-of-year course of `first_layer` (y axis labelled WFPS) for the model
# runs as lines and for the observations as points. The `Property` column is
# drawn as additional lines whose line type follows Hydr.prop.
fig4 <-
  ggplot(water.adj.run, aes(x = day, y = first_layer)) + #detail and daycent
  geom_line(aes(colour = adj.type)) +
  # a constant fill mapping gives the observation points their own legend
  geom_point(data = water.adj.obs, aes(x = day, y = first_layer, fill =
                                         "Observations")) +
  geom_line(aes(
    x = day,
    y = Property,
    colour = adj.type,
    linetype = Hydr.prop
  )) +
  scale_color_manual(values = cols.water,
                     labels = c("Daycent", "LPJmL.D.Orig", "LPJmL.D.Mod")) +
  scale_fill_manual(name = "Type", values = c("Observations" = "black")) +
  scale_x_date(
    labels = function(x)
      format(x, "%d-%b")
  ) +
  theme(text = element_text(size = 12), axis.title.x = element_blank()) +
  labs(colour = "Model") + #legend title
  labs(caption = "Fig. 4") +
  theme(plot.caption = element_text(hjust = 0)) + #figure number left
  # FALSE is spelled out because F is an ordinary, reassignable variable
  scale_linetype_manual(values = linet, na.translate = FALSE) +
  ylab("WFPS [fraction]") +
  scale_y_continuous(limits = c(0, 1))

########################################################
#Figure A2
########################################################
#Dataframe is created in "waterframe.R"

nit_denit_data <-
  get(load(paste0(working.path, "nit_denit_all", ".RData"))) #add path
# total N2O = nitrification + denitrification
nit_denit_data$n2o_tot <-
  nit_denit_data$n2o_nit + nit_denit_data$n2o_denit

#subset years of interest based on the different sites
# The year windows are keyed by site name instead of by the position of a site
# in `sites`, so reordering `sites` cannot attach a window to the wrong site.
site_year_filters <- list(
  colorado = function(year) year >= 2002 & year <= 2006,
  nebraska = function(year) year > 2010,
  boigneville = function(year) year == 2003,
  michigan = function(year) {
    year %in% c(1991, 1993, 1996, 1999, 2002, 2005, 2008)
  }
)
# The per-site selections are stacked in one rbind() call. The former loop
# started from `all_sites <- NA`, which put an all-NA row at the top of the
# result, and silently re-added the previous site's rows for any site without
# a year window.
all_sites <- do.call(rbind, lapply(sites, function(site_name) {
  in_period <- site_year_filters[[site_name]]
  if (is.null(in_period)) {
    stop("no years of interest defined for site '", site_name, "'",
         call. = FALSE)
  }
  site_rows <- subset(nit_denit_data, nit_denit_data$site == site_name)
  subset(site_rows, in_period(site_rows$year))
}))
nit_denit_data <- all_sites

#Harmonize model names
# old run name -> name used in the figures
run_renames <- c(
  "Detail" = "LPJmL.D",
  "Detail-Fert." = "LPJmL.D-F",
  "Detail-Irr." = "LPJmL.D-I",
  "Detail-GS" = "LPJmL.D-GS",
  "Detail-Pool.S" = "LPJmL.D-PS",
  "Detail-Till.T" = "LPJmL.D-T",
  "Global" = "LPJmL.G"
)
for (old_name in names(run_renames)) {
  nit_denit_data$run[which(nit_denit_data$run == old_name)] <-
    run_renames[[old_name]]
}
# old adj.type label -> label used in the figures
adj_renames <- c(
  "LPJmL.Orig" = "LPJmL.Orig.",
  "LPJmL.modified." = "LPJmL.Mod."
)
for (old_name in names(adj_renames)) {
  nit_denit_data$adj.type[which(nit_denit_data$adj.type == old_name)] <-
    adj_renames[[old_name]]
}
# keep the detailed LPJmL run and Daycent only (the global run is left out);
# rows with a missing run are dropped as well
nit_denit_data <-
  subset(nit_denit_data,
         nit_denit_data$run %in% c("LPJmL.D", "Daycent.D"))
# Daycent is renamed only after the filter above, which still uses the old name
nit_denit_data$run[which(nit_denit_data$run == "Daycent.D")] <-
  "Daycent"
nit_denit_data$adj.type[which(nit_denit_data$adj.type == "Daycent.D")] <-
  "Daycent"
rm(run_renames, adj_renames, old_name)

# split by tillage type
nit_denit_data.till <-
  subset(nit_denit_data, nit_denit_data$till.type == "Conv. tillage")
nit_denit_data.notill <-
  subset(nit_denit_data, nit_denit_data$till.type == "No tillage")

#having till and no till data next to each other to calculate differences, runs
nit_denit.till.type <-
  merge(
    nit_denit_data.till,
    nit_denit_data.notill,
    by = c("date", "site", "run", "adj.type"),
    suffixes = c(".till", ".notill")
  )

# tillage effect (no tillage minus tillage) for each N2O source and the total
nit_denit.till.type$abso.nit <- with(
  nit_denit.till.type, n2o_nit.notill - n2o_nit.till
)
nit_denit.till.type$abso.denit <- with(
  nit_denit.till.type, n2o_denit.notill - n2o_denit.till
)
nit_denit.till.type$abso.tot <- with(
  nit_denit.till.type, n2o_tot.notill - n2o_tot.till
)

# One long-format frame per source: identifying columns, the difference
# renamed to "n2o", and a label column naming the source.
id_cols <- c("date", "site", "run", "adj.type")
#Data for nitrification
nitr <- nit_denit.till.type[, c(id_cols, "abso.nit"), drop = FALSE]
colnames(nitr)[5] <- "n2o"
nitr$n_source <- "Nitrification"
#Data for denitrification
denit <- nit_denit.till.type[, c(id_cols, "abso.denit"), drop = FALSE]
colnames(denit)[5] <- "n2o"
denit$n_source <- "Denitrification"
#Data for totals
tot <- nit_denit.till.type[, c(id_cols, "abso.tot"), drop = FALSE]
colnames(tot)[5] <- "n2o"
tot$n_source <- "Total"
rm(id_cols)

#Statistics shown in boxplots

n_sources <- rbind(nitr, denit, tot)
# the factor levels fix the plotting order
n_sources$n_source <- factor(
  n_sources$n_source,
  levels = c("Total", "Denitrification", "Nitrification")
)
n_sources$adj.type <- factor(
  n_sources$adj.type,
  levels = c("LPJmL.Orig.", "LPJmL.Mod.", "Daycent")
)
# median (2 decimals) and number of values per run, site, source and adj.type;
# note that this overwrites the `means` / `counts` of the earlier box plots
means <- aggregate(
  n2o ~ run + site + n_source + adj.type,
  data = n_sources,
  FUN = median
)
counts <- aggregate(
  n2o ~ run + site + n_source + adj.type,
  data = n_sources,
  FUN = length
)
means$counts <- counts[["n2o"]]
means$n2o <- round(means$n2o, digits = 2)


#Figure A2####


# Box plots of the tillage effect on N2O, split by source (total,
# denitrification, nitrification), with one panel per site and run.
# theme_bw() is a complete theme and replaces every theme setting added before
# it. It is therefore applied first and the individual tweaks (text size,
# no x title, left-aligned caption) follow it. Previously the text-size and
# axis-title settings came first and were silently discarded.
s2 <-
  ggplot(n_sources, aes(
    x = n_source,
    y = n2o,
    fill = run,
    colour = adj.type
  )) +
  geom_boxplot(notch = FALSE,
               outlier.colour = "grey",
               outlier.alpha = 0.1) +
  scale_colour_manual(values = cols.adj.new) +
  ylab(expression(paste(N[2], "O-N", " ", "[g N ha" ^ -1, "d" ^
                          -1, "]"))) +
  # median above the boxes
  geom_text(
    data = means,
    show.legend = FALSE,
    aes(label = n2o, y = 30),
    size = 3.5,
    position = position_dodge(width = 0.8)
  ) +
  ylim(-30, 40) +
  # sample size below the boxes
  geom_text(
    data = means,
    show.legend = FALSE,
    aes(label = paste("n is", "", counts),
        y = -15),
    size = 3.5
  ) +
  labs(caption = "A2") +
  theme_bw() +
  theme(
    text = element_text(size = 18),
    axis.title.x = element_blank(),
    plot.caption = element_text(hjust = 0) #figure number left
  ) +
  facet_grid(site ~ run)



###Figures papers ####
# All figures are written to a single PDF (8.3 x 11.7 inches); each
# grid.arrange() call below lays out one set of plots on a 2-row grid.
pdf(
  file = paste0(working.path, "FIGURES", ".pdf"),
  width = 8.3,
  height = 11.7
)
# Figures 1 and 2 share a 2 x 2 layout
grid.arrange(fig1a, fig1b, fig2a, fig2b, nrow = 2, ncol = 2)
grid.arrange(fig3, nrow = 2, ncol = 2)
# first column slightly wider than the second
grid.arrange(fig4, nrow = 2, ncol = 2, widths = c(0.57, 0.43))
grid.arrange(fig5, fig6, nrow = 2, ncol = 2)
# appendix figures
grid.arrange(s1, nrow = 2)
grid.arrange(s2, nrow = 2)
grid.arrange(s3, nrow = 2, ncol = 2, widths = c(0.57, 0.43))
dev.off()
