#'========================================================================================
#' Project: Van Dijk et al. (2021), Nature Food
#' Subject: Code to conduct meta-regression
#' Author:  Michiel van Dijk
#' Contact: michiel.vandijk@wur.nl
#' Website: www.michielvandijk.org
#'========================================================================================

# ========================================================================================
# SET UP ---------------------------------------------------------------------------------
# ========================================================================================

# Make sure to run the LOAD DATA part in nf_figures.r first before you run this script!

# Load key packages
p_load("tidyverse", "scales", "glue", "cowplot", "readxl", "RColorBrewer",
       "lme4", "lmerTest", "emmeans", "texreg", "effects")


# ========================================================================================
# PREPARATION ----------------------------------------------------------------------------
# ========================================================================================

# set colors
ssp_col <- c(brewer.pal(n = 5, name = "Set1"), "grey50")
names(ssp_col) <- c("SSP3", "SSP2", "SSP1", "SSP5", "SSP4", "No class")

# Only select baseline studies
db_meta <- db_raw %>%
  filter(type == "Baseline")

# Set order of SSP and RCP
db_meta <- db_meta %>%
  mutate(ssp = factor(ssp, levels = c("SSP1", "SSP2", "SSP3", "SSP4", "SSP5", "No class")),
         rcp = factor(rcp, levels = c("NOCC", "RCP2.6", "RCP4.5", "RCP6.0", "RCP8.5", "No class")))

# Create factor variables
db_meta <- db_meta %>%
  mutate(study_short = factor(study_short))

# Remove no class
db_meta <- db_meta %>%
  filter(ssp != "No class",
         rcp != "No class")

# select growth rate
db_meta_growth <- db_meta %>%
  dplyr::select(-gr10) %>%
  filter(variable %in% c("CALO", "CONS", "PRH"),
         year %in% c(2010, 2050)) %>%
  droplevels() %>%
  group_by(study_short, variable, unit, region, sector, projection, model, baseyear,
           ssp, rcp, pure_ssp, pure_rcp, source) %>%
  mutate(growth = (proj-proj[year == 2010])/proj[year == 2010]*100) %>%
  filter(year == 2050) %>%
  gather(indicator, value, -year, -study_short, -variable, -unit, -region, -sector, -projection,
         -model, -baseyear, -ssp, -rcp, -pure_ssp, -pure_rcp, -source) %>%
  filter(indicator == "growth") %>%
  mutate(value = as.numeric(value))

# Recode model type for analysis
model_meta <- model_raw %>%
  mutate(type = ifelse(!type %in% c("CGE", "PE"), "Other", type))

# Prepare variables for analysis
db_meta_growth <- db_meta_growth %>%
  mutate(
    year_dum = case_when(
      baseyear < 2010 ~ "<2010",
      baseyear == 2010 ~ "2010",
      baseyear > 2010 ~ ">2010"),
    year_dum = factor(year_dum, levels = c("2010", "<2010", ">2010")),
    model_dum = case_when(
      model %in% model_meta$model[model_meta$type == "CGE"] ~ "CGE",
      model %in% model_meta$model[model_meta$type == "PE"] ~ "PE",
      model %in% model_meta$model[model_meta$type == "Other"] ~ "OM",
      model == "Statistical extrapolation" ~ "SE",
      model == "Expert input" ~ "EI"),
    model_dum = factor(model_dum, levels = c("CGE", "PE", "SE", "EI", "OM")),
    model = factor(model),
    ssp_rcp = interaction(ssp, rcp, drop = TRUE))

# Add study level variables
db_meta_growth <- db_meta_growth %>%
  left_join(study_level) %>%
  mutate(single_multi_model = as.factor(single_multi_model),
         jb_published = case_when(
           type_of_publication %in% c("Journal article", "Book (chapter)") ~ "Published",
           type_of_publication %in% c("Report/working paper") ~ "Unpublished"),
         j_published = case_when(
           type_of_publication %in% c("Journal article") ~ "Published",
           type_of_publication %in% c("Report/working paper", "Book (chapter)") ~ "Unpublished"),
         jb_published = as.factor(jb_published),
         j_published = as.factor(j_published),
         type_of_publication = as.factor(type_of_publication))

# Set dummy reference levels
db_meta_growth <- db_meta_growth %>%
  mutate(ssp_rcp = relevel(ssp_rcp , ref = "SSP2.NOCC"),
         j_published = relevel(j_published, ref = "Unpublished"),
         jb_published = relevel(jb_published, ref = "Unpublished"))

# ========================================================================================
# CALO -----------------------------------------------------------------------------------
# ========================================================================================

# Prepare
calo <- db_meta_growth %>%
  filter(variable == "CALO") %>%
  droplevels()

# Regression
calo_meta <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + (1 | study_short) + (1 | model), data = calo)

# Calculate point estimate and error bar
calo_pe <- Effect(c("ssp_rcp"), calo_meta)
calo_pe <- as.data.frame(calo_pe) %>%
  separate(ssp_rcp, into = c("ssp", "rcp"), sep = "\\.", extra = "merge")

# Pairwise comparison of dummies to test difference between NOCC and RCP within SSP
calo_pw <- as_tibble(pairs(emmeans(calo_meta, "ssp_rcp", lmer.df = "satterthwaite"))) %>%
  separate(contrast, into = c("ssp_rcp1", "ssp_rcp2"), sep = " - ") %>%
  separate(ssp_rcp1, into = c("ssp1", "rcp1"), sep = "\\.", extra = "merge") %>%
  separate(ssp_rcp2, into = c("ssp2", "rcp2"), sep = "\\.", extra = "merge")

calo_pw_ssp <- calo_pw %>%
  filter(rcp1 == rcp2, rcp1 == "NOCC") %>%
  mutate(variable = "CALO")

calo_pw_cc <- calo_pw %>%
  filter(ssp1 == ssp2) %>%
  filter(rcp1 %in% c("NOCC", "RCP8.5") & rcp2 %in% c("NOCC", "RCP8.5")) %>%
  mutate(variable = "CALO")


# ========================================================================================
# CONS -----------------------------------------------------------------------------------
# ========================================================================================

# Prepare
cons <- db_meta_growth %>%
  filter(variable == "CONS") %>%
  droplevels()

# Regression
cons_meta <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + (1 | study_short) + (1 | model), data = cons)

# Calculate point estimate and error bar
cons_pe <- Effect(c("ssp_rcp"), cons_meta)
cons_pe <- as.data.frame(cons_pe) %>%
  separate(ssp_rcp, into = c("ssp", "rcp"), sep = "\\.", extra = "merge")

# Pairwise comparison of dummies to test difference between NOCC and RCP within SSP
cons_pw <- as_tibble(pairs(emmeans(cons_meta, "ssp_rcp", lmer.df = "satterthwaite"))) %>%
  separate(contrast, into = c("ssp_rcp1", "ssp_rcp2"), sep = " - ") %>%
  separate(ssp_rcp1, into = c("ssp1", "rcp1"), sep = "\\.", extra = "merge") %>%
  separate(ssp_rcp2, into = c("ssp2", "rcp2"), sep = "\\.", extra = "merge")

cons_pw_ssp <- cons_pw %>%
  filter(rcp1 == rcp2, rcp1 == "NOCC") %>%
  mutate(variable = "CONS")

cons_pw_cc <- cons_pw %>%
  filter(ssp1 == ssp2) %>%
  filter(rcp1 %in% c("NOCC", "RCP8.5") & rcp2 %in% c("NOCC", "RCP8.5")) %>%
  mutate(variable = "CONS")


# ========================================================================================
# PRH ------------------------------------------------------------------------------------
# ========================================================================================

# Prepare
prh <- db_meta_growth %>%
  filter(variable == "PRH") %>%
  droplevels()

# Remove Dawson_et_al_2016 which is a clear outlier and distorts the estimations
prh <- prh %>%
  filter(study_short != "Dawson_et_al_2016")

# Regression
prh_meta <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + (1 | study_short) + (1 | model), data = prh)

# Calculate point estimate and error bar
prh_pe <- Effect(c("ssp_rcp"), prh_meta)
prh_pe <- as.data.frame(prh_pe) %>%
  separate(ssp_rcp, into = c("ssp", "rcp"), sep = "\\.", extra = "merge")

# Pairwise comparison of dummies to test difference between NOCC and RCP within SSP
prh_pw <- as_tibble(pairs(emmeans(prh_meta, "ssp_rcp", lmer.df = "satterthwaite"))) %>%
  separate(contrast, into = c("ssp_rcp1", "ssp_rcp2"), sep = " - ") %>%
  separate(ssp_rcp1, into = c("ssp1", "rcp1"), sep = "\\.", extra = "merge") %>%
  separate(ssp_rcp2, into = c("ssp2", "rcp2"), sep = "\\.", extra = "merge")

prh_pw_ssp <- prh_pw %>%
  filter(rcp1 == rcp2, rcp1 == "NOCC") %>%
  mutate(variable = "PRH")

prh_pw_cc <- prh_pw %>%
  filter(ssp1 == ssp2) %>%
  filter(rcp1 %in% c("NOCC", "RCP8.5") & rcp2 %in% c("NOCC", "RCP8.5")) %>%
  mutate(variable = "PRH")


# ========================================================================================
# TABLE S1 -------------------------------------------------------------------------------
# ========================================================================================

# Available on request


# ========================================================================================
# TABLE S2 -------------------------------------------------------------------------------
# ========================================================================================

# Available on request


# ========================================================================================
# TABLE S3--------------------------------------------------------------------------------
# ========================================================================================

screenreg(list(calo_meta, cons_meta, prh_meta),
          custom.model.names = c("Per capita food demand", "Total food demand", "Pop. at risk of hunger"),
          custom.coef.map = list(
            "(Intercept)" = "Constant",
            "ssp_rcpSSP1.NOCC" = "SSP1.NOCC",
            "ssp_rcpSSP2.NOCC" = "SSP2.NOCC",
            "ssp_rcpSSP3.NOCC" = "SSP3.NOCC",
            "ssp_rcpSSP4.NOCC" = "SSP4.NOCC",
            "ssp_rcpSSP5.NOCC" = "SSP5.NOCC",

            "ssp_rcpSSP1.RCP2.6" = "SSP1.RCP2.6",
            "ssp_rcpSSP2.RCP2.6" = "SSP2.RCP2.6",
            "ssp_rcpSSP3.RCP2.6" = "SSP3.RCP2.6",
            "ssp_rcpSSP4.RCP2.6" = "SSP4.RCP2.6",
            "ssp_rcpSSP5.RCP2.6" = "SSP5.RCP2.6",

            "ssp_rcpSSP1.RCP4.5" = "SSP1.RCP4.5",
            "ssp_rcpSSP2.RCP4.5" = "SSP2.RCP4.5",
            "ssp_rcpSSP3.RCP4.5" = "SSP3.RCP4.5",
            "ssp_rcpSSP4.RCP4.5" = "SSP4.RCP4.5",
            "ssp_rcpSSP5.RCP4.5" = "SSP5.RCP4.5",

            "ssp_rcpSSP1.RCP6.0" = "SSP1.RCP6.0",
            "ssp_rcpSSP2.RCP6.0" = "SSP2.RCP6.0",
            "ssp_rcpSSP3.RCP6.0" = "SSP3.RCP6.0",
            "ssp_rcpSSP4.RCP6.0" = "SSP4.RCP6.0",
            "ssp_rcpSSP5.RCP6.0" = "SSP5.RCP6.0",

            "ssp_rcpSSP1.RCP8.5" = "SSP1.RCP8.5",
            "ssp_rcpSSP2.RCP8.5" = "SSP2.RCP8.5",
            "ssp_rcpSSP3.RCP8.5" = "SSP3.RCP8.5",
            "ssp_rcpSSP4.RCP8.5" = "SSP4.RCP8.5",
            "ssp_rcpSSP5.RCP8.5" = "SSP5.RCP8.5",

            "pure_sspPure SSP" = "Pure SSP",
            "pure_rcpPure RCP" = "Pure RCP"
          ))



# ========================================================================================
# FIGURE S6 ------------------------------------------------------------------------------
# ========================================================================================

pw_ssp <- bind_rows(calo_pw_ssp, cons_pw_ssp, prh_pw_ssp) %>%
  mutate(ssp1_n = as.numeric((gsub('[A-Z]', "", ssp1))),
         ssp2_n = as.numeric((gsub('[A-Z]', "", ssp2))),
         midpoint = (ssp1_n + ssp2_n)/2)

# Function to rescale x-axes so very small values can be plotted
.pvtrans = scales::trans_new("Scaled P value", transform = function(x) emmeans:::.pval.tran(x),
                             inverse = function(p) emmeans:::.pval.inv(p),
                             format = function(x) format(x, drop0trailing = TRUE,
                                                         scientific = FALSE), domain = c(0, 1))
# set ssp colors
ssp_col <- c(brewer.pal(n = 5, name = "Set1"), "grey50")
names(ssp_col) <- c("SSP3", "SSP2", "SSP1", "SSP5", "SSP4", "No class")

# p-values plot for comparison of SSPs
fig_pw_ssp <- pw_ssp %>%
  ggplot() +
  geom_linerange(aes(x = p.value, y = ssp1, ymin = ssp1, ymax = ssp2),
                 position = position_dodge2(.1), color = "blue") +
  facet_wrap(~variable) +
  labs(y = NULL, x = "p-value") +
  scale_x_continuous(trans = .pvtrans,
                     breaks = emmeans:::.pvmaj.brk, minor_breaks = emmeans:::.pvmin.brk) +
  scale_y_discrete(expand = expansion(mult = c(.05, .05))) +
  scale_color_manual(values = ssp_col) +
  theme_bw(base_size = 14)


# ========================================================================================
# FIGURE S7 ------------------------------------------------------------------------------
# ========================================================================================

# p-values plot for comparison of NOCC and RCP8.5
pw_ssp_cc <- bind_rows(calo_pw_cc, cons_pw_cc, prh_pw_cc)

fig_pw_ssp_cc <- pw_ssp_cc %>%
  ggplot() +
  geom_linerange(aes(x = p.value, y = rcp1, ymin = rcp1, ymax = rcp2,
                     color = ssp1, group = ssp1),
                 position = position_dodge2(.1)) +
  facet_wrap(~variable) +
  labs(y = NULL, x = "p-value") +
  scale_x_continuous(trans = .pvtrans,
                     limits = c(0,1.05),
                     breaks = emmeans:::.pvmaj.brk, minor_breaks = emmeans:::.pvmin.brk) +
  scale_y_discrete(expand = expansion(mult = c(.05, .05))) +
  scale_color_manual(values = ssp_col) +
  theme_bw(base_size = 14) +
  theme(legend.position = "bottom")


# ========================================================================================
# TABLE S4 -------------------------------------------------------------------------------
# ========================================================================================


calo_m <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + model_dum + (1 | study_short) + (1 | model), data = calo)
cons_m <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + model_dum + (1 | study_short) + (1 | model), data = cons)
prh_m <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + model_dum + (1 | study_short) + (1 | model), data = prh)

screenreg(list(calo_m, cons_m, prh_m),
          custom.model.names = c("Per capita food demand", "Total food demand", "Pop. at risk of hunger"),
          custom.coef.map = list(
            "(Intercept)" = "Constant",
            "model_dumPE" = "PE model",
            "model_dumSE" = "Statistical extrapolation",
            "model_dumEI" = "Expert input",
            "model_dumOM" = "Other"),
          custom.gof.rows = list("SSP.RCP interaction" = c("Yes", "Yes", "Yes"),
                                 "Pure SSP" = c("Yes", "Yes", "Yes"),
                                 "Pure RCP" = c("Yes", "Yes", "Yes")))


# ========================================================================================
# TABLE S5--------------------------------------------------------------------------------
# ========================================================================================

# Removed pure_ssp from calo_j and calo_jb to avoid singular model fit
calo_j <- lmer(value ~ ssp_rcp + pure_rcp + j_published + (1 | study_short) + (1 | model), data = calo)
cons_j <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + j_published + (1 | study_short) + (1 | model), data = cons)
prh_j <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + j_published + (1 | study_short) + (1 | model), data = prh)

calo_jb <- lmer(value ~ ssp_rcp + pure_rcp + jb_published + (1 | study_short) + (1 | model), data = calo)
cons_jb <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + jb_published + (1 | study_short) + (1 | model), data = cons)
prh_jb <- lmer(value ~ ssp_rcp + pure_ssp + pure_rcp + jb_published + (1 | study_short) + (1 | model), data = prh)

screenreg(list(calo_j, cons_j, prh_j, calo_jb, cons_jb, prh_jb),
custom.header = list("Journal only" = 1:3, "Journal and book" = 4:5),
custom.model.names = c("Per capita food demand", "Total food demand", "Pop. at risk of hunger",
                       "Per capita food demand", "Total food demand", "Pop. at risk of hunger"),
custom.coef.map = list(
  "(Intercept)" = "Constant",
  "j_publishedPublished" = "Published in journal",
  "jb_publishedPublished" = "Published in journal or book"),
custom.gof.rows = list("SSP.RCP interaction" = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                       "Pure SSP" = c("No", "Yes", "Yes", "No", "Yes", "Yes"),
                       "Pure RCP" = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes")))


# ========================================================================================
# TABLE S6 -------------------------------------------------------------------------------
# ========================================================================================

# Available on request


# ========================================================================================
# TABLE S7 -------------------------------------------------------------------------------
# ========================================================================================

# Available on request

