

# geom_hex # hexbin for lots of points, or .png

# --------------------------------------------------------------------------------------------------------------------------------
# functions

# "not in" operator: TRUE for every element of `x` that is absent from `table`
`%!in%` <- function(x, table) !(x %in% table)


# --------------------------------------------------------------------------------------------------------------------------------
# options

# do not convert character columns to factors when data frames are built
# (this is already the default from R 4.0.0 onwards)
options(stringsAsFactors = FALSE)


# --------------------------------------------------------------------------------------------------------------------------------
# packages

# install.packages("caret", dependencies = c("Imports", "Depends", "Suggests"))
packages <- c(
    "corrplot", "ggplot2", "grid", "gridExtra", "scales", "RColorBrewer", "maps",
    "plyr", "reshape2", "caret", "GGally", "choroplethr", "choroplethrMaps", "ggthemes"
)
# attach every package; the printed named logical vector shows which ones loaded
vapply(packages, require, logical(1), character.only = TRUE)


######################################################################################
# set global ggplot theme

# Publication-style ggplot2 theme built on top of ggthemes::theme_foundation().
#
# Args:
#   base_size:   base font size handed to theme_foundation().
#   base_family: base font family handed to theme_foundation().
#   ...:         optional theme elements (as accepted by ggplot2::theme());
#                they are applied last, so they override the defaults below.
#
# Returns: a ggplot2 theme object that can be added to a plot with `+`.
theme_publication <- function(base_size = 12, base_family = "Helvetica", ...) {
      # namespace-qualified calls replace require(): a missing package now stops
      # with "there is no package called ..." instead of require() returning FALSE
      base_theme <- ggthemes::theme_foundation(base_size = base_size, base_family = base_family)

      defaults <- theme(
               plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(color = NA),
               plot.background = element_rect(color = NA),
               panel.border = element_rect(color = "black", size = 1),
               axis.title = element_text(face = "plain", size = rel(1)),
               axis.title.y = element_text(angle = 90, vjust = 2, margin = margin(r = 7)),
               axis.title.x = element_text(vjust = -0.2, margin = margin(t = 10)),
               axis.text = element_text(size = rel(0.9)),
               #axis.line.y = element_line(color="black"),
               #axis.line.x = element_line(color="black"),
               axis.ticks = element_line(),
               panel.grid.minor = element_blank(),
               # explicitly set the horizontal lines (or they will disappear too)
               panel.grid.major.y = element_line(size = .5, color = "#f0f0f0"),
               panel.grid.major.x = element_blank(),
               panel.spacing = grid::unit(0.1, "lines"),
               #legend.key = element_rect(color = NA),
               legend.position = "none",
               #legend.direction = "horizontal",
               legend.key.size = grid::unit(0.5, "cm"),
               legend.spacing = grid::unit(0, "cm"),
               #legend.title = element_text(face="italic"),
               legend.text = element_text(size = 8),
               plot.margin = grid::unit(c(10, 5, 5, 5), "mm"),
               # strip.text = element_blank(),
               strip.background = element_blank()
      )

      # theme(...) with no arguments is an empty theme, so calls without extra
      # arguments produce the same result as before
      base_theme + defaults + theme(...)
}

# Discrete fill scale using the nine-colour "Publication" palette.
#
# Args:
#   ...: further arguments passed on to ggplot2::discrete_scale().
#
# Returns: a ggplot2 scale object for the `fill` aesthetic.
scale_fill_publication <- function(...) {
      publication_colours <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                               "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
      # scales::manual_pal() replaces require(scales), which returns FALSE
      # instead of stopping when the package is missing
      discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}

# Discrete colour scale using the nine-colour "Publication" palette.
#
# Args:
#   ...: further arguments passed on to ggplot2::discrete_scale().
#
# Returns: a ggplot2 scale object for the `colour` aesthetic.
scale_color_publication <- function(...) {
      publication_colours <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                               "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
      # scales::manual_pal() replaces require(scales), which returns FALSE
      # instead of stopping when the package is missing
      discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}


# --------------------------------------------------------------------------------------------------------------------------------
# functions

# Convert strings to "Simple Cap" form: everything lower-cased, then the first
# letter of each space-separated word upper-cased.
#
# Args:
#   x: character vector (anything tolower() accepts).
#
# Returns: unnamed character vector of the same length as `x`; NA stays NA.
#
# Previously only the first element of `x` was processed and NA was turned
# into the string "NANA"; length-one, non-NA input gives the same result as before.
simpleCap <- function(x) {
  words <- strsplit(tolower(x), " ", fixed = TRUE)
  capped <- vapply(
    words,
    function(w) paste0(toupper(substring(w, 1, 1)), substring(w, 2), collapse = " "),
    character(1),
    USE.NAMES = FALSE
  )
  capped[is.na(x)] <- NA_character_
  capped
}

# tools::toTitleCase(tolower(x))

# Drop rows that a grouped boxplot would draw as outliers.
#
# For every combination of `by_vars` the whisker ends reported by boxplot()
# (rows 1 and 5 of its `stats` matrix) are used as inclusive limits for
# `limit_var`; rows outside the limits are removed.
#
# Args:
#   dat:       data frame holding `limit_var` and all `by_vars`.
#   form:      ignored; the formula is always rebuilt from `limit_var` and
#              `by_vars`. Kept only so existing calls keep working.
#   limit_var: name (string) of the numeric column to filter on.
#   by_vars:   character vector with the names of the grouping columns.
#
# Returns: data frame with the retained rows, ordered by group, with the helper
#   columns `level`, `lower` and `upper` added and row names reset.
rm_box_outliers <- function(dat, form, limit_var, by_vars) {
    # get boxplot outlier limits
    form <- formula(paste(limit_var, "~", paste(by_vars, collapse = " + ")))
    box_stats <- boxplot(form, data = dat, plot = FALSE)
    sts_df <- data.frame(
        level = box_stats$names,
        lower = box_stats$stats[1, ],
        upper = box_stats$stats[5, ],
        stringsAsFactors = FALSE
    )

    # label every row with its group; drop = FALSE keeps a data frame even for a
    # single grouping column
    dat$level <- interaction(dat[, by_vars, drop = FALSE])
    dat <- merge(dat, sts_df, by = "level", all.x = TRUE)

    # exclude data outside of limits, group by group
    dat_split <- split(dat, f = dat$level)
    dat_split <- lapply(dat_split, function(x) {
        inside <- x[[limit_var]] >= x[["lower"]] & x[[limit_var]] <= x[["upper"]]
        # which() discards NA comparisons; indexing with the raw logical vector
        # would insert an all-NA row for every missing value
        x[which(inside), , drop = FALSE]
    })
    dat_limits <- do.call(rbind.data.frame, dat_split)
    rownames(dat_limits) <- NULL
    dat_limits
}


# --------------------------------------------------------------------------------------------------------------------------------
# load data

# Placeholder paths: replace with the real directories before running.
# All relative paths below ("data/...", "plots/...") are resolved against
# project_dir because of the setwd() call.
project_dir <- "path-to-output-directory"
# NOTE(review): data_dir is not referenced in this part of the script
data_dir <- "path-to-input-directory"
setwd(project_dir)


# Each load() restores the objects stored in the file into the workspace; the
# object names noted below are taken from the original comments.
load("data/combined_PP_ecoregion_political.Rdata") # (combined_PP, ecoregions, political regions)
load("data/plots_final_response_predictions.Rdata") # cubist predictions of responses (present day and time diffs)
load("data/RCP_cleaned.Rdata") # RCP (cleaned RCP scenario data for 2050 and 2070)
load("data/preds_long_response7_diff_avg_TOTAL.Rdata") # predicted changes (delta) in plant biodiversity (TOTAL) relative to current values (ac45, gs45, gd45, he45, averaged), for all 7 metrics
load("data/preds_long_response7_diff_avg_NAT_INTRO.Rdata") # same as above but for NAT/INTRO (long & wide format)
load("data/explanatory_present2present_4x4_panel.Rdata") # data for Figs 3-4
load("data/explanatory_present2present_4x4_panel_NAT_INTRO.Rdata") # data for FigS9
load("data/explanatory_diff2diff_4x4_panel.Rdata") # differenced version of above
load("data/regDat_RCP45.Rdata") # regression data for slopes of diff2diff NAT/INTRO explanatory plot
load("data/preds_long_response7_diff_avg_TOTAL_nonRCP45.Rdata") # predicted changes (delta) in plant biodiversity (TOTAL) relative to current values (non-RCP45, averaged)
load("data/preds_long_response7_diff_avg_NAT_INTRO_nonRCP45.Rdata") # same as above but for NAT/INTRO (long & wide format)
    
    
# --------------------------------------------------------------------------------------------------------------------------------
# Table S2: correlation matrix of all scenarios for 7 metrics

# Keep the seven biodiversity responses for the two future time steps and drop
# the "current" scenario.
# The three conditions have to be joined with `&`: a comma inside with() hands
# the last condition to `...`, where it is never evaluated, so the scenario
# filter was silently skipped before.
preds_long_response7 <- preds_long_all[with(preds_long_all,
    response %in% c("species", "family", "genus", "PD.ALL", "MPD.ALL", "PD.s.ALL", "MPD.s.ALL") &
    year %in% c("2050", "2070") &
    !(scenario %in% "current")), ]
preds_long_response7 <- droplevels(preds_long_response7)

# reshape to wide format: one column per response/scenario pair, named
# "<response>_<scenario>"
preds_wide_response7 <- dcast(preds_long_response7, ... ~ response + scenario, value.var = "pred")
#preds_wide_response72 <- dcast(preds_long_response7, ... ~ response + scenario + year, value.var = "pred")

# Correlation matrix between the scenario columns of one response in one year.
#   wide:     wide table produced by dcast() above
#   response: response name as used in the column prefix, e.g. "PD.ALL"
#   yr:       year to keep, "2050" or "2070"
# Columns are selected by the "<response>_" prefix instead of fixed positions,
# so "PD.ALL" cannot pick up "MPD.ALL" and the result does not depend on the
# number of id columns.
scenario_cor <- function(wide, response, yr) {
    scenario_cols <- startsWith(colnames(wide), paste0(response, "_"))
    cor(wide[wide$year %in% yr, scenario_cols])
}

# tables (the original code read from `preds_wide_response5`, which is not
# created anywhere in this script; the wide table built above is used instead)
species_2050 <- scenario_cor(preds_wide_response7, "species", "2050")
summary(species_2050[lower.tri(species_2050)])
species_2070 <- scenario_cor(preds_wide_response7, "species", "2070")
summary(species_2070[lower.tri(species_2070)])
genus_2050 <- scenario_cor(preds_wide_response7, "genus", "2050")
summary(genus_2050[lower.tri(genus_2050)])
genus_2070 <- scenario_cor(preds_wide_response7, "genus", "2070")
summary(genus_2070[lower.tri(genus_2070)])
family_2050 <- scenario_cor(preds_wide_response7, "family", "2050")
summary(family_2050[lower.tri(family_2050)])
family_2070 <- scenario_cor(preds_wide_response7, "family", "2070")
summary(family_2070[lower.tri(family_2070)])
PD_2050 <- scenario_cor(preds_wide_response7, "PD.ALL", "2050")
summary(PD_2050[lower.tri(PD_2050)])
PD_2070 <- scenario_cor(preds_wide_response7, "PD.ALL", "2070")
summary(PD_2070[lower.tri(PD_2070)])
MPD_2050 <- scenario_cor(preds_wide_response7, "MPD.ALL", "2050")
summary(MPD_2050[lower.tri(MPD_2050)])
MPD_2070 <- scenario_cor(preds_wide_response7, "MPD.ALL", "2070")
summary(MPD_2070[lower.tri(MPD_2070)])
PD.s_2050 <- scenario_cor(preds_wide_response7, "PD.s.ALL", "2050")
summary(PD.s_2050[lower.tri(PD.s_2050)])
PD.s_2070 <- scenario_cor(preds_wide_response7, "PD.s.ALL", "2070")
summary(PD.s_2070[lower.tri(PD.s_2070)])
MPD.s_2050 <- scenario_cor(preds_wide_response7, "MPD.s.ALL", "2050")
summary(MPD.s_2050[lower.tri(MPD.s_2050)])
MPD.s_2070 <- scenario_cor(preds_wide_response7, "MPD.s.ALL", "2070")
summary(MPD.s_2070[lower.tri(MPD.s_2070)])



#######################################################################################
# --------------------------------------------------------------------------------------------------------------------------------
# Figure S3: CV model comparisons for species

# Figure S3 is drawn once per response (species, PD.ALL, genus, family, MPD.ALL,
# MPD.s.ALL, PD.s.ALL). The seven sections were copies of each other that
# differed only in the input file, the response name, the order of the models
# on the axis and one masking step for MPD.ALL, so they are driven from one
# specification list and one loop.

# axis label for every caret model id (the mapping is the same for all responses)
cv_model_labels <- c(cubist = "CUBE", rf = "RF", lm_explain = "Explanatory", xgbLinear = "EGB",
                     glmnet = "ENRGLM", gamSpline = "GAM", penalized = "RR", lm = "OLS",
                     gaussprPoly = "GPPK", rqnc = "PQR", BstLm = "BLM", simpls = "PLS",
                     widekernelpls = "WKPLS", pcr = "PCR")

# one entry per panel:
#   response              element of explain_models_CV to add, also used in the output file name
#   cv_file               .Rdata file that provides `cv_models_comp`
#   model_order           models from top to bottom of the panel (hand-ordered per response)
#   mask_explanatory_rmse TRUE to blank the RMSE entry of the explanatory model
cv_panels <- list(
    list(response = "species",
         cv_file = 'data/cv_compare_species/cv_models_comp_species.Rdata',
         model_order = c("cubist", "rf", "lm_explain", "xgbLinear", "glmnet", "gamSpline", "penalized",
                         "lm", "gaussprPoly", "rqnc", "BstLm", "simpls", "widekernelpls", "pcr"),
         mask_explanatory_rmse = FALSE),
    list(response = "PD.ALL",
         cv_file = 'data/cv_compare_PD.ALL/cv_models_comp_PD.ALL.Rdata',
         model_order = c("cubist", "rf", "xgbLinear", "lm_explain", "gamSpline", "glmnet", "penalized",
                         "lm", "gaussprPoly", "rqnc", "BstLm", "simpls", "widekernelpls", "pcr"),
         mask_explanatory_rmse = FALSE),
    list(response = "genus",
         cv_file = 'data_cv/cv_models_comp_genus_all.Rdata',
         model_order = c("cubist", "gaussprPoly", "rf", "xgbLinear", "gamSpline", "lm_explain", "glmnet",
                         "penalized", "lm", "rqnc", "BstLm", "widekernelpls", "simpls", "pcr"),
         mask_explanatory_rmse = FALSE),
    list(response = "family",
         cv_file = 'data_cv/cv_models_comp_family_all.Rdata',
         model_order = c("cubist", "rf", "xgbLinear", "lm_explain", "gamSpline", "glmnet", "lm",
                         "penalized", "rqnc", "gaussprPoly", "BstLm", "simpls", "widekernelpls", "pcr"),
         mask_explanatory_rmse = FALSE),
    list(response = "MPD.ALL",
         cv_file = 'data_cv/cv_models_comp_MPD.ALL_all.Rdata',
         model_order = c("cubist", "rf", "gaussprPoly", "xgbLinear", "lm_explain", "glmnet", "gamSpline",
                         "penalized", "lm", "rqnc", "BstLm", "simpls", "widekernelpls", "pcr"),
         mask_explanatory_rmse = TRUE),
    list(response = "MPD.s.ALL",
         cv_file = 'data_cv/cv_models_comp_MPD.s.ALL_all.Rdata',
         model_order = c("cubist", "rf", "xgbLinear", "gaussprPoly", "lm_explain", "lm", "glmnet",
                         "penalized", "gamSpline", "rqnc", "BstLm", "widekernelpls", "simpls", "pcr"),
         mask_explanatory_rmse = FALSE),
    list(response = "PD.s.ALL",
         cv_file = 'data_cv/cv_models_comp_PD.s.ALL_all.Rdata',
         model_order = c("cubist", "rf", "lm_explain", "gaussprPoly", "glmnet", "xgbLinear", "penalized",
                         "lm", "gamSpline", "rqnc", "BstLm", "simpls", "widekernelpls", "pcr"),
         mask_explanatory_rmse = FALSE)
)

# The loop runs at top level, so load() and all assignments still land in the
# global workspace, as they did in the copy-pasted version. Values that used to
# auto-print at top level are printed explicitly because nothing auto-prints
# inside a loop.
for (cv_panel in cv_panels) {
    print(cv_panel$response)

    load(cv_panel$cv_file)
    str(cv_models_comp, max.level = 1)

    # loaded again for every panel, exactly as each original section did
    load('data/explain_models_varImp_ALL_EXP_CV.Rdata')
    str(explain_models_CV, max.level = 1)

    cv_models_comp$lm_explain <- explain_models_CV[[cv_panel$response]]

    # summarize accuracy of models
    results <- resamples(cv_models_comp)
    summ <- summary(results)
    print(dotplot(results))

    # row names of the stacked table are "<metric>.<model>"; split them into two factors
    summ_stats <- do.call(rbind.data.frame, summ$statistics)
    summ_stats$metric <- factor(gsub("(\\w+)\\.\\w+", "\\1", rownames(summ_stats)),
        levels = c("Rsquared", "RMSE", "MAE"), labels = c("R-squared", "RMSE", "MAE"))
    # levels are reversed because coord_flip() draws the first level at the bottom
    summ_stats$model <- factor(gsub("\\w+\\.(\\w+)", "\\1", rownames(summ_stats)),
        levels = rev(cv_panel$model_order),
        labels = rev(unname(cv_model_labels[cv_panel$model_order])))
    rownames(summ_stats) <- NULL
    #summ_stats <- summ_stats[!(summ_stats$model %in% c("WKPLS", "PLS", "PCR")), ]
    summ_stats <- summ_stats[!(summ_stats$metric %in% c("MAE")), ]

    if (isTRUE(cv_panel$mask_explanatory_rmse)) {
        summ_stats[with(summ_stats, metric %in% "RMSE" & model %in% "Explanatory"), c("Min.", "Mean", "Max.")] <- NA
    }

    # models ranked by mean R-squared (used to hand-tune `model_order` above)
    R2 <- summ_stats[with(summ_stats, metric %in% "R-squared"), ]
    print(R2[order(R2$Mean, decreasing = TRUE), "model"])

    cv_comp <- ggplot(summ_stats, aes(x = model, y = Mean)) +
        geom_point(size = 1.4, shape = 16) +
        geom_linerange(aes(ymin = Min., ymax = Max.)) +
        coord_flip() +
        facet_wrap(~ metric, scales = "free_x") +
        labs(x = "", y = "Predictive Accuracy") +
        theme_publication() +
        theme(axis.title.x = element_text(size = 8, margin = margin(t = 5, r = 0, b = 0, l = 0)),
              axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -3, b = 0, l = 0)),
              axis.text.x = element_text(size = 6),
              axis.text.y = element_text(size = 6),
              strip.text = element_text(size = 7),
              panel.border = element_rect(size = 0.5),
              panel.grid.major.y = element_blank(),
              panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
              plot.margin = grid::unit(c(0, 0.1, 0.2, 0), units = "cm")) # trouble
    # filename/plot are spelled out; `file =` only worked through partial matching
    ggsave(filename = paste0("plots/final_plots/Fig_S3/Figure_3_", cv_panel$response, ".pdf"),
           plot = cv_comp, height = 2.2, width = 2.5, useDingbats = FALSE)
}

# differences between models
#diffs <- diff(results)
#summary(diffs)
#dotplot(diffs)

# summarize best model
#print(cv_models_comp$cubist)



#######################################################################################
#### FIG_1 TOTAL BOXPLOTS and CHOROPLETH PLOTS
#######################################################################################

# BOXPLOTS (panel A)

# remove boxplot outliers within every year x response x scenario group before
# plotting, so the free y scales are not stretched by extreme values
preds_long_response7_diff_limits <- rm_box_outliers(preds_long_response7_diff, limit_var = "diff_percent", by_vars = c("year", "response", "scenario"))

# one facet row per response, one dodged box per scenario within each year
RCP45scenarios <- ggplot(preds_long_response7_diff_limits, aes(x = year, y = diff_percent, fill = scenario, color = scenario)) +
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    geom_boxplot(outlier.size = 0.1, size = 0.2, alpha = 0.5, position = position_dodge(width = 0.9), outlier.shape = NA) +
    scale_fill_tableau(palette = "Tableau 10", name = "Scenario") +
    scale_color_tableau(palette = "Tableau 10", name = "Scenario") +
    #facet_wrap(~ response, scale = "free_y") +
    # argument spelled out in full: `scale =` only worked through partial matching
    facet_grid(response ~ ., scales = "free_y") +
    labs(x = "Year", y = expression("%"~Delta~~"(future - present)")) +
    guides(fill = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5),
           color = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5)) +
    theme_publication() +
    theme(axis.title.x = element_text(size = 8, margin = margin(t = 5, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -3, b = 0, l = 0)),
          axis.text.x = element_text(size = 6),
          axis.text.y = element_text(size = 6),
          strip.text = element_text(size = 7),
          panel.border = element_rect(size = 0.5),
          legend.position = "top",
          legend.key.size = unit(0.3, "cm"),
          legend.key.height = unit(0.4, "cm"),
          legend.key = element_blank(),
          legend.text = element_text(size = 6),
          legend.title = element_text(size = 7),
          legend.box.margin = margin(0, 0, -11, 0))
# filename/plot are spelled out; `file =` only worked through partial matching
ggsave(filename = "plots/final_plots/Fig1/Fig1a.pdf", plot = RCP45scenarios, height = 18.1, width = 4, units = "cm")


# -------------------------------------------------------------------------------------------------------------------------------
# CHOROPLETH PLOTS (panel B)

# NEED TO USE THE DATA THAT IS AVERAGED OVER THE 4 SCENARIOS TO CREATE THE PLOTS DATA
# have two columns of maps, one with (future 2050 - present) and the other with (future 2070 - present)

# manual static choropleth plots

# load county map data from choroplethrMaps
# county polygons from choroplethrMaps, without STATE codes "02" and "15"
data(county.map) # data(county.regions)
head(county.map)
county_map_data <- county.map[!is.element(county.map$STATE, c("02", "15")), ]
# ggplot(county_map_data, aes(long, lat, group=group)) + geom_polygon()

# state polygons from choroplethrMaps, same exclusion
data(state.map) # data(county.regions)
head(state.map)
state_map_data <- state.map[!is.element(state.map$STATE, c("02", "15")), ]

# attach the predictions to every polygon vertex; all.x keeps polygons that
# have no prediction
county_plot_data <- merge(x = county_map_data, y = preds_long_response7_diff_avg_wide,
                          by = "region", all.x = TRUE)
# merge() reorders rows, so restore the drawing order of the polygon vertices
county_plot_data <- county_plot_data[order(county_plot_data$group, county_plot_data$order), ]

# --------------------------------------------------------------------------------------------------------------------------------
# Make a stripped-down theme (no axes, grid or background) the session default
# for the maps below.
theme_set(theme_bw())
theme_update(
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(0, "lines"),
    plot.background = element_blank(),
    plot.margin = grid::unit(c(-0.12, -1, -0.22, -1.8), units = "cm"), # trouble
    #panel.spacing = unit(c(-.5, -.4, -1, -.4), "lines"), # top, right, bottom, left
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = c(0.95, 0.45), # c(0.5, -0.02)
    legend.direction = "vertical",
    legend.text = element_text(size = 5, hjust = 1),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -0.5, unit = "cm"), # remove white space around legend
    legend.title = element_blank()
    )
# theme_update() returns the theme that was active BEFORE the update, so
# assigning its result stored plain theme_bw(); read the updated theme back instead
theme_blank <- theme_get()


# --------------------------------------------------------------------------------------------------------------------------------
# choropleth diff plots (takes roughly 45 seconds for each set of 14 plots)

# get max and min of bounding box for the legend
# Step 1: 2.5% / 97.5% quantiles of every column whose name contains
# "species", "genus", "family" or "PD"
lower_upper_limit_list <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl("species|genus|family|PD", colnames(county_plot_data))]

for (diff in responseColumnsDiffs) {
	print(diff)
    lower_upper_limit_list[[diff]] <- quantile(county_plot_data[, diff], probs = c(0.025, 0.975), na.rm = TRUE)
}
# Entries 7-14 are renamed to placeholder tags, two entries per tag.
# NOTE(review): this relies on the column order of county_plot_data -- presumably
# entries 7-14 are the PD, MPD, PD.s and MPD.s columns (two years each), and the
# tags are needed because a pattern such as "PD" would also match "MPD" in the
# grepl() lookup below; confirm against the data.
names(lower_upper_limit_list) <- c(names(lower_upper_limit_list)[1:6], rep("ABC", 2), rep("DEF", 2), rep("GHI", 2), rep("JKL", 2))
lower_upper_limit_vec <- unlist(lower_upper_limit_list)

# Step 2: one pair of legend limits per response = range over all quantiles
# whose name contains the response tag
lower_upper_limit_legend_list <- list()
for (response in c("species", "genus", "family", "ABC", "DEF", "GHI", "JKL")) {
	print(response)
    lower_upper_limit_legend_list[[response]] <- 	range(lower_upper_limit_vec[grepl(response, names(lower_upper_limit_vec))])    
}
# Step 3: repeat the seven limit pairs and name them "<response>_<year>" so the
# same limits can be looked up for the 2050 and the 2070 column of a response
lower_upper_limit_legend_list <- rep(lower_upper_limit_legend_list, times = 2)
names(lower_upper_limit_legend_list) <- paste(rep(c("species", "genus", "family", "PD", "MPD", "PD.s", "MPD.s"), times = 2), rep(c("2050", "2070"), each = 7), sep = "_")


# plot the choropleth maps
# Draw and save one county map per prediction column (column names that contain
# digits, e.g. "<response>_<year>"); the plots are also kept in
# county_map_plots_diff_area.
county_map_plots_diff_area <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl(".+\\d+", colnames(county_plot_data))]

for (diff in responseColumnsDiffs) {
    print(diff)

    legend_limits <- lower_upper_limit_legend_list[[diff]]
    if (is.null(legend_limits)) {
        stop("no legend limits found for column '", diff, "'", call. = FALSE)
    }

    # clamp the values to the legend limits so that values beyond them get the
    # end colours instead of falling outside the fill scale
    # (alternative kept from the original: scale by area first,
    #  (county_plot_data[, diff] / county_plot_data[, "area"]) * 100)
    scaled_variable <- pmax(pmin(as.numeric(county_plot_data[, diff]), legend_limits[2]), legend_limits[1])

    # Store the clamped values as a column of the plot's own data. aes() is
    # evaluated when a plot is drawn, so mapping fill to the global
    # `scaled_variable` made every stored plot show the values of the LAST
    # column if it was drawn after the loop had finished.
    plot_data <- county_plot_data
    plot_data$scaled_variable <- scaled_variable

    county_map_plots_diff_area[[diff]] <- ggplot(plot_data, aes(x = long, y = lat, group = group)) +
        geom_polygon(aes(fill = scaled_variable)) + # color = "grey", alpha = 1, lwd = I(1/20)
        geom_path(data = state_map_data, colour = "black", lwd = I(1/40)) +
        # `at0` was a typo for `lat0`; NOTE(review): the projection parameters
        # appear to be passed on by position, so the drawn map should not change
        coord_map(projection = "albers", lat0 = 45.5, lat1 = 29.5) +
        scale_fill_gradient2(low = "#4E79A7", mid = "white", high = "#F28E2B", midpoint = 0,
                             limits = legend_limits,
                             guide = guide_colorbar(barwidth = 0.23, barheight = 2.7))
    # red: "#D62728", green: "#2CA02C"

    # filename/plot are spelled out; `file =` only worked through partial matching
    ggsave(filename = paste0("plots/final_plots/Fig1/county_", diff, "_percent.pdf"),
           plot = county_map_plots_diff_area[[diff]],
           height = 0.8, width = 1.6)
}



#######################################################################################
# -------------------------------------------------------------------------------------------------------------------------------------
# Figure 2

# Figure 2 boxplots: % change per ecoregion, one facet per response, coloured by year.
# rm_box_outliers() is defined elsewhere; presumably it drops values outside the
# boxplot whiskers within each year x response x ecoregion group -- confirm.
preds_long_response7_diff_avg_ecolimits <- rm_box_outliers(
    preds_long_response7_diff_avg,
    limit_var = "diff_percent_avg",
    by_vars = c("year", "response", "ecoregion")
)

boxplot_ecoregion_avg <- ggplot(
        preds_long_response7_diff_avg_ecolimits,
        aes(
            # ecoregions ordered by their median change
            x = reorder(ecoregion, diff_percent_avg, FUN = median, na.rm = TRUE),
            y = diff_percent_avg,
            fill = year,
            color = year
        )
    ) +
    # zero reference line drawn underneath the boxes
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    geom_boxplot(outlier.shape = NA, size = 0.2, alpha = 0.5,
                 position = position_dodge(width = 0.9)) +
    scale_fill_tableau(palette = "Tableau 10", name = "Year") +
    scale_color_tableau(palette = "Tableau 10", name = "Year") +
    facet_wrap(~ response, scales = "free_x", nrow = 2) +
    labs(x = "", y = "") +
    coord_flip() +
    theme_publication() +
    theme(
        # axes
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 7),
        axis.text.y = element_text(size = 7),
        # panels and facet strips
        strip.text = element_text(size = 7, vjust = -0.5),
        panel.border = element_rect(size = 0.5),
        panel.spacing.y = unit(-0.3, "lines"),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
        # legend on top, pulled towards the panels
        legend.position = "top",
        legend.direction = "horizontal",
        legend.title = element_text(size = 8),
        legend.key = element_blank(),
        legend.key.size = unit(0.4, "cm"),
        legend.key.height = unit(0.5, "cm"),
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.box.margin = margin(0, 0, -17, 0)
    )

ggsave(
    filename = "plots/final_plots/Fig2/boxplot_ecoregion_avg_scenarios.pdf",
    plot = boxplot_ecoregion_avg,
    height = 10, width = 12, units = "cm"
) # 3, 10


# -------------------------------------------------------------------------------------------------------------------------------------
# map panel of Figure 2
# https://rpubs.com/huanfaChen/ggplotShapefile

# Directory that holds the ecoregion shapefile; the layer name is given separately below.
shp_path <- "plots/final_plots/Fig2/na_cec_eco_l1"
# Read the "NA_CEC_Eco_Level1" layer, keeping character attributes as strings.
# NOTE(review): rgdal has been retired from CRAN; consider sf::st_read() -- confirm.
shp <- rgdal::readOGR(dsn = shp_path, layer = "NA_CEC_Eco_Level1", stringsAsFactors = FALSE)

# Outline-only map of the polygons (black borders, no fill) on an empty theme.
# The plot is neither assigned nor passed to ggsave() here.
# NOTE(review): `shp` is handed to geom_polygon() as-is, which presumably relies on
# ggplot2 converting the spatial object to a data frame with long/lat/group -- confirm.
ggplot() + 
geom_polygon(data = shp, aes(x = long, y = lat, group = group), color = "black", fill = NA) +
theme_void()


#######################################################################################
# -------------------------------------------------------------------------------------------------------------------------------------
# Figure S6

# Figure S6 boxplots: % change per ecoregion, one facet per response x origin.
# rm_box_outliers() is defined elsewhere; presumably it drops values outside the
# boxplot whiskers within each by_vars group -- confirm.
preds_long_response7_diff_avg_ecolimits_ni <- rm_box_outliers(
    preds_long_response7_diff_avg_ni,
    limit_var = "diff_percent_avg",
    by_vars = c("year", "response", "NAT_INTRO", "ecoregion")
)

# Facet key = response name followed by the first character of NAT_INTRO,
# with explicit levels to fix the facet order.
preds_long_response7_diff_avg_ecolimits_ni$response_NI <- factor(
    paste0(
        preds_long_response7_diff_avg_ecolimits_ni$response,
        gsub("(\\w{1}).+", "\\1", preds_long_response7_diff_avg_ecolimits_ni$NAT_INTRO)
    ),
    levels = c("speciesN", "speciesI", "genusN", "genusI", "familyN", "familyI",
               "PDN", "PDI", "MPDN", "MPDI", "PD.sN", "PD.sI", "MPD.sN", "MPD.sI")
)

# keep only rows with a known ecoregion
preds_long_response7_diff_avg_ecolimits_ni <- preds_long_response7_diff_avg_ecolimits_ni[
    !is.na(preds_long_response7_diff_avg_ecolimits_ni$ecoregion), ]

boxplot_ecoregion_avg_ni <- ggplot(
        preds_long_response7_diff_avg_ecolimits_ni,
        aes(
            # ecoregions ordered by their median change
            x = reorder(ecoregion, diff_percent_avg, FUN = median, na.rm = TRUE),
            y = diff_percent_avg,
            fill = year,
            color = year
        )
    ) +
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    geom_boxplot(outlier.shape = NA, size = 0.2, alpha = 0.5,
                 position = position_dodge(width = 0.9)) +
    scale_fill_tableau(palette = "Tableau 10", name = "Year") +
    scale_color_tableau(palette = "Tableau 10", name = "Year") +
    facet_wrap(~ response_NI, scales = "free_x", nrow = 2) +
    labs(x = "", y = "") +
    coord_flip() +
    theme_publication() +
    theme(
        # axes
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 7),
        axis.text.y = element_text(size = 7),
        # panels and facet strips
        strip.text = element_text(size = 7, vjust = -0.5),
        panel.border = element_rect(size = 0.5),
        panel.spacing.y = unit(-0.3, "lines"),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
        # legend on top, pulled towards the panels
        legend.position = "top",
        legend.direction = "horizontal",
        legend.title = element_text(size = 8),
        legend.key = element_blank(),
        legend.key.size = unit(0.4, "cm"),
        legend.key.height = unit(0.5, "cm"),
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.box.margin = margin(0, 0, -17, 0)
    )

ggsave(
    filename = "plots/final_plots/Fig_S6/Fig_S6.pdf",
    plot = boxplot_ecoregion_avg_ni,
    height = 10, width = 18, units = "cm"
) # 3, 10



#######################################################################################
# Fig 3-4 present to present final 4x4 panel plots (TOTAL)

# ----------------------------------------------------------------
# Figure 3: hex-binned scatter of metric vs. env_var with a linear fit,
# faceted response (rows) x predictor (columns), taxonomic responses only.
Fig3 <- ggplot(
        combined_PP_long_4x4_long[
            combined_PP_long_4x4_long$response %in% c("Species", "Genus", "Family"), ],
        aes(x = env_var, y = metric)
    ) +
    # hexbin instead of points because of the large number of observations
    geom_hex(bins = 100) +
    scale_fill_gradient(trans = "log", breaks = seq(1, 401, by = 50),
                        low = "#132B43", high = "#56B1F7") +
    geom_smooth(method = "lm", size = 0.5, color = "red", fill = "red") +
    facet_grid(response ~ predictor, scales = "free") +
    labs(x = "Explanator scale", y = "Response scale") +
    theme_publication() +
    theme(
        axis.title.x = element_text(size = 8, margin = margin(t = 1, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -2, b = 0, l = 0)),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 6),
        strip.text = element_text(size = 7, vjust = -0.5),
        panel.border = element_rect(size = 0.5),
        panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
        legend.position = "none"
    )

ggsave(filename = "plots/final_plots/Fig3/Figure_3.pdf", plot = Fig3, height = 3, width = 3.5)
 
# Figure 4: same layout as Figure 3, restricted to the four phylogenetic
# diversity responses.
Fig4 <- ggplot(
        combined_PP_long_4x4_long[
            combined_PP_long_4x4_long$response %in%
                c("PD (all)", "MPD (all)", "PD (Std. all)", "MPD (Std. all)"), ],
        aes(x = env_var, y = metric)
    ) +
    # hexbin instead of points because of the large number of observations
    geom_hex(bins = 100) +
    scale_fill_gradient(trans = "log", breaks = seq(1, 401, by = 50),
                        low = "#132B43", high = "#56B1F7") +
    geom_smooth(method = "lm", size = 0.5, color = "red", fill = "red") +
    facet_grid(response ~ predictor, scales = "free") +
    labs(x = "Explanator scale", y = "Response scale") +
    theme_publication() +
    theme(
        axis.title.x = element_text(size = 8, margin = margin(t = 1, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -5, b = 0, l = 0)),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 6),
        strip.text = element_text(size = 7, vjust = -0.5),
        panel.border = element_rect(size = 0.5),
        panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
        legend.position = "none"
    )

ggsave(filename = "plots/final_plots/Fig4/Figure_4.pdf", plot = Fig4, height = 3.5, width = 3.5)



#######################################################################################
# Fig S9 present to present final 4x4 panel plots (NAT versus INTRO)

 # use species.i and species.n etc. and color by .i and .n

# Figure S9: hex-binned scatter of metric vs. env_var, filled by Origin,
# faceted (response + Origin) x predictor, with a linear fit per panel.
scatterplot_4x4_ni <- ggplot(
        combined_PP_long_4x4_ni_long,
        aes(x = env_var, y = metric, fill = Origin)
    ) +
    geom_hex(bins = 100) +
    # two fixed fill colours, assigned in the order of the Origin levels
    scale_fill_manual(values = c("#132B43", "darkgreen")) +
    geom_smooth(method = "lm", size = 0.5, color = "red", fill = "red") +
    facet_grid(response + Origin ~ predictor, scales = "free") +
    labs(x = "Explanator scale", y = "Response scale") +
    theme_publication() +
    # NOTE(review): no colour aesthetic is mapped in this plot, so this guide
    # override may have no visible effect; `fill` may have been intended -- confirm.
    guides(colour = guide_legend(override.aes = list(alpha = 1, size = 2))) +
    theme(
        axis.title.x = element_text(size = 8, margin = margin(t = 1, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(size = 8, margin = margin(t = 0, r = 0, b = 0, l = 0)),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 6),
        strip.text = element_text(size = 7, vjust = -0.5),
        panel.border = element_rect(size = 0.5),
        panel.grid.major.x = element_line(size = .5, color = "#f0f0f0"),
        # compact horizontal legend above the panels
        legend.position = "top",
        legend.direction = "horizontal",
        legend.title = element_text(size = 7),
        legend.text = element_text(size = 6),
        legend.key = element_blank(),
        legend.key.size = unit(0.3, "cm"),
        legend.key.height = unit(0.3, "cm"),
        legend.box.margin = margin(0, 0, -20, 0)
    )

ggsave(filename = "plots/final_plots/Fig_S9/Fig_S9.pdf", plot = scatterplot_4x4_ni,
       height = 8.5, width = 3.9)
 


#######################################################################################
#### FIG_S5 NATIVE VS NON-NATIVE BOXPLOTS and CHOROPLETH PLOTS
#######################################################################################

# BOXPLOTS (panel A)

# Fig S5 panel A: % change per year and scenario, rows = response, columns = NAT_INTRO.
# rm_box_outliers() is defined elsewhere; presumably it drops values outside the
# boxplot whiskers within each by_vars group -- confirm.
preds_long_response7_diff_limits_ni <- rm_box_outliers(
    preds_long_response7_diff_ni,
    limit_var = "diff_percent",
    by_vars = c("year", "response", "scenario", "NAT_INTRO")
)
# drop rows without a diff_percent value
preds_long_response7_diff_limits_ni <- preds_long_response7_diff_limits_ni[
    !is.na(preds_long_response7_diff_limits_ni$diff_percent), ]

RCP45scenarios_ni <- ggplot(
        preds_long_response7_diff_limits_ni,
        aes(x = year, y = diff_percent, fill = scenario, color = scenario)
    ) +
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    # outlier.shape = NA hides the outlier points
    geom_boxplot(outlier.size = 0.1, size = 0.2, alpha = 0.5,
                 position = position_dodge(width = 0.9), outlier.shape = NA) +
    scale_fill_tableau(palette = "Tableau 10", name = "Scenario") +
    scale_color_tableau(palette = "Tableau 10", name = "Scenario") +
    facet_grid(response ~ NAT_INTRO, scales = "free_y") +
    labs(x = "Year", y = expression("%"~Delta~~"(future - present)")) +
    guides(
        fill = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5),
        color = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5)
    ) +
    theme_publication() +
    theme(
        axis.title.x = element_text(size = 8, margin = margin(t = 1, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -3, b = 0, l = 0)),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 6),
        strip.text = element_text(size = 7),
        panel.border = element_rect(size = 0.5),
        legend.position = "top",
        legend.title = element_text(size = 7),
        legend.text = element_text(size = 6),
        legend.key = element_blank(),
        legend.key.size = unit(0.3, "cm"),
        legend.key.height = unit(0.4, "cm"),
        legend.box.margin = margin(0, 0, -20, 0)
    )

ggsave(filename = "plots/final_plots/Fig_S5/Fig_S5a.pdf", plot = RCP45scenarios_ni,
       height = 17.9, width = 5.45, units = "cm")


# --------------------------------------------------------------------------------------------------------------------------------
# CHOROPLETH PLOTS (panel B)

# predicted changes (delta) in plant biodiversity relative to current values (ac45, gs45, gd45, he45, averaged), for all 7 metrics

# USE % OF COUNTIES AS RESPONSE SCALE

# manual static choropleth plots

# County polygons from choroplethrMaps, without STATE codes "02" and "15"
# (presumably the FIPS codes of Alaska and Hawaii -- confirm).
data(county.map) # data(county.regions)
head(county.map)
county_map_data <- county.map[county.map$STATE %!in% c("02", "15"), ]
# ggplot(county_map_data, aes(long, lat, group=group)) + geom_polygon()

# State outlines from choroplethrMaps, same two states removed.
data(state.map) # data(county.regions)
head(state.map)
state_map_data <- state.map[state.map$STATE %!in% c("02", "15"), ]

# Attach the prediction columns to every polygon vertex; all.x = TRUE keeps
# counties that have no prediction.
county_plot_data <- merge(
    county_map_data,
    preds_long_response7_diff_avg_wide_ni,
    by = "region",
    all.x = TRUE
)
# merge() reorders rows, so restore the vertex drawing order within each polygon
county_plot_data <- county_plot_data[order(county_plot_data$group, county_plot_data$order), ]

# --------------------------------------------------------------------------------------------------------------------------------
# Global map theme: start from theme_bw() and remove axes, grid, borders and
# backgrounds so that only the map and a small legend remain.
theme_set(theme_bw())
# NOTE(review): per the ggplot2 docs theme_update() invisibly returns the theme that
# was active before the update, so `theme_blank` presumably holds the plain
# theme_bw() rather than the stripped theme -- confirm before reusing it.
theme_blank <- theme_update(
    # axes: nothing is drawn
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    # panel and plot chrome
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(0, "lines"),
    plot.background = element_blank(),
    # negative margins (top, right, bottom, left) crop the white space around the map
    plot.margin = grid::unit(c(-0.12, -1, -0.22, -1.8), units = "cm"), # trouble
    # small vertical legend placed inside the plot area
    legend.position = c(0.95, 0.45), # c(0.5, -0.02)
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 5, hjust = 1),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -0.5, unit = "cm") # remove white space around legend
)
    
# --------------------------------------------------------------------------------------------------------------------------------
# diff plots (takes roughly 45 seconds for each set of 14 plots)

# Legend limits for the Fig S5 maps.
# Step 1: 2.5% / 97.5% quantiles of every diff column.
responseColumnsDiffs <- grep("species|genus|family|PD", colnames(county_plot_data), value = TRUE)

lower_upper_limit_list <- list()
for (diff in responseColumnsDiffs) {
    print(diff)
    lower_upper_limit_list[[diff]] <- quantile(county_plot_data[, diff], probs = c(0.025, 0.975), na.rm = TRUE)
}

# Step 2: remove the "<digits>_" year part from the names so that columns that
# differ only in the year share one key, and flatten to a vector carrying that key.
year_free_names <- gsub("(.+\\_)\\d+\\_(.+)", "\\1\\2", names(lower_upper_limit_list))
names(lower_upper_limit_list) <- year_free_names
lower_upper_limit_vec <- setNames(unlist(lower_upper_limit_list), rep(year_free_names, each = 2))

# Step 3: per key, the overall min and max of the collected quantiles.
lower_upper_limit_legend_list <- list()
for (response in unique(year_free_names)) {
    print(response)
    lower_upper_limit_legend_list[[response]] <- range(lower_upper_limit_vec[names(lower_upper_limit_vec) == response])
}

# Step 4: duplicate the limits and re-insert 2050 / 2070 into the keys so the
# list can be looked up by diff column name.
lower_upper_limit_legend_list_final <- c(lower_upper_limit_legend_list, lower_upper_limit_legend_list)
names(lower_upper_limit_legend_list_final) <- c(
    gsub("(\\w+)\\_(\\w+)", "\\1_2050_\\2", names(lower_upper_limit_legend_list)),
    gsub("(\\w+)\\_(\\w+)", "\\1_2070_\\2", names(lower_upper_limit_legend_list))
)


# plot the choropleth maps
# One map per diff column: values are clamped to the shared legend limits of the
# column, drawn as county polygons with state outlines on top, and written to a
# small PDF. The plots are also collected in `county_map_plots_diff_area`.
county_map_plots_diff_area <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl("species|genus|family|PD", colnames(county_plot_data))]

for (diff in responseColumnsDiffs) {
    print(diff)

    legend_limits <- lower_upper_limit_legend_list_final[[diff]]

    # Store the clamped values inside the plot's own data. aes() is evaluated
    # lazily at render time, so mapping a global vector that is overwritten on
    # every iteration would make all stored plots show the last column.
    plot_data <- county_plot_data
    plot_data$scaled_variable <- pmin(pmax(as.numeric(county_plot_data[, diff]), legend_limits[1]), legend_limits[2])

    county_map_plots_diff_area[[diff]] <- ggplot(plot_data, aes(x = long, y = lat, group = group)) +
        geom_polygon(aes(fill = scaled_variable)) + # color = "grey", alpha = 1, lwd = I(1/20)
        geom_path(data = state_map_data, colour = "black", lwd = I(1/40)) +
        # lat0 / lat1 are the two parallels handed to the "albers" projection
        coord_map(projection = "albers", lat0 = 45.5, lat1 = 29.5) +
        scale_fill_gradient2(low = "#4E79A7", mid = "white", high = "#F28E2B", midpoint = 0, # "darkred" "darkgreen"
                             limits = legend_limits,
                             guide = guide_colorbar(barwidth = 0.23, barheight = 2.7))

    # NOTE(review): the boxplot panel of this figure is saved under "Fig_S5" while
    # the maps go to "FIG_S5"; on a case-sensitive file system these are two
    # different directories -- confirm this is intended.
    ggsave(filename = paste0("plots/final_plots/FIG_S5/county_", diff, "_percent.pdf"),
           plot = county_map_plots_diff_area[[diff]],
           height = 0.8, width = 1.6)
}



#######################################################################################
#### FIG_S4 CURRENT TOTAL DIVERSITY CHOROPLETH PLOTS
#######################################################################################
# --------------------------------------------------------------------------------------------------------------------------------
# predicted total plant diversity for all 21 metrics

# USE % OF COUNTIES AS RESPONSE SCALE

# --------------------------------------------------------------------------------------------------------------------------------
# manual static choropleth plots

# County polygons from choroplethrMaps, without STATE codes "02" and "15"
# (presumably the FIPS codes of Alaska and Hawaii -- confirm).
data(county.map) # data(county.regions)
head(county.map)
county_map_data <- county.map[county.map$STATE %!in% c("02", "15"), ]
# ggplot(county_map_data, aes(long, lat, group=group)) + geom_polygon()

# State outlines from choroplethrMaps, same two states removed.
data(state.map) # data(county.regions)
head(state.map)
state_map_data <- state.map[state.map$STATE %!in% c("02", "15"), ]

# The 21 response columns that are mapped.
responses21 <- c(
    "species", "genus", "family",
    "species.n", "genus.n", "family.n",
    "species.i", "genus.i", "family.i",
    "PD.ALL", "MPD.ALL", "PD.s.ALL", "MPD.s.ALL",
    "PD.NAT", "MPD.NAT", "PD.s.NAT", "MPD.s.NAT",
    "PD.INTRO", "MPD.INTRO", "PD.s.INTRO", "MPD.s.INTRO"
)

# Attach the response columns to every polygon vertex; all.x = TRUE keeps
# counties that have no data.
county_plot_data <- merge(
    county_map_data,
    combined_PP[, c("region", responses21)],
    by = "region",
    all.x = TRUE
)
# merge() reorders rows, so restore the vertex drawing order within each polygon
county_plot_data <- county_plot_data[order(county_plot_data$group, county_plot_data$order), ]

# --------------------------------------------------------------------------------------------------------------------------------
# Global map theme: start from theme_bw() and remove axes, grid, borders and
# backgrounds so that only the map and a small legend remain.
theme_set(theme_bw())
# NOTE(review): per the ggplot2 docs theme_update() invisibly returns the theme that
# was active before the update, so `theme_blank` presumably holds the plain
# theme_bw() rather than the stripped theme -- confirm before reusing it.
theme_blank <- theme_update(
    # axes: nothing is drawn
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    # panel and plot chrome
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(0, "lines"),
    plot.background = element_blank(),
    # negative margins (top, right, bottom, left) crop the white space around the map
    plot.margin = grid::unit(c(-0.12, -1, -0.22, -1.8), units = "cm"), # trouble
    # small vertical legend placed inside the plot area
    legend.position = c(0.95, 0.45), # c(0.5, -0.02)
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 5, hjust = 1),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -0.5, unit = "cm") # remove white space around legend
)
    
# --------------------------------------------------------------------------------------------------------------------------------
# response plots 

# plot the choropleth maps
# One map per response column, drawn as county polygons with state outlines on
# top and written to a small PDF. The plots are also collected in `county_map_plots`.
county_map_plots <- list()

for (response in responses21) {
    print(response)

    # Store the fill values inside the plot's own data. aes() is evaluated lazily
    # at render time, so mapping a global vector that is overwritten on every
    # iteration would make all stored plots show the last response.
    plot_data <- county_plot_data
    plot_data$variable <- county_plot_data[, response]

    county_map_plots[[response]] <- ggplot(plot_data, aes(x = long, y = lat, group = group)) +
        geom_polygon(aes(fill = variable)) + # color = "grey", alpha = 1, lwd = I(1/20)
        geom_path(data = state_map_data, colour = "black", lwd = I(1/40)) +
        # lat0 / lat1 are the two parallels handed to the "albers" projection
        coord_map(projection = "albers", lat0 = 45.5, lat1 = 29.5) +
        scale_fill_gradient2(low = "#4E79A7", mid = "white", high = "#F28E2B", midpoint = 0, # "darkred" "darkgreen"
                             guide = guide_colorbar(barwidth = 0.23, barheight = 2.7))

    ggsave(filename = paste0("plots/final_plots/FIG_S4/county_", response, ".pdf"),
           plot = county_map_plots[[response]],
           height = 0.8, width = 1.6)
}



#######################################################################################
#### FIG_S7 TOTAL BOXPLOTS and CHOROPLETH PLOTS
#######################################################################################

# BOXPLOTS (panel A)

# Fig S7 panel A: % change per year and scenario, one facet row per response.
# rm_box_outliers() is defined elsewhere; presumably it drops values outside the
# boxplot whiskers within each by_vars group -- confirm.
preds_long_response7_diff_limits_nonRCP45 <- rm_box_outliers(
    preds_long_response7_diff_nonRCP45,
    limit_var = "diff_percent",
    by_vars = c("year", "response", "scenario")
)

RCP45scenarios <- ggplot(
        preds_long_response7_diff_limits_nonRCP45,
        aes(x = year, y = diff_percent, fill = scenario, color = scenario)
    ) +
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    # outlier.shape = NA hides the outlier points
    geom_boxplot(outlier.size = 0.1, size = 0.2, alpha = 0.5,
                 position = position_dodge(width = 0.9), outlier.shape = NA) +
    scale_fill_tableau(palette = "Tableau 10", name = "Scenario") +
    scale_color_tableau(palette = "Tableau 10", name = "Scenario") +
    facet_grid(response ~ ., scales = "free_y") +
    labs(x = "Year", y = expression("%"~Delta~~"(future - present)")) +
    guides(
        fill = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5),
        color = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5)
    ) +
    theme_publication() +
    theme(
        axis.title.x = element_text(size = 8, margin = margin(t = 5, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -3, b = 0, l = 0)),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 6),
        strip.text = element_text(size = 7),
        panel.border = element_rect(size = 0.5),
        legend.position = "top",
        legend.title = element_text(size = 7),
        legend.text = element_text(size = 6),
        legend.key = element_blank(),
        legend.key.size = unit(0.3, "cm"),
        legend.key.height = unit(0.4, "cm"),
        legend.box.margin = margin(0, 0, -11, 0)
    )

ggsave(filename = "plots/final_plots/Fig_S7/Fig_S7a.pdf", plot = RCP45scenarios,
       height = 18.1, width = 4, units = "cm")


# -------------------------------------------------------------------------------------------------------------------------------
# CHOROPLETH PLOTS (panel B)

# NEED TO USE THE DATA THAT IS AVERAGED OVER THE 4 SCENARIOS TO CREATE THE PLOTS DATA
# have two columns of maps, one with (future 2050 - present) and the other with (future 2070 - present)

# manual static choropleth plots

# County polygons from choroplethrMaps, without STATE codes "02" and "15"
# (presumably the FIPS codes of Alaska and Hawaii -- confirm).
data(county.map) # data(county.regions)
head(county.map)
county_map_data <- county.map[county.map$STATE %!in% c("02", "15"), ]
# ggplot(county_map_data, aes(long, lat, group=group)) + geom_polygon()

# State outlines from choroplethrMaps, same two states removed.
data(state.map) # data(county.regions)
head(state.map)
state_map_data <- state.map[state.map$STATE %!in% c("02", "15"), ]

# Attach the prediction columns to every polygon vertex; all.x = TRUE keeps
# counties that have no prediction.
county_plot_data <- merge(
    county_map_data,
    preds_long_response7_diff_avg_wide_nonRCP45,
    by = "region",
    all.x = TRUE
)
# merge() reorders rows, so restore the vertex drawing order within each polygon
county_plot_data <- county_plot_data[order(county_plot_data$group, county_plot_data$order), ]

# --------------------------------------------------------------------------------------------------------------------------------
# Global map theme: start from theme_bw() and remove axes, grid, borders and
# backgrounds so that only the map and a small legend remain.
theme_set(theme_bw())
# NOTE(review): per the ggplot2 docs theme_update() invisibly returns the theme that
# was active before the update, so `theme_blank` presumably holds the plain
# theme_bw() rather than the stripped theme -- confirm before reusing it.
theme_blank <- theme_update(
    # axes: nothing is drawn
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    # panel and plot chrome
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(0, "lines"),
    plot.background = element_blank(),
    # negative margins (top, right, bottom, left) crop the white space around the map
    plot.margin = grid::unit(c(-0.12, -1, -0.22, -1.8), units = "cm"), # trouble
    # small vertical legend placed inside the plot area
    legend.position = c(0.95, 0.45), # c(0.5, -0.02)
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(size = 5, hjust = 1),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -0.5, unit = "cm") # remove white space around legend
)


# --------------------------------------------------------------------------------------------------------------------------------
# choropleth diff plots (takes roughly 45 seconds for each set of 14 plots)

# get max and min of bounding box for the legend
# For every diff column take the 2.5% / 97.5% quantiles, then give all columns
# that belong to the same response (column name without the trailing "_<year>")
# one shared legend range, so that maps of different years use the same scale.
# The result is keyed by the diff column name (e.g. "species_2050").
responseColumnsDiffs <- colnames(county_plot_data)[grepl("species|genus|family|PD", colnames(county_plot_data))]

lower_upper_limit_list <- list()
for (diff in responseColumnsDiffs) {
    print(diff)
    lower_upper_limit_list[[diff]] <- quantile(county_plot_data[, diff], probs = c(0.025, 0.975), na.rm = TRUE)
}
lower_upper_limit_vec <- unlist(lower_upper_limit_list)

# Group the columns by their exact response name rather than by position or
# substring: a substring match on "PD" would also hit "MPD", "PD.s" and "MPD.s",
# and positional grouping silently breaks when the column order changes.
response_of_column <- sub("_\\d+$", "", responseColumnsDiffs)

lower_upper_limit_legend_list <- list()
for (diff in responseColumnsDiffs) {
    same_response <- responseColumnsDiffs[response_of_column == sub("_\\d+$", "", diff)]
    lower_upper_limit_legend_list[[diff]] <- range(unlist(lower_upper_limit_list[same_response]))
}


# plot the choropleth maps
# One map per diff column: values are clamped to the shared legend limits of the
# column, drawn as county polygons with state outlines on top, and written to a
# small PDF. The plots are also collected in `county_map_plots_diff_area`.
county_map_plots_diff_area <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl(".+\\d+", colnames(county_plot_data))]

for (diff in responseColumnsDiffs) {
    print(diff)

    legend_limits <- lower_upper_limit_legend_list[[diff]]

    # Store the clamped values inside the plot's own data. aes() is evaluated
    # lazily at render time, so mapping a global vector that is overwritten on
    # every iteration would make all stored plots show the last column.
    plot_data <- county_plot_data
    plot_data$scaled_variable <- pmin(pmax(as.numeric(county_plot_data[, diff]), legend_limits[1]), legend_limits[2])

    county_map_plots_diff_area[[diff]] <- ggplot(plot_data, aes(x = long, y = lat, group = group)) +
        geom_polygon(aes(fill = scaled_variable)) + # color = "grey", alpha = 1, lwd = I(1/20)
        geom_path(data = state_map_data, colour = "black", lwd = I(1/40)) +
        # lat0 / lat1 are the two parallels handed to the "albers" projection
        coord_map(projection = "albers", lat0 = 45.5, lat1 = 29.5) +
        scale_fill_gradient2(low = "#4E79A7", mid = "white", high = "#F28E2B", midpoint = 0,
                             limits = legend_limits,
                             guide = guide_colorbar(barwidth = 0.23, barheight = 2.7))
    # alternative colours -- red: "#D62728", green: "#2CA02C"

    ggsave(filename = paste0("plots/final_plots/Fig_S7/county_", diff, "_percent.pdf"),
           plot = county_map_plots_diff_area[[diff]],
           height = 0.8, width = 1.6)
}



#######################################################################################
#### FIG_S8 NATIVE VS NON-NATIVE BOXPLOTS and CHOROPLETH PLOTS
#######################################################################################

# BOXPLOTS (panel A)

# rm_box_outliers() is defined elsewhere in this file; it is applied to diff_percent
# within each year x response x scenario x NAT_INTRO group. Rows whose diff_percent is
# NA afterwards are dropped (presumably the outliers it flagged; verify against helper).
preds_long_response7_diff_limits_ni_nonRCP45 <- rm_box_outliers(preds_long_response7_diff_ni_nonRCP45,
                                                                limit_var = "diff_percent",
                                                                by_vars = c("year", "response", "scenario", "NAT_INTRO"))
keep_rows <- !is.na(preds_long_response7_diff_limits_ni_nonRCP45$diff_percent)
preds_long_response7_diff_limits_ni_nonRCP45 <- preds_long_response7_diff_limits_ni_nonRCP45[keep_rows, ]


# boxplots of % change per year, colored by scenario; one row of panels per response
# (free y axis), one column per NAT_INTRO level; outlier points are not drawn
RCP45scenarios_ni <- ggplot(preds_long_response7_diff_limits_ni_nonRCP45, aes(x = year, y = diff_percent, fill = scenario, color = scenario)) +
    geom_hline(yintercept = 0, linetype = "solid", color = "grey20", size = 0.3) +
    geom_boxplot(outlier.size = 0.1, size = 0.2, alpha = 0.5, position = position_dodge(width = 0.9), outlier.shape = NA) +
    scale_fill_tableau(palette = "Tableau 10", name = "Scenario") +
    scale_color_tableau(palette = "Tableau 10", name = "Scenario") +
    #facet_wrap(~ response, scale = "free_y") +
    facet_grid(response ~ NAT_INTRO, scales = "free_y") + # argument spelled out (was partially matched "scale")
    labs(x = "Year", y = expression("%"~Delta~~"(future - present)")) +
    guides(fill = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5),
           color = guide_legend(ncol = 2, title.position = "top", title.hjust = 0.5)) +
    theme_publication() +
    theme(axis.title.x = element_text(size = 8, margin = margin(t = 1, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(size = 8, margin = margin(t = 0, r = -3, b = 0, l = 0)),
          axis.text.x = element_text(size = 6),
          axis.text.y = element_text(size = 6),
          strip.text = element_text(size = 7),
          panel.border = element_rect(size = 0.5),
          legend.position = "top",
          legend.key.size = unit(0.3, "cm"),
          legend.key.height = unit(0.4, "cm"),
          legend.key = element_blank(),
          legend.text = element_text(size = 6),
          legend.title = element_text(size = 7),
          legend.box.margin = margin(0, 0, -20, 0))
# filename / plot named explicitly instead of relying on partial matching of "file"
ggsave(filename = "plots/final_plots/Fig_S8/Fig_S8a.pdf", plot = RCP45scenarios_ni,
       height = 17.9, width = 5.45, units = "cm")


# --------------------------------------------------------------------------------------------------------------------------------
# CHOROPLETH PLOTS (panel B)

# predicted changes (delta) in plant biodiversity relative to current values (ac45, gs45, gd45, he45, averaged), for all 7 metrics

# USE % OF COUNTIES AS RESPONSE SCALE

# manual static choropleth plots

# county polygons from choroplethrMaps, without STATE codes "02" and "15"
# (presumably the FIPS codes of Alaska and Hawaii, i.e. contiguous US only)
data(county.map) # data(county.regions)
head(county.map)
county_map_data <- subset(county.map, !(STATE %in% c("02", "15")))
# ggplot(county_map_data, aes(long, lat, group=group)) + geom_polygon()

# state outlines from choroplethrMaps, restricted to the same set of states
data(state.map)
head(state.map)
state_map_data <- subset(state.map, !(STATE %in% c("02", "15")))

# attach the predictions to every county polygon row; all.x = TRUE keeps counties
# that have no prediction (their response columns are NA)
county_plot_data <- merge(county_map_data, preds_long_response7_diff_avg_wide_ni_nonRCP45, by = "region", all.x = TRUE)
# merge() does not keep the row order, so restore the drawing order of the polygon vertices
county_plot_data <- county_plot_data[order(county_plot_data$group, county_plot_data$order), ]

# --------------------------------------------------------------------------------------------------------------------------------
# Global theme for the choropleth maps: theme_bw() with background, grid, border,
# axes and legend title removed, and the legend placed inside the panel.
theme_set(theme_bw())
theme_update(
    panel.background = element_blank(), 
    panel.border = element_blank(), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    panel.spacing = unit(0, "lines"), 
    plot.background = element_blank(), 
    plot.margin = grid::unit(c(-0.12,-1,-0.22,-1.8), units = "cm"), # trouble
    #panel.spacing = unit(c(-.5, -.4, -1, -.4), "lines"), # top, right, bottom, left
    axis.title.x = element_blank(), 
    axis.title.y = element_blank(),
    axis.text.x = element_blank(), 
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = c(0.95, 0.45), # c(0.5, -0.02)
    legend.direction = "vertical",
    legend.text = element_text(size = 5, hjust = 1),
    legend.margin = margin(t=0, r=0, b=0, l=-0.5, unit="cm"), # remove white space around legend
    legend.title = element_blank()
    )
# theme_update() invisibly returns the PREVIOUS theme (here theme_bw()), so assigning
# its result would not store the blank theme; read the now-active theme back instead.
theme_blank <- theme_get()
    
# --------------------------------------------------------------------------------------------------------------------------------
# diff plots (takes roughly 45 seconds for each set of 14 plots)

# get max and min of bounding box for the legend
#
# For every diff column the central 95% interval (2.5% / 97.5% quantiles) is computed.
# Columns that differ only in the "_<year>_" part of their name share one legend range:
# the min/max over their quantiles.
#
# NOTE(review): county_plot_data has one row per polygon vertex after the merge with the
# map data, so counties with more vertices weigh more in the quantiles. Confirm intended.
lower_upper_limit_list <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl("species|genus|family|PD", colnames(county_plot_data))]

for (diff_col in responseColumnsDiffs) {
    print(diff_col)
    lower_upper_limit_list[[diff_col]] <- quantile(county_plot_data[, diff_col], probs = c(0.025, 0.975), na.rm = TRUE)
}

# grouping key of every column: the column name with the year removed
diff_columns <- names(lower_upper_limit_list)
response_keys <- gsub("(.+\\_)\\d+\\_(.+)", "\\1\\2", diff_columns)
names(lower_upper_limit_list) <- response_keys
lower_upper_limit_vec <- unlist(lower_upper_limit_list)
names(lower_upper_limit_vec) <- rep(response_keys, each = 2)

# one shared range per grouping key (exact name match)
lower_upper_limit_legend_list <- list()
for (response in unique(response_keys)) {
    print(response)
    lower_upper_limit_legend_list[[response]] <- range(lower_upper_limit_vec[names(lower_upper_limit_vec) == response])
}

# Map the shared ranges back onto the original column names. This replaces rebuilding
# the names with hard-coded years (2050 / 2070) and a second regex, which put the year
# in the wrong place whenever the part after the year itself contained an underscore.
lower_upper_limit_legend_list_final <- lower_upper_limit_legend_list[response_keys]
names(lower_upper_limit_legend_list_final) <- diff_columns


# plot the choropleth maps
#
# One map per diff column: values are clamped to the shared legend limits of the
# column (lower_upper_limit_legend_list_final), drawn as county polygons with state
# outlines on top, stored in county_map_plots_diff_area and written to a PDF.
county_map_plots_diff_area <- list()
responseColumnsDiffs <- colnames(county_plot_data)[grepl("species|genus|family|PD", colnames(county_plot_data))]

for (diff_col in responseColumnsDiffs) {
    print(diff_col)

    fill_limits <- lower_upper_limit_legend_list_final[[diff_col]]
    if (is.null(fill_limits)) {
        stop("no legend limits found for column '", diff_col, "'", call. = FALSE)
    }

    # The clamped values are stored as a column of the plot's own data. ggplot
    # evaluates aes() lazily, so mapping to a global vector that is overwritten on
    # each iteration would make every stored plot show the values of the last column.
    plot_data <- county_plot_data
    plot_data$scaled_variable <- pmin(pmax(as.numeric(county_plot_data[, diff_col]), fill_limits[1]), fill_limits[2])

    county_map_plots_diff_area[[diff_col]] <- ggplot(plot_data, aes(x = long, y = lat, group = group)) +
        geom_polygon(aes(fill = scaled_variable)) + # color = "grey", alpha = 1, lwd = I(1/20)
        geom_path(data = state_map_data, colour = "black", lwd = I(1/40)) +
        # projection parameters are passed on positionally; "lat0" was misspelled "at0"
        coord_map(projection = "albers", lat0 = 45.5, lat1 = 29.5) +
        scale_fill_gradient2(low = "#4E79A7", mid = "white", high = "#F28E2B", midpoint = 0,  # "darkred" "darkgreen"
                             limits = fill_limits,
                             guide = guide_colorbar(barwidth = 0.23, barheight = 2.7))

    # NOTE(review): this directory is spelled "FIG_S8" while panel A is written to
    # "Fig_S8"; on a case-sensitive file system these are two different directories.
    ggsave(filename = paste0("plots/final_plots/FIG_S8/county_", diff_col, "_percent.pdf"),
           plot = county_map_plots_diff_area[[diff_col]],
           height = 0.8, width = 1.6)
}



