
options(stringsAsFactors = FALSE)

# ------------------------------------------------------------------------------------
# load data

dir <- "file-path-to-data"
setwd(dir)

dat <- read.csv("MHH_data.csv")
str(dat)


# ---------------------------------------------
# color blind palette

cbbPalette <- c("#E69F00", "#555555", "#56B4E9", "#009E73",
                           "#F0E442", "#0072B2", "#D55E00", "#CC79A7")


# ------------------------------------------------------------------------------------
# load and install binary packages

ipak <- function(pkg){
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
    sapply(pkg, require, character.only = TRUE)
}

packages <- c("ggplot2", "scales", "grid", "gridExtra", "reshape2", "plyr",  
                         "data.table", "car", "rms", "texreg", "lsmeans", "RColorBrewer",
                         "effects", "xtable")
ipak(packages)

# devtools::install_github("lionel-/ggstance")
library(ggstance)


# ------------------------------------------------------------------------------------
dat$pop_density_tern_ord <- ordered(dat$pop_density_tern_ord, 
    levels = c("low",  "medium", "high"), labels = c("Low",  "Medium", "High"))
    
dat$subsistence <- factor(dat$subsistence, 
    levels = c("Foraging", "Horticulture", "Intensive agriculture", "Pastoralism"),
    labels = c("Foraging", "Horticulture", "Int. agriculture", "Pastoralism"))
    
    
# ------------------------------------------------------------------------------------
# set global ggplot theme

theme_nice <- theme_set(theme_bw())
theme_nice <- theme_update(
    axis.title.x = element_text(size = 10, margin = margin(t=10)),
    axis.title.y = element_text(size = 10, angle = 90, margin = margin(r=10)),
    axis.text.x = element_text(size = 8, angle = 0, margin = margin(t=2), color = "black"),
    axis.text.y = element_text(size = 8, hjust = 1, margin = margin(r=2), color = "black"),
    # strip.background = element_rect(color = "grey", fill = "white"),
    strip.text = element_text(size = 10),
    strip.background = element_blank(),
    panel.border = element_rect(color = "grey20"),
    panel.spacing.x = unit(0.2, "lines"),
    panel.spacing.y = unit(0.05,"lines"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    # explicitly set the horizontal lines (or they will disappear too)
    panel.grid.major.x = element_line(size=.5, color="#f0f0f0"),
    legend.title = element_text(size = 8, face = "italic"),
    legend.key.size = unit(.8, units = "line"),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "gray95", color = "gray20",
                                     size = 0.5, linetype = "dotted"))


###################################################################################
###################################################################################
###################################################################################
# ---------------------------------------------------------------------------------------------------------
# MODEL polr 15 km

# ------------------------------------------------------------------------------------
#  functions
   
polr.p <- function(model) {
    ctable <- coef(summary(model))
    p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
    ctable <- cbind(ctable, "p value" = p)
    ctable
}

options(width = 250)

# 11 societies omitted because of NAs (9 in warm subsample)
model_polr_15 <- polr(pop_density_tern_ord ~ subsistence +
                                       npp_max_15km + 
                                       ET + 
                                       mean_annual_precip_mm + 
                                       latitude_abs + 
                                       marine_dist +
                                       mobility_bin + 
                                       fishing_bin +
                                       subsistence:npp_max_15km, 
                Hess = TRUE, 
                data = dat) # dat[dat$climate %in% "warm", ]
summary(model_polr_15)
polr.p(model_polr_15)
Anova(model_polr_15, type = "III") # subsistence:npp_max_15km: p = 0.035
#confint(model_polr_15)

# proportional odds assumption
par(mfrow = c(2, 4))
plot.xmean.ordinaly(pop_density_tern_ord ~ subsistence + npp_max_15km + ET + mean_annual_precip_mm + latitude_abs + marine_dist, data = dat, cr = TRUE, topcats = 3)
par(mfrow = c(1, 1))

# post-estimation effects
eff_polr_15 <- allEffects(model_polr_15, xlevels = list(npp_max_15km = seq(0, 2090, by = 10)))
eff_df_polr_15 <- as.data.frame(eff_polr_15[["subsistence:npp_max_15km"]])

# reshape effects
eff_df_long_polr_15 <- setDF(melt(
    data = setDT(eff_df_polr_15),
    measure.vars = list(c("prob.Low", "prob.Medium", "prob.High"), 
                        c("logit.Low", "logit.Medium", "logit.High"),
                        c("se.prob.Low", "se.prob.Medium", "se.prob.High"),
                        c("se.logit.Low", "se.logit.Medium", "se.logit.High"),
                        c("L.prob.Low", "L.prob.Medium", "L.prob.High"),
                        c("U.prob.Low", "U.prob.Medium", "U.prob.High"),
                        c("L.logit.Low", "L.logit.Medium", "L.logit.High"),
                        c("U.logit.Low", "U.logit.Medium", "U.logit.High")  
                        ),
    variable.name = "pop_density_tern_ord",
    value.name = c("prob", "logit", "se.prob", "se.logit", "L.prob", "U.prob", "L.logit", "U.logit"),
    variable.factor = FALSE
    ))

eff_df_long_polr_15[eff_df_long_polr_15$pop_density_tern_ord == 1, "pop_density_tern_ord"] <- "Low"
eff_df_long_polr_15[eff_df_long_polr_15$pop_density_tern_ord == 2, "pop_density_tern_ord"] <- "Medium"
eff_df_long_polr_15[eff_df_long_polr_15$pop_density_tern_ord == 3, "pop_density_tern_ord"] <- "High"

eff_df_long_polr_15$pop_density_tern_ord <- factor(eff_df_long_polr_15$pop_density_tern_ord, 
    levels = c("Low", "Medium", "High"))
eff_df_long_polr_15$pop_density_tern_ord_rev <- factor(eff_df_long_polr_15$pop_density_tern_ord, 
    levels = rev(c("Low", "Medium", "High")))

ddply(dat, .(subsistence), summarise, 
    Min = min(npp_max_15km, na.rm = TRUE), 
    Max = max(npp_max_15km, na.rm = TRUE),
    Min120 = min(npp_max_120km, na.rm = TRUE), 
    Max120 = max(npp_max_120km, na.rm = TRUE)
    )

# omit out of sample predictions (15km full sample, minus 11)
eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Foraging" & npp_max_15km < 142 | subsistence %in% "Foraging" & npp_max_15km > 1815), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Foraging" & pop_density_tern_ord %in% "High"), c("L.prob", "U.prob")] <- NA
eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Horticulture" & npp_max_15km < 203), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Int. agriculture" & npp_max_15km < 19 | subsistence %in% "Int. agriculture" & npp_max_15km > 1843), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Pastoralism" & npp_max_15km > 1610), c("prob", "L.prob", "U.prob")] <- NA

# omit out of sample predictions (15km warm subsample, minus 9)
# eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Foraging" & npp_max_15km < 142 | subsistence %in% "Foraging" & npp_max_15km > 1815), c("prob", "L.prob", "U.prob")] <- NA
# eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Horticulture" & npp_max_15km < 203  | subsistence %in% "Horticulture" & npp_max_15km > 1991), c("prob", "L.prob", "U.prob")] <- NA
# eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Int. agriculture" & npp_max_15km > 1843), c("prob", "L.prob", "U.prob")] <- NA
# eff_df_long_polr_15[with(eff_df_long_polr_15, subsistence %in% "Pastoralism" & npp_max_15km > 1610), c("prob", "L.prob", "U.prob")] <- NA

thousands <- function(x) x/1000
    
# plot
pop_den_polr_15 <- ggplot(eff_df_long_polr_15, aes(x = npp_max_15km, y = prob, 
                          fill = subsistence, color = subsistence)) +
    geom_ribbon(aes(ymin = L.prob, ymax = U.prob), color = NA, alpha = 0.3) +
    geom_line(size = 0.6) +
    scale_y_continuous(labels = percent, limits = c(0, 1)) +
    scale_fill_manual(values = cbbPalette) + 
    scale_color_manual(values = cbbPalette) +
    scale_x_continuous(labels = thousands) +
    facet_grid(subsistence ~ pop_density_tern_ord) + # reverse?
    geom_rug(data = dat[complete.cases(dat$pop_density_tern_ord), ], 
        sides = "b", aes(x = npp_max_15km, y = NULL)) +
    labs(x = bquote(NPP[max]~~15~km~radius~(Kg~C/m^{2}/year)), 
        y = "Predicted probability of population density") +
    theme(legend.position = "none",
          panel.grid.major.y = element_line(size = 0.5, color = "#f0f0f0"),
          axis.text.x = element_text(size = 8, angle = 0, margin = margin(t=2), color = "black"),
    axis.text.y = element_text(size = 8, hjust = 1, margin = margin(r=2), color = "black"))
ggsave(pop_den_polr_15, file = "figure_4.pdf", height = 5, width = 4.5) 



###########################################################################################################
###########################################################################################################
# POLR: odds ratios for NPP 15 km vs pop density, by each subsistence group

dat$subsistence <- relevel(dat$subsistence, ref = "Foraging")
model_polr_15_forg <- polr(pop_density_tern_ord ~ subsistence * I(npp_max_15km/500) + ET + mean_annual_precip_mm + latitude_abs + 
                                       marine_dist + mobility_bin + fishing_bin, Hess = TRUE, data = dat)                                        
Foragers_polr_15 <- exp(c(coef(model_polr_15_forg)["I(npp_max_15km/500)"], confint.default(model_polr_15_forg)["I(npp_max_15km/500)", ]))


dat$subsistence <- relevel(dat$subsistence, ref = "Horticulture")
model_polr_15_hort <- polr(pop_density_tern_ord ~ subsistence * I(npp_max_15km/500) + ET + mean_annual_precip_mm + latitude_abs + 
                                       marine_dist + mobility_bin + fishing_bin, Hess = TRUE, data = dat)                                        
Horticulture_polr_15 <- exp(c(coef(model_polr_15_hort)["I(npp_max_15km/500)"], confint.default(model_polr_15_hort)["I(npp_max_15km/500)", ]))


dat$subsistence <- relevel(dat$subsistence, ref = "Intensive agriculture")
model_polr_15_intAgr <- polr(pop_density_tern_ord ~ subsistence * I(npp_max_15km/500) + ET + mean_annual_precip_mm + latitude_abs + 
                                       marine_dist + mobility_bin + fishing_bin, Hess = TRUE, data = dat)                                        
Inten_agriculture_polr_15 <- exp(c(coef(model_polr_15_intAgr)["I(npp_max_15km/500)"], confint.default(model_polr_15_intAgr)["I(npp_max_15km/500)", ]))


dat$subsistence <- relevel(dat$subsistence, ref = "Pastoralism")
model_polr_15_past <- polr(pop_density_tern_ord ~ subsistence * I(npp_max_15km/500) + ET + mean_annual_precip_mm + latitude_abs + 
                                       marine_dist + mobility_bin + fishing_bin, Hess = TRUE, data = dat)                                        
Pastoralism_polr_15 <- exp(c(coef(model_polr_15_past)["I(npp_max_15km/500)"], confint.default(model_polr_15_past)["I(npp_max_15km/500)", ]))


# odds ratio of increasing population density by one unit, per 500 unit increase in NPP Max for each subsistence mode
OR_polr_15 <- rbind(Foragers_polr_15, Horticulture_polr_15, Inten_agriculture_polr_15, Pastoralism_polr_15)






###################################################################################
###################################################################################
###################################################################################
# ---------------------------------------------------------------------------------------------------------
# MODEL polr 120 km

options(width = 250)

# 11 societies omitted because of NAs (9 in warm subsample)
model_polr_120 <- polr(pop_density_tern_ord ~ subsistence +
                                       npp_max_120km + 
                                       ET + 
                                       mean_annual_precip_mm + 
                                       latitude_abs + 
                                       marine_dist +
                                       mobility_bin + 
                                       fishing_bin +
                                       subsistence:npp_max_120km, 
                Hess = TRUE, 
                data = dat) # dat[dat$climate %in% "warm", ]
summary(model_polr_120)
polr.p(model_polr_120)
Anova(model_polr_120, type = "III") # subsistence:npp_max_120km: p = 0.067
#confint(model_polr_120)

# proportional odds assumption
par(mfrow = c(2, 4))
plot.xmean.ordinaly(pop_density_tern_ord ~ subsistence + npp_max_120km + ET + mean_annual_precip_mm + latitude_abs + marine_dist, data = dat, cr = TRUE, topcats = 3)
par(mfrow = c(1, 1))

# post-estimation effects
eff_polr_120 <- allEffects(model_polr_120, xlevels = list(npp_max_120km = seq(36, 2386, by = 10)))
eff_df_polr_120 <- as.data.frame(eff_polr_120[["subsistence:npp_max_120km"]])

# reshape effects
eff_df_long_polr_120 <- setDF(melt(
    data = setDT(eff_df_polr_120),
    measure.vars = list(c("prob.Low", "prob.Medium", "prob.High"), 
                        c("logit.Low", "logit.Medium", "logit.High"),
                        c("se.prob.Low", "se.prob.Medium", "se.prob.High"),
                        c("se.logit.Low", "se.logit.Medium", "se.logit.High"),
                        c("L.prob.Low", "L.prob.Medium", "L.prob.High"),
                        c("U.prob.Low", "U.prob.Medium", "U.prob.High"),
                        c("L.logit.Low", "L.logit.Medium", "L.logit.High"),
                        c("U.logit.Low", "U.logit.Medium", "U.logit.High")   
                        ),
    variable.name = "pop_density_tern_ord",
    value.name = c("prob", "logit", "se.prob", "se.logit", "L.prob", "U.prob", "L.logit", "U.logit"),
    variable.factor = FALSE
    ))

eff_df_long_polr_120[eff_df_long_polr_120$pop_density_tern_ord == 1, "pop_density_tern_ord"] <- "Low"
eff_df_long_polr_120[eff_df_long_polr_120$pop_density_tern_ord == 2, "pop_density_tern_ord"] <- "Medium"
eff_df_long_polr_120[eff_df_long_polr_120$pop_density_tern_ord == 3, "pop_density_tern_ord"] <- "High"

eff_df_long_polr_120$pop_density_tern_ord <- factor(eff_df_long_polr_120$pop_density_tern_ord, 
    levels = c("Low", "Medium", "High"))
eff_df_long_polr_120$pop_density_tern_ord_rev <- factor(eff_df_long_polr_120$pop_density_tern_ord, 
    levels = rev(c("Low", "Medium", "High")))

ddply(dat, .(subsistence), summarise, 
    Min = min(npp_max_15km, na.rm = TRUE), 
    Max = max(npp_max_15km, na.rm = TRUE),
    Min120 = min(npp_max_120km, na.rm = TRUE), 
    Max120 = max(npp_max_120km, na.rm = TRUE)
    )

# omit out of sample predictions (120km full sample, minus 11)
eff_df_long_polr_120[with(eff_df_long_polr_120, subsistence %in% "Foraging" & npp_max_120km < 203 | subsistence %in% "Foraging" & npp_max_120km > 2346), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_120[with(eff_df_long_polr_120, subsistence %in% "Foraging" & pop_density_tern_ord %in% "High"), c("L.prob", "U.prob")] <- NA
eff_df_long_polr_120[with(eff_df_long_polr_120, subsistence %in% "Horticulture" & npp_max_120km < 431 | subsistence %in% "Horticulture" & npp_max_120km > 2260), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_120[with(eff_df_long_polr_120, subsistence %in% "Int. agriculture" & npp_max_120km < 59 | subsistence %in% "Int. agriculture" & npp_max_120km > 2384), c("prob", "L.prob", "U.prob")] <- NA
eff_df_long_polr_120[with(eff_df_long_polr_120, subsistence %in% "Pastoralism" & npp_max_120km < 38 | subsistence %in% "Pastoralism" & npp_max_120km > 1666), c("prob", "L.prob", "U.prob")] <- NA

thousands <- function(x) x/1000
    
# plot
pop_den_polr_120 <- ggplot(eff_df_long_polr_120, aes(x = npp_max_120km, y = prob, 
                          fill = subsistence, color = subsistence)) +
    geom_ribbon(aes(ymin = L.prob, ymax = U.prob), color = NA, alpha = 0.3) +
    geom_line(size = 0.6) +
    scale_y_continuous(labels = percent, limits = c(0, 1)) +
    scale_fill_manual(values = cbbPalette) + 
    scale_color_manual(values = cbbPalette) +
    scale_x_continuous(labels = thousands) +
    facet_grid(subsistence ~ pop_density_tern_ord) + # reverse?
    geom_rug(data = dat[complete.cases(dat$pop_density_tern_ord), ], 
        sides = "b", aes(x = npp_max_120km, y = NULL)) +
    labs(x = bquote(NPP[max]~~120~km~radius~(Kg~C/m^{2}/year)), 
        y = "Predicted probability of population density") +
    theme(legend.position = "none",
          panel.grid.major.y = element_line(size = 0.5, color = "#f0f0f0"),
          axis.text.x = element_text(size = 7, angle = 0, margin = margin(t=2), color = "black"),
    axis.text.y = element_text(size = 7, hjust = 1, margin = margin(r=2), color = "black"))
ggsave(pop_den_polr_120, file = "figure_S4.pdf", height = 5, width = 4.5) 



