# ---
#   title: 'Analyses: Changes in age-structure over four decades were a 
#           key determinant of population growth rate in a long-lived mammal'
#   author: "John Jackson"
#   date: "April 2020"
# ---

## Please ensure your working directory is the zipped file with this script
## Refer to markdown file for more information, This script has just been created for
## the ease of running analyses
rm(list = ls())

##__________________________________________________________________________________________________
#### 1. Loading packages, functions, and data ####

## You must have these packages
library(tidyverse)
library(gridExtra)
library(popbio)
library(expm)
library(viridis)

## Function to create age-structured Leslie matrix from 
## Fertility and Survival + n_0 from time series data
source("les_ele.R",echo = T)

## Data
load("Jackson_etal_data.RData", verbose = T)
glimpse(fit_year) # Fitted age-specific demographic data for each year
glimpse(ts_dat) # Time-series data
glimpse(fit_sim) # Posterior simulations of fitted data

##__________________________________________________________________________________________________
#### 2. Generating MPMs between 1970-2014 ####

years <- 1970:2014
projection_matrices <- list()
starting_agestructure <- list()

## a. Go through years and generate A[t] and n[t] i.e. Annual projection model
for(i in 1:length(years)){
  
  crr_year = years[i]
  crr_mod = les_ele(fitted_data = fit_year, timeseries_data = ts_dat, year = crr_year)
  
  projection_matrices[[i]] = crr_mod$projection_matrix
  starting_agestructure[[i]] = crr_mod$NT0
  
}

## b. Average environment projection model

# mean projection matrix
average_matrix <- Reduce("+", projection_matrices[which(years == 1970):length(years)])/ 
  length(which(years == 1970):length(years)) 

# right eignevector of average matrix, calculated using popbio
average_nt0 <- (stable.stage(average_matrix))* 
  mean(unlist(lapply(starting_agestructure[which(years == 1970):length(years)],FUN = sum)))

##__________________________________________________________________________________________________
#### 3. Decomposition experiment vs. average environment ####

## Now in each year, we have an 'observed' projection model, and an average
## environment model. In each year, do pairwise combinations of terms, to 
## incorporate observed A[t] and n[t] independently whilst holding the other 
## at the average environment

## a. Set up data for the pairwise combinations (also binary indicators for the ANOVA decomposition)
AD_data <- expand.grid(A_text = c("A_av","At"), 
                          N_text = c("N_av","Nt"),
                          year = years,  
                          lambda_realised = NA) %>% 
  mutate(A = ifelse(A_text == "A_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         AN = A*N, year_f =  factor(year))

glimpse(AD_data)

## b. Calculate lambda for each scenario
for(i in 1:nrow(AD_data)){
  
  crr_row = AD_data[i,]
  
  # current projection matrix
  if(crr_row$A_text == "A_av"){crr_A = average_matrix}
  else{crr_A = projection_matrices[[which(years == crr_row$year)]]}
  
  # current starting age-structure
  if(crr_row$N_text == "N_av"){crr_N = average_nt0}
  else{crr_N = starting_agestructure[[which(years == crr_row$year)]]}
  
  # calculate one-step ahead realised lambda
  n_t = (crr_A %*% crr_N)
  AD_data[i,"lambda_realised"] = sum(n_t)/sum(crr_N)
  
}

##__________________________________________________________________________________________________
#### 4. ANOVA Decomposition for annual contributions ####

AD_mod <- lm(log(lambda_realised) ~ 1 + A:year_f + N:year_f + AN:year_f, data = AD_data) 
model.matrix(log(lambda_realised) ~ 1 + A:year_f + N:year_f + AN:year_f, data = AD_data)

# Have a look at the contributions (i.e. coefficients in each year)
mod_coef <- coef(AD_mod)
coef_dat <- data.frame(year = 1970:2014, 
                       cont = c(mod_coef[grep("A[:]", names(mod_coef))], 
                                mod_coef[grep("[:]N", names(mod_coef))],
                                mod_coef[grep("[:]AN", names(mod_coef))]),
                       term = c(rep("A", 45), rep("N", 45), rep("AN",45)),
                       Intercept = coef(AD_mod)["(Intercept)"])
year_dat <- coef_dat %>% group_by(year) %>% summarise(pred_lambda = sum(cont) + Intercept[1])
coef_dat$term <- factor(coef_dat$term, levels = c("A", "N", "AN"))

glimpse(coef_dat)
ggplot(coef_dat, aes(x = year, y = cont, shape = term, colour = term)) +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_point(size = 3)

##__________________________________________________________________________________________________
#### 5. Decomposing Survival and Birth contributions ####

## a. VD data set up
AD_data_p_f <- expand.grid(p_text = c("p_av","pt"), 
                              f_text = c("f_av","ft"), 
                              N_text = c("N_av","Nt"),
                              year = years,  lambda_realised = NA) %>% 
  mutate(p = ifelse(p_text == "p_av",0,1),
         f = ifelse(f_text == "f_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         pFN = p*f*N, year_f =  factor(year))
glimpse(AD_data_p_f)

## b. Average environment terms
delta <- row(average_matrix) - col(average_matrix) # delta matrix to recreate A[t]

# Divide by S to get back to birth in order to split it out
f_av <- (fit_year %>% group_by(age) %>% summarise(f = mean(Fert)/mean(Surv)))$f 
p_av <- (fit_year %>% group_by(age) %>% summarise(p = mean(Surv)))$p

## c. Calculate lambda for each scenario again
for(i in 1:nrow(AD_data_p_f)){
  
  crr_row = AD_data_p_f[i,]
  
  # current SURVIVAL
  if(crr_row$p_text == "p_av"){crr_p = p_av}
  else{crr_p = (fit_year %>% filter(year == crr_row$year) %>%
                  group_by(age) %>% 
                  summarise(p = mean(Surv)))$p}
  
  # current FERTILITY - f = birth*p 
  if(crr_row$f_text == "f_av"){crr_f = f_av} 
  else{crr_f = (fit_year %>% filter(year == crr_row$year) %>%
                  group_by(age) %>%
                  summarise(f = mean(Fert)/mean(Surv)))$f}
  
  # current starting age-structure
  if(crr_row$N_text == "N_av"){crr_N = average_nt0}
  else{crr_N = starting_agestructure[[which(years == crr_row$year)]]}
  
  # create the matrix from each component
  crr_A = average_matrix*0
  
  # put in your new values
  crr_p_mat = crr_p[1:((length(crr_p)) - 1)]
  crr_A[delta == 1] = crr_p_mat
  crr_A[1,] = crr_f*crr_p
  
  # calculate one-steap ahead realised lambda
  n_t = (crr_A %*% crr_N)
  AD_data_p_f[i,"lambda_realised"] = sum(n_t)/sum(crr_N)
  
}

## d. Variance decomposition model for birth and survival
AD_mod_pf <- lm(log(lambda_realised) ~ 1 + (p + f + N + p:N + f:N):year_f, 
                data = AD_data_p_f)
model.matrix(log(lambda_realised) ~ 1 + (p + f + N + p:N + f:N):year_f, 
   data = AD_data_p_f)

coef_pf <- coef(AD_mod_pf)
coef_dat_pf <- data.frame(year = 1970:2014, 
                       cont = c(coef_pf[grep("p[:]year", names(coef_pf))], 
                                coef_pf[grep("f[:]year", names(coef_pf))],
                                coef_pf[grep("^N[:]year", names(coef_pf))]),
                       term = c(rep("p", 45), rep("f", 45), rep("N",45)),
                       Intercept = coef(AD_mod_pf)["(Intercept)"])

glimpse(coef_dat_pf)
ggplot(filter(coef_dat_pf, term != "N"), aes(x = year, y = cont, shape = term, colour = term)) +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_line() +
  geom_point(size = 3)


##__________________________________________________________________________________________________
#### 6. Posterior simulation for confidence intervals ####

glimpse(fit_sim)
# but doesn't this only estimate the uncertainty in the A term -> no uncertainty (apart from sampling) in N

# Generating the parts of the decomposition experiment
n <- 1000

delta <- row(average_matrix) - col(average_matrix) # delta matrix to recreate A[t]

#___________________________________________________________________________________________________
## a. Age-structure and environment components 

AD_fill <- expand.grid(A_text = c("A_av","At"), 
                       N_text = c("N_av","Nt"),
                       year = years, 
                       lambda_realised = NA) %>% 
  mutate(A = ifelse(A_text == "A_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         AN = A*N, year_f =  factor(year))

# Data to store the results
AD_sim <- expand.grid(sim = seq_len(n), 
                      A_text = c("A_av","At"), 
                      N_text = c("N_av","Nt"),
                      year = years, 
                      lambda_realised = NA) %>% 
  mutate(A = ifelse(A_text == "A_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         AN = A*N, year_f =  factor(year))

coef_sim <- expand.grid(sim = seq_len(n), 
                        year = 1970:2014, 
                        cont = NA, 
                        term = c("A","N","AN"))

### Running through simulations and repeating decomposition
for(i in seq_len(n)){
  
  # current simulation
  crr_sim = i
  
  #________________________________________________________
  ## 1. Generating model parameters
  
  # current fitted data
  crr_fit = dplyr::filter(fit_sim, sim == crr_sim)
  
  # Average environment model parameters
  fit_av = crr_fit %>% group_by(age) %>% 
    summarise(f = mean(Fert),
              p = mean(Surv))
  
  # Create average age-structure based on average f and p in the simulation
  av_A = average_matrix*0
  av_p_mat = fit_av$p[1:((nrow(fit_av)) - 1)]
  av_A[delta == 1] = av_p_mat
  av_A[1,] = fit_av$f # here don't separate fertility and survival 
  
  av_N = (popbio::stable.stage(av_A))* 
    mean(unlist(lapply(starting_agestructure[which(years == 1970):length(years)],FUN = sum)))
  
  #________________________________________________________
  ## 2. Calculate lambda for each scenario
  
  for(j in 1:nrow(AD_fill)){
    
    crr_row = AD_fill[j,]
    
    # current Projection matrix
    if(crr_row$A_text == "A_av"){
      crr_p = fit_av$p
      crr_f = fit_av$f}
    else{crr_p = (crr_fit %>% filter(year == crr_row$year) %>%
                    group_by(age) %>% 
                    summarise(p = mean(Surv)))$p
         crr_f = (crr_fit %>% filter(year == crr_row$year) %>%
               group_by(age) %>% 
               summarise(f = mean(Fert)))$f}
    
    # current starting age-structure
    if(crr_row$N_text == "N_av"){crr_N = av_N}
    else{crr_N = starting_agestructure[[which(years == crr_row$year)]]}
    
    # create the matrix from each component
    crr_A = average_matrix*0
    
    # put in your new values
    crr_p_mat = crr_p[1:((length(crr_p)) - 1)]
    crr_A[delta == 1] = crr_p_mat
    crr_A[1,] = crr_f
    
    # calculate one-steap ahead realised lambda
    n_t = (crr_A %*% crr_N)
    AD_fill[j,"lambda_realised"] = sum(n_t)/sum(crr_N)
  }
  
  #________________________________________________________
  ## 3. ANOVA decomposition
  
  AD_mod = lm(log(lambda_realised) ~ 1 + A:year_f + N:year_f + AN:year_f, data = AD_fill) 
  mod_coef = coef(AD_mod)
  
  #________________________________________________________
  ## 4. Storing data
  
  AD_sim[which(AD_sim$sim == crr_sim), "lambda_realised"] = AD_fill$lambda_realised
  coef_sim[which(coef_sim$sim == crr_sim), "cont"] = c(mod_coef[grep("A[:]", names(mod_coef))], 
                                                       mod_coef[grep("[:]N", names(mod_coef))],
                                                       mod_coef[grep("[:]AN", names(mod_coef))])
  
  cat('\r',"Your Job is",round(i/n, 3) * 100,"% Complete")
}


#___________________________________________________________________________________________________
## b. Fertility and survival components
AD_fill_pf <- expand.grid(p_text = c("p_av","pt"), 
                           f_text = c("f_av","ft"), 
                           N_text = c("N_av","Nt"),
                           year = years,  lambda_realised = NA) %>% 
  mutate(p = ifelse(p_text == "p_av",0,1),
         f = ifelse(f_text == "f_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         pFN = p*f*N, year_f =  factor(year))
glimpse(AD_fill_pf)


AD_sim_pf <- expand.grid(sim = seq_len(n),
                           p_text = c("p_av","pt"), 
                           f_text = c("f_av","ft"), 
                           N_text = c("N_av","Nt"),
                           year = years,  lambda_realised = NA) %>% 
  mutate(p = ifelse(p_text == "p_av",0,1),
         f = ifelse(f_text == "f_av",0,1), 
         N = ifelse(N_text == "N_av",0,1), 
         pFN = p*f*N, year_f =  factor(year))

coef_sim_pf <- expand.grid(sim = seq_len(n), 
                        year = 1970:2014, 
                        cont = NA, 
                        term = c("p","f","N"))

# Running through simulations and repeating decomposition
for(i in seq_len(n)){
  
  # current simulation
  crr_sim = i
  
  #________________________________________________________
  ## 1. Generating model parameters
  
  # current fitted data
  crr_fit = dplyr::filter(fit_sim, sim == crr_sim)

  # Average environment vital rates divide by S to get back to birth in order to split it out
  f_av = (crr_fit %>% group_by(age) %>% summarise(f = mean(Fert)/mean(Surv)))$f 
  p_av = (crr_fit %>% group_by(age) %>% summarise(p = mean(Surv)))$p
  
  # Create average age-structure based on average f and p in the simulation
  av_A = average_matrix*0
  av_p_mat = p_av[1:((length(p_av)) - 1)]
  av_A[delta == 1] = av_p_mat
  av_A[1,] = f_av*p_av
  
  av_N = (popbio::stable.stage(av_A))* 
    mean(unlist(lapply(starting_agestructure[which(years == 1970):length(years)],FUN = sum)))
  
  #________________________________________________________
  ## 2. Calculate lambda for each scenario
  
  for(j in 1:nrow(AD_fill_pf)){
    
    crr_row = AD_fill_pf[j,]
    
    # current SURVIVAL
    if(crr_row$p_text == "p_av"){crr_p = p_av}
    else{crr_p = (crr_fit %>% filter(year == crr_row$year) %>%
                    group_by(age) %>% 
                    summarise(p = mean(Surv)))$p}
    
    # current FERTILITY - f = birth*p 
    if(crr_row$f_text == "f_av"){crr_f = f_av} 
    else{crr_f = (crr_fit %>% filter(year == crr_row$year) %>%
                    group_by(age) %>%
                    summarise(f = mean(Fert)/mean(Surv)))$f}
    
    # current starting age-structure
    if(crr_row$N_text == "N_av"){crr_N = av_N}
    else{crr_N = starting_agestructure[[which(years == crr_row$year)]]}
    
    # create the matrix from each component
    crr_A = average_matrix*0
    
    # put in your new values
    crr_p_mat = crr_p[1:((length(crr_p)) - 1)]
    crr_A[delta == 1] = crr_p_mat
    crr_A[1,] = crr_f*crr_p
    
    # calculate one-steap ahead realised lambda
    n_t = (crr_A %*% crr_N)
    AD_fill_pf[j,"lambda_realised"] = sum(n_t)/sum(crr_N)
  }
  
  #________________________________________________________
  ## 3. ANOVA decomposition
  AD_mod_pf = lm(log(lambda_realised) ~ 1 + (p + f + N + p:N + f:N):year_f, 
                  data = AD_fill_pf)
  mod_coef_pf = coef(AD_mod_pf)
  
  #________________________________________________________
  ## 4. Storing data
  AD_sim_pf[which(AD_sim_pf$sim == crr_sim), "lambda_realised"] = AD_fill_pf$lambda_realised
  coef_sim_pf[which(coef_sim_pf$sim == crr_sim), "cont"] = c(mod_coef_pf[grep("p[:]year", names(mod_coef_pf))], 
                                                       mod_coef_pf[grep("f[:]year", names(mod_coef_pf))],
                                                       mod_coef_pf[grep("^N[:]year", names(mod_coef_pf))])
  
  cat('\r',"Your Job is",round(i/n, 3) * 100,"% Complete")

}

# #### Save the simulation data
# save(AD_sum, coef_sum, coef_sum_pf,
#      file = "Jackson_etal_simulationdata.RData")

##__________________________________________________________________________________________________
#### 7. Plots

load("Jackson_etal_simulationdata.RData", verbose = T)

##____________________________________________________________
# a. Figure 2
lamb <- AD_data %>% 
  mutate(param_comb = paste0(A_text,":",N_text)) %>% 
  filter(param_comb %in% c("At:Nt", "A_av:N_av") == T)

# Not needed here because these are in Jackson_etal_simmulationdata.RData
# coef_sum <- coef_sim %>% 
#   group_by(year, term) %>% 
#   summarise(mn_cont = mean(cont),
#             lwr = quantile(cont, 0.05),
#             upr = quantile(cont, 0.95))
# 
# AD_sum <- AD_sim %>% 
#   mutate(param_comb = paste0(A_text,":",N_text)) %>% 
#   group_by(year,param_comb) %>% 
#   summarise(lwr = quantile(log(lambda_realised), 0.05),
#             upr = quantile(log(lambda_realised), 0.95)) %>% 
#   filter(param_comb %in% c("At:Nt", "A_av:N_av") == T)

plasma_cols <- plasma(10)

f2a <- ggplot(lamb,aes(x = year,y = log(lambda_realised),
                               colour = param_comb, fill = param_comb)) +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_ribbon(data = AD_sum, aes(ymax = upr, ymin = lwr, 
                                 y = NULL,colour = NULL),
              alpha = 0.15, show.legend = FALSE) + 
  geom_line() +
  scale_color_manual(name = NULL, 
                     labels = c((expression(paste("Average-environment"))),
                                (expression(paste("Observed ",lambda,"(",bold("A")[italic("t")], ",", bold("n")[italic("t")], ")")))),
                     values = c("black", plasma_cols[8]),
                     aesthetics = c("colour", "fill")) +
  scale_x_continuous(breaks = seq(1970,2015, by = 5),
                     labels = c("1970", "", "1980", "", "1990",
                                "", "2000", "", "2010", ""),
                     limits = c(1970,2014), expand = c(0,0)) +
  labs(x = "Year", y = "ln Population growth rate", tag = "a)") + 
  theme_bw(base_size = 17) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        legend.key.size = unit(0.6, "cm"),
        legend.position = c(0.16,0.13), 
        legend.text = element_text(size = 8),
        legend.title = element_text(size = 14),
        legend.background = element_blank(),
        legend.text.align = 0) #### <- USEFUL TO ALIGN TEXT IN LEGEND

f2b <- ggplot(coef_sum, aes(x = year, y = mn_cont, colour = term, fill = term)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_ribbon(aes(ymax = upr, ymin = lwr, 
                                   y = NULL, colour = NULL),
              alpha = 0.15, show.legend = FALSE) + 
  geom_line() +
  scale_color_manual(name = NULL,labels = c(expression(paste("Environmental component ", 
                                                 Delta, bold("A")[italic("t")])),
                                expression(paste("Age-structure component ", 
                                                 Delta, bold("n")[italic("t")])),
                                expression(paste("Age-structure and environment, ", 
                                                 Delta, bold("A")[italic("t")], "x", 
                                                 Delta, bold("n")[italic("t")]))),
                     values = plasma_cols[c(6,2,8)],
                     aesthetics = c("colour", "fill")) +
  scale_x_continuous(breaks = seq(1970,2015, by = 5),
                     labels = c("1970", "", "1980", "", "1990",
                                "", "2000", "", "2010", ""),
                     limits = c(1970,2014), expand = c(0,0)) +
  labs(x = "Year", y = "Contribution to \n ln Population growth rate",tag = "b)") + 
  theme_bw(base_size = 17) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.key.size = unit(0.7, "cm"), legend.position = c(0.28,0.13), 
        legend.text = element_text(size = 8), legend.background = element_blank(),
        legend.text.align = 0)

grid.arrange(f2a,f2b, ncol = 2)

# ggsave(grid.arrange(f2a,f2b, ncol = 2),
#        filename = "figure2.pdf",
#        width = 13, height = 5, units = "in")

##____________________________________________________________
# b. Figure 3

# coef_sum_pf <- coef_sim_pf %>% 
#   mutate(term = as.character(term)) %>% 
#   group_by(year, term) %>% 
#   summarise(mn_cont = mean(cont),
#             lwr = quantile(cont, 0.05),
#             upr = quantile(cont, 0.95)) %>% 
#   filter(term != "N")

plasma_cols <- plasma(10)

codat_pf <- filter(coef_dat_pf, term != "N")

f3 <- ggplot(coef_sum_pf, aes(x = year, y = mn_cont, colour = term, fill = term)) +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_ribbon(aes(ymax = upr, ymin = lwr, y = NULL, colour = NULL),
              alpha = 0.15, show.legend = FALSE) + 
  geom_line(aes(fill = NULL)) +
  scale_color_manual(name = NULL, 
                     labels = c(expression(paste("Birth probability ", Delta, bold("f")[italic("t")])),
                                expression(paste("Survival probability ", Delta, bold("p")[italic("t")]))),
                     values = plasma_cols[c(7,1)],
                     aesthetics = c("colour", "fill")) +
  scale_x_continuous(breaks = seq(1970,2015, by = 5),
                     labels = c("1970", "", "1980", "", "1990",
                                "", "2000", "", "2010", ""),
                     limits = c(1970,2014), expand = c(0,0)) +
  labs(x = "Year", y = "Contribution to \n ln Population growth rate") + 
  theme_bw(base_size = 17) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        legend.position = c(0.22,0.13), 
        legend.text = element_text(size = 10),
        legend.background = element_blank(),
        legend.text.align = 0)
f3

# ggsave(f3,filename = "figure3.pdf",
#        width = 6.5, height = 5, units = "in")

##_____________________________END______________________________________________

