#NOTE you will need to download Rstan and brms` 
#RSTAN:: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
#BRMS:: https://github.com/paul-buerkner/brms
#There are alot of help brms vignettes online. I would start with the baseline brms vignette`

#### Load libraries ####
library(tidyverse)
library(brms)
library(ggplot2)
library(broom)
library(bayesplot)
library(glmmTMB)
library(loo)
library(lme4)
library(performance)
library(rethinking)
library(sjPlot)
library(tidybayes)
library(purrr)
library(forcats)
library(sjlabelled)
library(rstan)
library(magrittr)
library(car)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

#### Load your dataset ####
setwd("~/Desktop/Squirrels/FID_squirrel project/csvs")
FIDs <- read.csv("FID_Summary.csv", header = T, strip.white = T)

## The dataset ##

#Chelsea measured the flight initation distances of squirrels at her study site and additionally recorded their fleeing behaviour. Squirrels either fled into a burrow, or fled a distance away from an experimenter and typically observed the experimenter for some time from a distance. Chelsea is interested in finding out whether FIDs are repeatable, and whether or not the post-FID response is repeatable. Furthermore, she trapped squirrels over several days, and is interested in whether squirrel trappability is repeatbale and related to their FID response. 

#There are a number of components to the FID response. These components might be correlated, but it will be hard to test the repeatability of the overall response because of limitations of the data. However, we can breakdown the FID response into its individual components and test the repeatability of the individual components, and we can probably also look at correlation between the components aswell. 

#### QUESTIONS ####

#1. Is FID repeatable? What predicts FID response? For example, does home disturbance, trappability or the presence of conspecifics influence FID

#2. Does sex or life stage differ in repeatability/variance componenents

#3. Is the post-FID response repeatable?
# - do individuals that enter shelter take similar times to re-emerge from shelter
# - do individuals that take longer to emerge from shelter also have longer FID responses
# - is stop_look_distance or look_duration_sex repeatable and is related to FID response

#4. Is trappability repeatable 
# - this may be hard to do because we don't have a consistent daily metric to use for repeatability
# - Are in-trap behaviours repeatable (follow Petelle et al. 2013 and Reale et al. 2000)

## Variables in dataset ##

# `day`: day of the year the trial occurred 
# `area_tested`: location within the colony the FID occurred  
# `area_tested_disturbance_by_year`: disturbance rate at the location the FID occurred  
# `home_site`: Location where focal animal frequents the most within a season  
# `home_site_disturbance_year`: disturbance rate at the focal animal's home site  
# `life_stage`: juvenile (0) or adult (1) *  
# `sex`: male (0) or female (1)  *
# `focal_animal ` : The individual's ID  *
#   `exact_age`: the best guess age of the focal animal if known  
# `trial_num`: the number of times the focal animal has been tested at that date *  
#   `uid`: Another identifier for each squirrel  
# `date_before`: the date of the most recent fecal collection PRIOR to the FID trial  
# `diff_num_day_before`: number of days between fecal sample collection and FID trial  
# `CV`: the error within the well from the fecal coritosterone ELISA  
# `conc_ng_g_div50_before` : concentration of corticosterone in fecal sample  
# `ln_cort_before`: the logarithm of corticosterone  
# `altered_standard`: standard ran well(0) or standard did not run well (1)  
# `conspecifics_pres`: whether or not conspeficis were present during the FID measurement  
# `conspeficifics_5m_num`: the number of conspecifics within 5m of focal individual during FID trial  
# `substrate`: height of vegetation (high), low or dirt  
# `cover`: HV (1) or low and dirt (0)  
# `distance_to_safety_m`: distance in metres the focal animal was from the nearest burrow  
# `start_distance_m`: distance in metres from the walker to the focal animal at the beginning of the trial  
# `FID_m`: distance in metres at which the focal animal fled from the walker  
# `emerge_time`: the time after the trial in which the focal animal emerged from the burrow if they escaped into the burrow
# `stop_look`: focal animal ran a certain distance and watched the observer  
# `stop_look_burrow`: focal animal ran to a burrow and watched from beside it  
# `stop_look_distance_m`: the distance in metres in which the focal animal fled before watching  
# `peak_burrow`: focal animal ran to a burrow and partially entered and peaked its head to watch observer  
# `look_duration_sec`: the duration of time the focal animal watched the walker after they fled  
# `second_look_duration_sec`: the duration of time the focal animal watched the walker after they fled for a  second time  
# `go_into_burrow`: focal animal ran into a burrow after FID trial
# `out_of_site`: focal animal ran out of site after FID trial

#preliminary analysis also suggests the corticosterone also does not have an effect on FID. If we include corticosterone in the model, then we also have to remove individuals that we do not have this data for. 

#### GENERAL DATA EXPLORATION ####

glimpse(FIDs)

#Need to change the class of some of the variables
FIDs$year <- as.factor(FIDs$year)
FIDs$conspecifics_pres <- as.character(as.numeric(FIDs$conspecifics_pres))
FIDs$sex <- as.character(as.numeric(FIDs$sex))
FIDs$life_stage <- as.character(as.numeric(FIDs$life_stage))
FIDs$cover <- as.character(as.numeric(FIDs$cover))
FIDs$high_disturbance <- as.character(as.integer(FIDs$high_disturbance))

#create FID sqrt variable
FIDs$FID_sqrt = sqrt(FIDs$FID_m)

#> ---- Create subsets of the data ----
#I also like to create subsets of data for groups that I might want to test independently (e.g. sex or life stage)
juveniles = FIDs %>%
  filter(life_stage == '0')

adults = FIDs %>%
  filter(life_stage == '1')

males = FIDs %>%
  filter(sex == '0')

females = FIDs %>%
  filter (sex == '1')

sheltered = FIDs %>%
  filter(!is.na(time_until_emerge)) #141 observations

#also make a column in the FIDs data.frame separating squirrels that sheltered with those that didnt
FIDs = FIDs %>%
  mutate(sheltered = ifelse(!is.na(time_until_emerge), 'yes', 'no'))

#> ---- Sample size per individual ----
sample_size = FIDs %>%
  group_by(focal_animal) %>%
  summarise(n_samples = n())

#range of samples per individual
range(sample_size$n_samples, na.rm = T)

ggplot(sample_size, aes(x = n_samples)) + 
  geom_histogram(binwidth = 0.5) +
  scale_x_continuous(limits = c(0, 15),breaks = seq(0:15))+
  scale_y_continuous(limits = c(0,20), breaks = seq(0:20))+
  theme_bw()+
  ylab("Number of individuals")+
  xlab("Number of trials")


#> ---- Sample size per sex and age group ----
(sample_size_sex = FIDs %>%
    group_by(sex) %>%
    summarise(n_samples = length(unique(focal_animal))))

#   sex   n_samples
#   <chr>     <int>
# 1 0            25 #males
# 2 1            63 #females


(sample_size_age = FIDs %>%
    group_by(year, life_stage) %>%
    summarise(n_samples = length(unique(focal_animal))))

# year  life_stage n_samples
# <fct> <chr>          <int>
# 1 2018  0                 32
# 2 2018  1                 26
# 3 2019  0                 23
# 4 2019  1                 29

#More juveniles than adults tested. Given numbers are close we could consider at looking at repeatability seperately for each life-stage

(sample_size_age = FIDs %>%
    group_by(sex,life_stage) %>%
    summarise(n_samples = length(unique(focal_animal))))

# sex   life_stage n_samples
# <chr> <chr>          <int>
# 1 0     0                 18
# 2 0     1                 11
# 3 1     0                 37
# 4 1     1                 33


#> ---- Population summary statistics ----
(FIDs %>%
  summarise(n_individuals = length(unique(focal_animal)),
            mean_FID = mean(FID_m),
            std_FID = sd(FID_m)))
#88 unique individuals, and the average FID was around 8.5 metres with std of 4.76 metres

#> ---- Explore the FID data ----
hist(FIDs$FID_m)
#slightly right skewed
hist(sqrt(FIDs$FID_m))
#sqrt-transformation makes the data look closer to normally distributed


boxplot(FID_m ~ sex, data = FIDs)
hist(males$FID_m)
hist(females$FID_m)
# sex FIDs look similarly distributed, and have similar variation

boxplot(FID_m ~ life_stage, data = FIDs)
hist(juveniles$FID_m)
hist(adults$FID_m)
# life_stage FIDs look similarly distributed, but it looks like adults might have more variation

#We can also use a Cleveland dotcharts
dotchart(FIDs$FID_m)
dotchart(sqrt(FIDs$FID_m))

#let's run a basic model and look at the distribution of the residuals without the transformation
FID.model <- lmer(FID_m ~ sex + life_stage + conspecifics_pres + distance_to_safety_m + home_site_disturbance_by_year + trial_num + start_distance_m + trap_prop + (1|focal_animal) + (1|walker), data = FIDs)
drop1(FID.model, test = 'Chi') 
mod.resid = resid(FID.model)
qqnorm(mod.resid)
qqline(mod.resid) # a bit of right skew as our normal probability plot shows
hist(mod.resid) # the residual looks normally distributed, and centered around 0
plot(FIDs$FID_m, mod.resid, ylab = 'Residuals', xlab = 'FID_m')
abline(0,0)# a bit linear trend in the residuals


#now with the square-root transformation
sqrtFID.model <- lmer(FID_sqrt ~ sex + life_stage + conspecifics_pres + distance_to_safety_m + home_site_disturbance_by_year + trial_num + start_distance_m + trap_prop + (1|focal_animal) + (1|walker), data = FIDs)
drop1(sqrtFID.model, test = 'Chi') 
summary(sqrtFID.model)
mod.resid = resid(sqrtFID.model)
qqnorm(mod.resid)
qqline(mod.resid) # improves the fit, with some outlier points
hist(mod.resid)#model residuals looks a little skewed
plot(sqrt(FIDs$FID_m), mod.resid, ylab = 'Residuals', xlab = 'sqrt_FID_m')
abline(0,0)# less of a linear trend, but still somewhat present. 

#Given that mixed-effects are relatively robust, i think it is fine to use a Gaussian error distribution as the data distribution looks relatively normal. 

#The other thing we should do is check for collinearity between our predictor variables of interest. To do this we can look at the variance inflation factor
car::vif(sqrtFID.model)
#all values are near one which is good, suggesting that we have now issues with collinearity

library(Hmisc)
#filter data to only include variables in my model
corr.check = FIDs %>%
  select(distance_to_safety_m, home_site_disturbance_by_year, start_distance_m, trap_prop, area_tested_disturbance_by_year)
round(cor(as.matrix(corr.check), method = 'spearman'),2)
#looks like a negative association between home_site_disturbance and start_distance_m
scatterplot(corr.check$home_site_disturbance_by_year, corr.check$start_distance_m)
scatterplot(corr.check$home_site_disturbance_by_year, corr.check$area_tested_disturbance_by_year)

#> ----  Model selection for random effects using lmer ----
#Should I include area-tested? Maybe not because it can be soaked up by other variables (e.g distance from burrow etc.)
library(MuMIn)

#Here I am just comparing 3 models to test the random-effects structure of the model that we ultimately use
#When comparing random effects, always use REML = TRUE

RAND1 <- lm(FID_sqrt ~ 1, data = FIDs, REML = TRUE)
RAND2 <- lmer(FID_sqrt ~ 1 + (1|focal_animal), data = FIDs, REML = TRUE)
RAND3 <- lmer(FID_sqrt ~ 1 + (1|walker), data = FIDs, REML = TRUE)
RAND4 <- lmer(FID_sqrt ~ 1 + (1|focal_animal) + (1|walker), data = FIDs, REML = TRUE)

model.sel(RAND1, RAND2, RAND3, RAND4)
# looks like the model with both random factors is important (i.e ~87% certain that the best model contains this random effects structure)

#### ANALYSIS WITH BRMS ####
#Here I create a brms models with both age classes, I then compare models with and without certain random effects to determine what is the best random-effects structure for our model (similar to above but using brms), and to determine whether we have significant among-individual variation in FIDs. I then calculate FID repeatability, as well as walker repeatability. 

#The easiest analysis to do would be to look at FID repeatability (i.e. are individuals consistently different in their FIDs). 

#Both life-stages
#Note I decided not to include day or time in the model. I didn't include time because it looks like all observations were made in the morning, and I didn't include day because I can't really see how this is important. 

#> ---- Model1: General model ----
FID.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + start_distance_m*home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
FIDs.model1 <- brm(FID.bf,
                   data = FIDs,
                   family = gaussian,
                   cores = 4, #this cores used on your computer, cores should be equal to the number of chains
                   chains = 4, #number of chains the model runs for
                   warmup = 1000, #warmup (aka burnin) before the posterior is built
                   iter = 10000, #number of interations per chain (can lower potentially if you want the model to run faster)
                   prior = prior1,
                   control = list(adapt_delta = 0.99)) 


#note that you should run the model first WITHOUT this control = list(adapt_delta = 0.99) argument. If the model tells you that adapt_delta should be increased, then run this model again with the argument - the warning message also provides a link which you can go to, to read more about it. Don't run the model with this argument if you don't have to because it causes the model to take longer to run. 

#we now add a model comparison information critarion (WAIC). WAIC stands for widely applicable information criterion and is essentially a generalisation of AIC for use within a Baysesian framework. LOO is another form of information criterion used in Bayesian statistics, but this is more compuationally intensive so I don't like to use it.
FIDs.model1 <- add_criterion(FIDs.model1, 'loo')

tab_model(FIDs.model1, pred.labels = c("Intercept", "year", "sex (female)", "life stage (adult)", "trial number", "conspecifics present", "cover", "distance to safety", "start distance", "home disturbance score", "trappability", "start dist * home site disturb"), show.se = TRUE, show.icc = FALSE, show.ngroups = FALSE, title = "Flight initiation distance", dv.labels = "")

#Model summary. If you want to change the CIs you can add the argument prob = 0.89 (e.g. 89% CI). The default is 95% CIs
summary(FIDs.model1)
#summary(FIDs.model1, prob = '0.89')
#Note that we want all Rhat values to be close 1 and below 1.1. If this is the case then our chains have converged properly which is good. 

#save the model (I recommend doing this after you run a Bayesian model to avoid having to re-run it again if R crashes)
#set the directory where you want to save the model (I would recommend making a folder called model outputs (or something like this))
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(FIDs.model1, file = 'FIDs.model1')
##use the below code the re-load the model when you need to
#FIDs.model1 = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model1")

#diagnostics of the model. you are looking for good mixing (i.e. the lines showing a consistent pattern) and a most normal distribution of the parameter estimates. IF mixing is bad, likely need more interations or better priors. Plots take a while to load, but you can use the arrows on the plotting device (on the bottom right) to circulate through the graphs

plot(FIDs.model1, ask = FALSE) #looks good, but the distribution for sd_walker_Intercept is skewed. Depending on whether or not walker explains much we might remove it to improve model fit

#This model diagnostic tool. It allows you too look at how well the model fits your data. 
pp_check(FIDs.model1) # this looks pretty good

#often good to compare our Bayesian summary predictions to that of a frequentist model. Predictions should be similar if we are using uninformative priors (because frequentist doesn't use prior information at all, an uninfomrative prior essentially tells the models that we don't have any/much prior information).

FID.tmb <- glmmTMB(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + start_distance_m*home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'gaussian', data = FIDs)
summary(FID.tmb) #if you look at the model estimates they should be similar which in this case is good. The variance estimates should also be similar. E.g. sigma (residual variance in the brms model) should be close to = to the residual std.dev, which it is 
model.resid = resid(FID.tmb)
qqnorm(model.resid)
qqline(model.resid) # not bad
plot(FIDs$FID_sqrt, model.resid)
abline(0,0)

#We might also want to compare models with and without our chosen random effects to determine how important they are to our model (like we did in the lmer above) - this is important particularly for walker which did some weird things

#we can use the update function to quickly change and re-run the model. This is faster than writing out the entire model again and running it

#model without (1|walker)
FIDs.model2 = update(FIDs.model1, formula = ~.  -(1|walker), newdata = FIDs, warmup = 1000, iter = 10000, cores = 4)
FIDs.model2 <- add_criterion(FIDs.model2, 'loo', overwrite = T)

#model without (1|focal_animal)
FIDs.model3 = update(FIDs.model1, formula = ~.  -(1|focal_animal), newdata = FIDs, warmup = 1000, iter = 10000, cores = 4)
FIDs.model3 <- add_criterion(FIDs.model3, 'loo', overwrite = T)

#model without both (1|focal_animal) and (1|walker)
FIDs.model4 = update(FIDs.model1, formula = ~.  -(1|focal_animal) - (1|walker), newdata = FIDs, warmup = 1000, iter = 10000, cores = 4)
FIDs.model4 <- add_criterion(FIDs.model4, 'loo', overwrite = T)

model.comp = loo_compare(FIDs.model1, FIDs.model2, FIDs.model3, FIDs.model4, criterion = 'waic')
print(model.comp, simplify = F)
# elpd_diff se_diff elpd_waic se_elpd_waic p_waic se_p_waic waic   se_waic
# FIDs.model1    0.0       0.0  -331.0      16.8         63.9    5.1     662.1   33.5 
# FIDs.model2   -5.8       4.7  -336.8      16.2         59.9    4.7     673.7   32.3 
# FIDs.model3  -67.0      11.6  -398.0      14.8         17.9    1.4     796.0   29.7 
# FIDs.model4  -86.4      12.6  -417.4      14.2         11.9    1.0     834.9   28.5 

#It looks like both random effects are important, but in particular (focal_animal) seems to be important - which is good because this suggests that there is significant among-individual variation in FIDs in our population even when accounting for certain fixed effects

#remove non-significant interactions
FID2.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')


#run the model
FIDs.model2 <- brm(FID2.bf,
                   data = FIDs,
                   family = gaussian,
                   cores = 4, 
                   chains = 4, 
                   warmup = 1000, 
                   iter = 10000, 
                   prior = prior1,
                   control = list(adapt_delta = 0.99)) 

FIDs.model2 <- add_criterion(FIDs.model2, 'loo', moment_match = TRUE)

summary(FIDs.model2)

tab_model(FIDs.model2, pred.labels = c("Intercept", "year", "sex (female)", "life stage (adult)", "trial number", "conspecifics present", "cover", "distance to safety", "start distance", "home disturbance score", "trappability"), show.se = TRUE, show.icc = FALSE, show.ngroups = FALSE, title = "Flight initiation distance", dv.labels = "")

setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(FIDs.model2, file = 'FIDs.model2')
##use the below code the re-load the model when you need to
#FIDs.model2 = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model2")

model.comp = loo_compare(FIDs.model1, FIDs.model2,criterion = 'loo')
print(model.comp, simplify = F)
# elpd_diff se_diff elpd_loo se_elpd_loo p_loo  se_p_loo looic  se_looic
# FIDs.model2    0.0       0.0  -325.8     16.8        66.8    5.1    651.5   33.5  
# FIDs.model1   -2.3       1.3  -328.0     16.3        68.6    5.3    656.0   32.6 

#Model with interactions isn't as good a model fit.

#> ---- Extract the variance components and calculate repeatability ----
library(tidybayes)
get_variables(FIDs.model2)[1:30] #This just tells us what parameters we can pull from the model
#Extract the variance components
post.data = FIDs.model2 %>% spread_draws(sd_focal_animal__Intercept,sd_home_site__Intercept, sd_walker__Intercept, sigma) 
#Among-individual variance
post.data$Va_focal_animal = post.data$sd_focal_animal__Intercept^2 # we square it because it is currently a standard deviation, and by squaring this value we turn it into a variance
post.data$Va_walker = post.data$sd_walker__Intercept^2 #same as above
post.data$Va_site = post.data$sd_home_site__Intercept^2

#Within-individual variance 
post.data$Vw = post.data$sigma^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(Runadj_focal_animal = Va_focal_animal/(Va_focal_animal + Vw),
                Radj_focal_animal = Va_focal_animal/(Va_focal_animal + Va_site + Va_walker + Vw),
                R_site = Va_site/(Va_focal_animal + Va_site + Va_walker + Vw),
                R_walker = Va_walker/(Va_walker + Va_site + Va_focal_animal + Vw))

#Focal animal repeatability
hist(post.data$Runadj_focal_animal) #histrogram of the repeatabilities obtained from 36000 posterior distributions. Looks like the mean ~0.40
mean(post.data$Runadj_focal_animal)
#[1] 0.2514121
rethinking::HPDI(post.data$Runadj_focal_animal, prob = 0.95)
# |0.95     0.95| 
#   0.1330146 0.3787393 

#Adjusted focal animal repeatability (i.e. measure also takes into account variation arising from observer differences)
hist(post.data$Radj_focal_animal)
mean(post.data$Radj_focal_animal)
#0.1651531
rethinking::HPDI(post.data$Radj_focal_animal, prob = 0.95)
# |0.95      0.95| 
# 0.06769403 0.27744517  

#Note that all of these repeatabilities are an adjusted repeatability estimate because we have taken into account variation arising from the experimental design.

#Site repeatability
mean(post.data$R_site)
#0.2127452
rethinking::HPDI(post.data$R_site, prob = 0.95)

#Observer repeatability
hist(post.data$R_walker)
mean(post.data$R_walker)
#0.15
rethinking::HPDI(post.data$R_walker, prob = 0.95)
# |0.95       0.95| 
# 0.009608298 0.354937052 

#Observer isnt repeatable. This may be because 1. there is not enough variation among individuals (which is good - i.e. observers are not consistently different from each other) and/or 2. there is high within-individual differences in observer (which is also good, because squirrel FIDs are different and thus observers should be measuring these differences - as long as there is no bias in what squirrels an observer is recording).

#> ---- Model output ----
#Here is an easy way to extract the population-level effects as a dataframe from a model in R. You can easily save this as .txt file and open in Word to make a table for your manuscript
library(sjPlot)
library(sjmisc)

#tab_model is a really cool function for making tables easily. There is a helpful tutorial online in using this function that I would recommend that you check out. You have to save a tab_model as an html file, but this can be opened in word for editing
#https://strengejacke.github.io/sjPlot/articles/tab_mixed.html
#https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html

(FID.model1.tab = tab_model(FIDs.model2, 
                           title = NULL, 
                           show.se = TRUE, 
                           show.icc = FALSE, 
                           digits = 3, 
                           show.re.var = TRUE, 
                           pred.labels = c("Intercept", "Year (2019)", "Sex (male)", "Life stage (adult)", "Trial number", "Conspecifics (present)", "Vegetation cover (high)", "Distance to shelter", "Starting distance", "Home site disturbance", "Trappability"), 
                           dv.labels = "Flight initiation distance", 
                           string.pred = "Coeffcient",
                           string.se = "SE"))



#> ---- Model conditional-effects and graphing  ----
#We can also easily graph the model conditional effects (i.e. the fixed effects). That is the model predicted effects of each of our fixed effects when all other fixed effects are held at a constant values (usually the mean if a continuous variable).
library(ggplot2)
library(extrafont)
library(RColorBrewer)

#Extract the conditional effects
cond <- conditional_effects(FIDs.model2, robust = FALSE)

#As an example of going to use sex
cond.sex <- cond$sex

#set my graphing parameters for use in ggplot (this obviously an optional step)
figurefont <- "Helvetica"
pd <- position_dodge(0.1)
margin = theme(plot.margin = unit(c(0.5, 1, 0.5, 1), "cm"))
themeof <- theme(
  text = element_text(family = figurefont,
                      face = "plain", color = 'black'),
  axis.text = element_text(size = 12, color = 'black'),
  axis.title = element_text(size = 14),
  axis.title.y = element_text(vjust = 3.5),
  axis.text.x = element_text(vjust = -0.75),
  legend.key.width = unit(1.25, "lines"), legend.key.size = unit(1.5, "lines"),
  legend.text = element_text(size = 14), legend.position = c(1.15, 0.5),
  legend.title = element_blank())

(sex_plot = ggplot(data = cond.sex, aes(x = sex, y = estimate__))+
    geom_point(data = FIDs, aes(y = FID_sqrt, x = sex, fill = sex), alpha = 0.3, size = 2, color = 'black', pch = 21)+
    geom_errorbar(aes(ymin = lower__, ymax = upper__), width = .2, position = pd, color = 'black', lwd = 0.8)+
    geom_point(aes(fill = sex), color = 'black', pch = 21, position = pd, size = 4) +
    ylab("Flight initiation distance (m)")+
    xlab("")+
    scale_x_discrete(labels = c("0" = "Male", "1" = "Female"))+
    theme_classic()+
    themeof+ theme(legend.position = "none", axis.ticks.x = element_blank()) + margin)

# Here is a shortcut for looking at the conditional effects without all the editing - note that these graphs are not weighted by the raw data like above, so the effects might look stronger than they 'technically are' --- however you can also edit graphs from these baseline plots
cond.plots = plot(conditional_effects(FIDs.model2, robust = FALSE), plot = F)
cond.plots$start_distance_m 

#edit graph using ggplot functions

start_dist.plot = cond.plots$start_distance_m + #geom_point(data = FIDs, aes(y = FID_sqrt, x = start_distance_m), alpha = 0.5, inherit.aes = F, color = 'black')+
  geom_line(size = 1.5, color = "black")+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Flight initiation distance (m)") + 
  xlab("Starting distance from squirrel (m)") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))

trapping.plot = cond.plots$trap_prop + 
  geom_line(size = 1.5, color = "black")+
  #geom_point(data = FIDs, aes(y = FID_sqrt, x = trap_prop), alpha = 0.5, inherit.aes = F, color = 'black')+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Flight initiation distance (m)") + 
  xlab("Proportion of days trapped") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))

home_site.plot = cond.plots$home_site_disturbance_by_year+ 
  geom_line(size = 1.5, color = "black")+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Flight initiation distance (m)") + 
  xlab("Home site disturbance score") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))



#combine plots into one figure
library(gridExtra)
fullmodel_plots = grid.arrange(sex_plot, start_dist.plot, trapping.plot, home_site.plot, nrow = 2, ncol = 2)
setwd("~/Desktop/Squirrels/FID_squirrel project/plots")
ggsave(fullmodel_plots, device = "pdf", width = 8, height = 6.5, units = "in", family = figurefont, filename = "fullmodel_plots.pdf")

#### LIFE-STAGE repeatability ####

#In this next step, I am going to allow both the among- and within-individual variation vary among life_stage so that I can compute a seperate repeatability for adults and juveniles. To test for this, we first can compare models where the variances are assumed to be equal across life_stage (i.e. we assume they are the same) to models where we allow the variances to vary across life_stage (i.e. we allow them to be different). So below I construct 4 models and then we compare them to see which is the strongest model. The four models are 
# 1. Equal variance model (model 1)
# 2. A model where only among-individual variance (Va) is calculated seperately for each life_stage (model 2)
# 3. A model where only within-individual variances (Vw) in calculated seperately for each life_stage (model 3)
# 4. A model where Va and Vw is calculated seperately for each life_stage (model 4)

# If we find that model 1 is the best, then this tell us that variation (and thus repeatability) does not differ between life_stage (i.e. there is equal variation amongst life stages).

# our current FIDs.model1 will be our null model (i.e. equal variance model)

#Note that the alternative to the above option is to run seperate univariate models for each life stage

#> ---- Model 1: Null model ----
FID.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num
            + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial)
            + (1|focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
FIDs.lifestage.null <- brm(FID.bf,
                   data = FIDs,
                   family = gaussian,
                   cores = 4, 
                   chains = 4, 
                   warmup = 1000, 
                   iter = 10000, 
                   prior = prior1,
                   control = list(adapt_delta = 0.99)) 

FIDs.lifestage.null <- add_criterion(FIDs.lifestage.null, 'loo', moment_match = TRUE)
summary(FIDs.lifestage.null)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/life_history_models")
saveRDS(FIDs.lifestage.null, file = 'FIDs.lifestage.null')


#> ---- Model 2: only among-individual variance (Va) is calculated seperately for each life_stage ----
FID.lifestage.among = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (0+life_stage||focal_animal) + (1|walker), family = 'Gaussian')
#notice the new term (0+life_stage||focal_animal) which specifies that we now want the random intercept to vary among life_stage

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,20)", class = "Intercept"),
  set_prior("cauchy(5,15)", class = "sd"),
  set_prior("cauchy(5,15)", class = "sigma"),
  set_prior("normal(0,20)", class = "b"))

#run the model
FIDs.model.lifestage.among <- brm(FID.lifestage.among,
                        data = FIDs,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99)) 

FIDs.model.lifestage.among <- add_criterion(FIDs.model.lifestage.among, 'loo', moment_match = TRUE)
summary(FIDs.model.lifestage.among)
#you will notice in the model summary under Group-level effects that there is sd now estimated for each life_stage

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/life_history_models")
saveRDS(FIDs.model.lifestage.among, file = 'FIDs.model.lifestage.among')

#load the model
#FIDs.model.among = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.among")

#Check model diagnostics
plot(FIDs.model.lifestage.among, ask = FALSE) #look good
pp_check(FIDs.model.lifestage.among, resp = "FIDsqrt") # looks good

#> ---- Model 3: only within-individual variances (Vw) in calculated seperately for each life_stage ----
FID.lifestage.within = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), sigma ~ 0+life_stage, family = 'Gaussian')

get_prior(FID.within, data = FIDs)

prior1 <- c(
  #Intercept
  set_prior("normal(0,20)", class = "Intercept"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

#run the model
FIDs.model.lifestage.within <- brm(FID.lifestage.within,
                         data = FIDs,
                         family = gaussian,
                         cores = 4, 
                         chains = 4, 
                         warmup = 1000, 
                         iter = 10000, 
                         prior = prior1,
                         control = list(adapt_delta = 0.99)) 

FIDs.model.lifestage.within <- add_criterion(FIDs.model.lifestage.within, 'loo', moment_match = TRUE)
summary(FIDs.model.lifestage.within)
#you will notice in the model summary under Population-level effects that there is now a sigma coefficient estimated for each life_stage

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/life_history_models")
saveRDS(FIDs.model.lifestage.within, file = 'FIDs.model.lifestage.within')

#load the model 
#FIDs.model.within = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.within")

#Check model diagnostics
plot(FIDs.model.lifestage.within, ask = FALSE) #look good
pp_check(FIDs.model.lifestage.within, resp = "FIDsqrt") # looks good

#> ---- Model 4: Va and Vw is calculated seperately for each life_stage ----
FID.lifestage.bothvar = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (0+life_stage||focal_animal) + (1|walker), sigma ~ 0+life_stage, family = 'Gaussian')

get_prior(FID.lifestage.bothvar, data = FIDs)

prior1 <- c(
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

FIDs.model.lifestage.bothvar <- brm(FID.lifestage.bothvar,
                          data = FIDs,
                          family = gaussian,
                          cores = 4, 
                          chains = 4, 
                          warmup = 1000, 
                          iter = 10000, 
                          prior = prior1,
                          control = list(adapt_delta = 0.99)) 

FIDs.model.lifestage.bothvar <- add_criterion(FIDs.model.lifestage.bothvar, 'loo', moment_match = 'TRUE')
summary(FIDs.model.lifestage.bothvar)
#Now we have a sigma and sd estimated for both life stages.

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/life_history_models")
saveRDS(FIDs.model.lifestage.bothvar, file = 'FIDs.model.lifestage.bothvar')

#load the model 
#FIDs.model.bothvar = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.bothvar")

#Check model diagnostics
plot(FIDs.model.bothvar, ask = FALSE) #look good
pp_check(FIDs.model.bothvar, resp = "FIDsqrt") # looks good

#> ---- Model comparison to compare our 4 candidate models ----
model.comp = loo_compare(FIDs.model1, FIDs.model.among, FIDs.model.within, FIDs.model.bothvar, criterion = 'waic')
print(model.comp, simplify = T)
# elpd_diff se_diff elpd_waic se_elpd_waic p_waic se_p_waic waic   se_waic
# FIDs.model.among      0.0       0.0  -329.2      15.5         66.3    5.0     658.5   31.0 
# FIDs.model.bothvar   -0.4       2.5  -329.7      16.6         68.7    5.7     659.4   33.2 
# FIDs.model1          -1.8       4.0  -331.0      16.8         63.9    5.1     662.1   33.5 
# FIDs.model.within    -2.0       5.5  -331.2      18.1         66.0    5.9     662.5   36.1 

#It looks like there might be some differences in the among-individual variation within life-stages, but there is no clear best model suggesting that any differences in variation are not large

#> ---- Aggregate variance and repeatability for each life_stage ----
get_variables(FIDs.model.bothvar)[1:30]
post.data = FIDs.model.bothvar %>% spread_draws(sd_focal_animal__life_stage0,sd_focal_animal__life_stage1, b_sigma_life_stage0, b_sigma_life_stage1)

#Among-individual variance for each life_stage
post.data$Va_lifestage_0 <- post.data$sd_focal_animal__life_stage0^2
post.data$Va_lifestage_1 <- post.data$sd_focal_animal__life_stage1^2

#Within-individual variance for each life stage
post.data$Vw_lifestage_0 = exp(post.data$b_sigma_life_stage0)^2
post.data$Vw_lifestage_1 = exp(post.data$b_sigma_life_stage1)^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_lifestage_0 = Va_lifestage_0/(Va_lifestage_0 + Vw_lifestage_0),
                R_lifestage_1 = Va_lifestage_1/(Va_lifestage_1 + Vw_lifestage_1))

#Juvenile repeatability
hist(post.data$R_lifestage_0)
mean(post.data$R_lifestage_0)
# 0.2841323
rethinking::HPDI(post.data$R_lifestage_0, prob = 0.95)
# |0.95     0.95| 
#   0.1321628 0.4393982

#Adult repeatability
hist(post.data$R_lifestage_1)
mean(post.data$R_lifestage_1)
# 0.4760768
rethinking::HPDI(post.data$R_lifestage_1, prob = 0.95)
# |0.95     0.95| 
#   0.3229095 0.6357703 

#So it looks like adults are more repeatable than juveniles (albeit not significantly if we look at overlap of confidence intervals)

# Remember that our model selection seemed to suggest that among-variation was not equal across life-stages, whereas within-individual variance seems to be similar. We can look at this.

#Among-individual variance
FID.Va = post.data %>%
  dplyr::select(starts_with("Va")) %>%
  pivot_longer(cols = starts_with("Va"),
               names_to = 'Life_stage',
               names_prefix = "Va_",
               values_to ="Estimate")

(FID.Va = FID.Va %>%
  dplyr::group_by(Life_stage)%>%
  dplyr::summarise(Va = round(mean(Estimate), 3),
                   lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                   upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
  as.data.frame())
# Life_stage    Va lowerCI upperCI
# 1 lifestage_0 0.095   0.035   0.167
# 2 lifestage_1 0.286   0.135   0.460

#More among-individual variation in adults. 

(FID.Va.plot = ggplot(FID.Va, aes(x = Life_stage, y = Va, color = Life_stage)) +
    geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0, position = pd,color = 'black')+
    geom_point(aes(fill = Life_stage), color = 'black', pch = 22 , position = pd, size = 3.5, stroke = 0.75)+
    ylab(label = "Among-individual variation") +
    xlab("")+
    scale_x_discrete(labels = c("lifestage_0" = "Juvenile", "lifestage_1" = "Adult"))+
    coord_flip()+
    theme_classic() +
    themeof +
    theme(axis.title.x = element_text(vjust = -1), legend.position = "none"))

#Within-individual variance
FID.Vw = post.data %>%
  dplyr::select(starts_with("Vw")) %>%
  pivot_longer(cols = starts_with("Vw"),
               names_to = 'Life_stage',
               names_prefix = "Vw_",
               values_to ="Estimate")

(FID.Vw = FID.Vw %>%
    dplyr::group_by(Life_stage)%>%
    dplyr::summarise(Vw = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())

# Life_stage    Vw lowerCI upperCI
# 1 lifestage_0 0.234   0.182   0.290
# 2 lifestage_1 0.304   0.237   0.378

#As our model comparison suggested, not much difference in within-individual variation between life-stages, but it seems that there simply more variation in general within adults

(FID.Vw.plot = ggplot(FID.Vw, aes(x = Life_stage, y = Vw, color = Life_stage)) +
    geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0, position = pd,color = 'black')+
    geom_point(aes(fill = Life_stage), color = 'black', pch = 22 , position = pd, size = 3.5, stroke = 0.75)+
    ylab(label = "Within-individual variation") +
    xlab("")+
    scale_x_discrete(labels = c("lifestage_0" = "Juvenile", "lifestage_1" = "Adult"))+
    coord_flip()+
    theme_classic() +
    themeof +
    theme(axis.title.x = element_text(vjust = -1), legend.position = "none"))

#> ---- Calculate delta (magnitude of differences) in the variance components ----
# We can also calcuate the magnitude of difference (i.e. the effect size) in variance components between both life-stages.
#delta Va and Vw
post.data$delta_Va = with(post.data, Va_lifestage_0 - Va_lifestage_1)
post.data$delta_Vw = with(post.data, Vw_lifestage_0 - Vw_lifestage_1)
post.data$delta_R = with(post.data, R_lifestage_0 - R_lifestage_1)

#Difference in repeatability
mean(post.data$delta_R)
#-0.1919445
rethinking::HPDI(post.data$delta_R, prob = 0.95)
# |0.95       0.95| 
#   -0.40578252  0.02702899 

#Difference in among-individual variability
mean(post.data$delta_Va)
#-0.1906455
rethinking::HPDI(post.data$delta_Va, prob = 0.95)
# |0.95       0.95| 
# -0.37662400 -0.01706939

#Difference in within-individual variability
mean(post.data$delta_Vw)
#-0.06791337
rethinking::HPDI(post.data$delta_Vw, prob = 0.95)
# |0.95       0.95| 
# -0.16106467  0.02142639 


#### SEX repeatability ####

#We are now going to do essentially the same thing as we did with life_stage, but now we going to do it for sex instead.

#> ---- Null Model ----
FID.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num
            + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial)
            + (1|focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
FIDs.sex.null <- brm(FID.bf,
                           data = FIDs,
                           family = gaussian,
                           cores = 4, 
                           chains = 4, 
                           warmup = 1000, 
                           iter = 10000, 
                           prior = prior1,
                           control = list(adapt_delta = 0.99)) 

FIDs.sex.null <- add_criterion(FIDs.sex.null, 'loo')
summary(FIDs.lifestage.null)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_models")
saveRDS(FIDs.sex.null, file = 'FIDs.sex.null')


#> ---- Model 2: only among-individual variance (Va) is calculated seperately for each sex ----
FID.sex.among = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num +  start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (0+sex||focal_animal) + (1|walker), family = 'Gaussian')

get_prior(FID.sex.among, data = FIDs)

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,20)", class = "Intercept"),
  set_prior("cauchy(5,15)", class = "sd"),
  set_prior("cauchy(5,15)", class = "sigma"),
  set_prior("normal(0,20)", class = "b"))

#run the model
FID.sexmodel.among <- brm(FID.sex.among,
                        data = FIDs,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99))

summary(FID.sexmodel.among)
FID.sexmodel.among <- add_criterion(FID.sexmodel.among, 'loo', moment_match = TRUE)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_models")
saveRDS(FID.sexmodel.among, file = 'FIDs.sexmodel.among')
#FID.sexmodel.among = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_models/FIDs.sexmodel.among")

#Check model diagnostics
plot(FID.sexmodel.among, ask = FALSE) #look good
pp_check(FID.sexmodel.among, resp = "FIDsqrt") # looks good

#> ---- Model 3: only within-individual variances (Vw) in calculated seperately for each life_stage ----
FID.sex.within = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num +  start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), sigma ~ 0+sex, family = 'Gaussian')

get_prior(FID.sex.within, data = FIDs)

prior1 <- c(
  #Intercept
  set_prior("normal(0,20)", class = "Intercept"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

#run the model
FIDs.sexmodel.within <- brm(FID.sex.within,
                         data = FIDs,
                         family = gaussian,
                         cores = 4, 
                         chains = 4, 
                         warmup = 1000, 
                         iter = 10000, 
                         prior = prior1,
                         control = list(adapt_delta = 0.99)) 

FIDs.sexmodel.within <- add_criterion(FIDs.sexmodel.within, 'loo', moment_match = TRUE)
summary(FIDs.sexmodel.within)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_models")
saveRDS(FIDs.sexmodel.within, file = 'FIDs.sexmodel.within')

#Check model diagnostics
plot(FIDs.sexmodel.within, ask = FALSE) #look good
pp_check(FIDs.sexmodel.within, resp = "FIDsqrt") # looks good

#> ---- Model 4: Va and Vw is calculated seperately for each life_stage ----
FID.sex.bothvar = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num +  start_distance_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (0+sex||focal_animal) + (1|walker), sigma ~ 0+sex, family = 'Gaussian')

get_prior(FID.sex.bothvar, data = FIDs)

prior1 <- c(
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

FIDs.sexmodel.bothvar <- brm(FID.sex.bothvar,
                          data = FIDs,
                          family = gaussian,
                          cores = 4, 
                          chains = 4, 
                          warmup = 1000, 
                          iter = 10000, 
                          prior = prior1,
                          control = list(adapt_delta = 0.99)) 

FIDs.sexmodel.bothvar <- add_criterion(FIDs.sexmodel.bothvar, 'loo', moment_match = TRUE)
summary(FIDs.sexmodel.bothvar)
#Now we have a sigma and sd estimated for both life stages.

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_models")
saveRDS(FIDs.sexmodel.bothvar, file = 'FIDs.sexmodel.bothvar')

#Check model diagnostics
plot(FIDs.model.bothvar, ask = FALSE) #look good
pp_check(FIDs.model.bothvar, resp = "FIDsqrt") # looks good

#> ---- Model comparison to compare our 4 candidate models ----
model.comp = loo_compare(FIDs.model1, FIDs.sexmodel.among, FIDs.sexmodel.within, FIDs.sexmodel.bothvar, criterion = 'waic')

print(model.comp, simplify = F)
#                       elpd_diff se_diff elpd_waic se_elpd_waic p_waic se_p_waic waic  
# FIDs.sexmodel.among      0.0       0.0  -328.0      16.4         58.4    4.5     656.1
# FIDs.sexmodel.bothvar   -1.0       1.3  -329.0      16.3         60.2    4.6     658.0
# FIDs.model1             -2.0       2.0  -330.1      16.6         63.2    5.0     660.1
# FIDs.sexmodel.within    -3.0       2.9  -331.0      16.5         65.0    5.3     662.1
# se_waic
# FIDs.sexmodel.among     32.7 
# FIDs.sexmodel.bothvar   32.5 
# FIDs.model1             33.3 
# FIDs.sexmodel.within    33.1 

#> ---- Aggregate variance and repeatability for each life_stage ----
get_variables(FIDs.sexmodel.bothvar)[1:15]
post.data = FIDs.sexmodel.bothvar %>% spread_draws(sd_focal_animal__sex0,sd_focal_animal__sex1, b_sigma_sex0, b_sigma_sex1)

#Among-individual variance for each life_stage
post.data$Va_sex_0 <- post.data$sd_focal_animal__sex0^2
post.data$Va_sex_1 <- post.data$sd_focal_animal__sex1^2

#Within-individual variance for each life stage
post.data$Vw_sex_0 = exp(post.data$b_sigma_sex0)^2
post.data$Vw_sex_1 = exp(post.data$b_sigma_sex1)^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_sex_0 = Va_sex_0/(Va_sex_0 + Vw_sex_0),
                R_sex_1 = Va_sex_1/(Va_sex_1 + Vw_sex_1))

#Male repeatability
hist(post.data$R_sex_0)
mean(post.data$R_sex_0)
#0.140137
round(rethinking::HPDI(post.data$R_sex_0, prob = 0.95), 3)
# |0.95 0.95| 
#   0.000 0.346 

#Female repeatability
hist(post.data$R_sex_1)
mean(post.data$R_sex_1)
# 0.4217261
round(rethinking::HPDI(post.data$R_sex_1, prob = 0.95), 3)
# |0.95 0.95| 
# 0.292 0.551 

#So it looks like females are more repeatable than males (albeit not significantly if we look at overlap of confidence intervals)


#Among-individual variance
FID.Va = post.data %>%
  dplyr::select(starts_with("Va")) %>%
  pivot_longer(cols = starts_with("Va"),
               names_to = 'sex',
               names_prefix = "Va_",
               values_to ="Estimate")

(FID.Va = FID.Va %>%
    dplyr::group_by(sex)%>%
    dplyr::summarise(Va = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())
# sex    Va lowerCI upperCI
# 1 sex_0 0.041   0.000   0.114
# 2 sex_1 0.213   0.118   0.32

#More among-individual variation in females 

(FID.Va.plot = ggplot(FID.Va, aes(x = sex, y = Va, color = sex)) +
    geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0, position = pd,color = 'black')+
    geom_point(aes(fill = sex), color = 'black', pch = 22 , position = pd, size = 3.5, stroke = 0.75)+
    ylab(label = "Among-individual variation") +
    xlab("")+
    scale_x_discrete(labels = c("sex_0" = "Male", "sex_1" = "Female"))+
    coord_flip()+
    theme_classic() +
    themeof +
    theme(axis.title.x = element_text(vjust = -1), legend.position = "none"))

#Within-individual variance
FID.Vw = post.data %>%
  dplyr::select(starts_with("Vw")) %>%
  pivot_longer(cols = starts_with("Vw"),
               names_to = 'sex',
               names_prefix = "Vw_",
               values_to ="Estimate")

(FID.Vw = FID.Vw %>%
    dplyr::group_by(sex)%>%
    dplyr::summarise(Vw = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())

# sex    Vw lowerCI upperCI
# 1 sex_0 0.233   0.159   0.315
# 2 sex_1 0.286   0.236   0.341


(FID.Vw.plot = ggplot(FID.Vw, aes(x = sex, y = Vw, color = sex)) +
    geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0, position = pd,color = 'black')+
    geom_point(aes(fill = sex), color = 'black', pch = 22 , position = pd, size = 3.5, stroke = 0.75)+
    ylab(label = "Within-individual variation") +
    xlab("")+
    scale_x_discrete(labels = c("sex_0" = "Male", "sex_1" = "Female"))+
    coord_flip()+
    theme_classic() +
    themeof +
    theme(axis.title.x = element_text(vjust = -1), legend.position = "none"))

#> ---- Calculate delta (magnitude of differences) in the variance components ----
# We can also calcuate the magnitude of difference (i.e. the effect size) in variance components between both life-stages.
#delta Va and Vw
post.data$delta_Va = with(post.data, Va_sex_0 - Va_sex_1)
post.data$delta_Vw = with(post.data, Vw_sex_0 - Vw_sex_1)
post.data$delta_R = with(post.data, R_sex_0 - R_sex_1)

#Difference in repeatability
mean(post.data$delta_R)
#-0.2815891
round(rethinking::HPDI(post.data$delta_R, prob = 0.95), 3)
# |0.95  0.95| 
# -0.506 -0.029 

#Difference in among-individual variability
mean(post.data$delta_Va)
#-0.1722275
round(rethinking::HPDI(post.data$delta_Va, prob = 0.95), 3)
# |0.95  0.95| 
# -0.302 -0.042 

#Difference in within-individual variability
mean(post.data$delta_Vw)
#-0.06791337
round(rethinking::HPDI(post.data$delta_Vw, prob = 0.95), 3)
# |0.95  0.95| 
# -0.147  0.047

#### RESPONSE POST-FLIGHT ####
#Squirrels may have either sheltered or not, those that sheltered at an emergence time recorded, whereas those had a stare and wait time that was recorded
#I think that it might be worthwhile looking at adding this response into the above models to see if individuals that had larger FIDs were more likely to enter a shelter or not - this will give us an idea on whether these responses were correlated or not. We may have add an interaction terms with distance from shelter (because individuals are probably more likely to enter a shelter if they close to a shelter)

#### Emergence time from shelter ####

samples = sheltered %>%
  group_by(focal_animal) %>%
  summarise(n_trials = length(unique(day)))
  
range(samples$n_trials)
summary(samples$n_trials)
hist(samples$n_trials) #so it looks like most individuals only have 1 trial



#> ---- Data exploration ----
sheltered %>%
  summarise(n_indiv = length(unique(focal_animal)))
#60 out of the total 88 unique focal animals at one point responded by going into the shelter

hist(sheltered$time_until_emerge)
#heavily skewed, probably should use a gamma distribution.

#Gamma distribution contain values that are continuous, cannot be less than zero, and the variance increases with the mean


hist(log(sheltered$time_until_emerge)) #improves things a little bit

library(lme4)
#check how the transformation might perform in a model
log_emerge.model <- lmer(log(time_until_emerge) ~ sex + life_stage + area_tested_disturbance_by_year + FID_m + (1|focal_animal), data = sheltered)
drop1(log_emerge.model, test = 'Chi') 
#note that sex and presence of conspecifics have become more important
summary(log_emerge.model)
mod.resid = resid(log_emerge.model)
qqnorm(mod.resid)
qqline(mod.resid) # not too bad
plot(log(sheltered$time_until_emerge), mod.resid, ylab = 'Residuals', xlab = 'log_emerge_time')
abline(0,0)# not great

#i'll add log-transforemd variable to data anyway
sheltered$log_time_to_emerge = log(sheltered$time_until_emerge)

#> ---- Model in brms using a gamma distribution ----
emerge.bf = bf(time_until_emerge ~ year + sex + life_stage + home_site_disturbance_by_year + FID_m + trial_num + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = Gamma('log'))

get_prior(emerge.bf, data = sheltered)

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,5)", class = "Intercept"),
  set_prior("cauchy(0,5)", class = "sd"),
  #residual priors, instead of sigma, we have the shape parameter
  set_prior('gamma(0.01, 0.01)', class = "shape"),
  set_prior("normal(0,5)", class = "b"))

#run the model
emerge.model1.gamma <- brm(emerge.bf,
                   data = sheltered,
                   family = Gamma(link = 'log'),
                   cores = 4, 
                   chains = 2, 
                   warmup = 1000, 
                   iter = 10000, 
                   prior = prior1,
                   control = list(adapt_delta = 0.995)) 

emerge.model1.gamma <- add_criterion(emerge.model1.gamma, 'loo', moment_match = TRUE)
summary(emerge.model1.gamma)


model.plots.gamma = plot(conditional_effects(emerge.model1.gamma), plot = F)
model.plots.gamma$FID_m + 
  geom_point(data = sheltered, aes(y = time_until_emerge, x = FID_m), alpha = 0.5, inherit.aes = F, color = 'black') + geom_line(color = 'black', size = 1)+ ylab("Time until emerge from shelter") + xlab('Flight initation distance') + theme_classic()
#This looks like it has fit the data pretty well. There are some odd large emerge time scores which makes things a bit funky to model

model.plots.gamma$home_site_disturbance_by_year + 
  geom_point(data = sheltered, aes(y = time_until_emerge, x = home_site_disturbance_by_year), alpha = 0.5, inherit.aes = F, color = 'black') + geom_line(color = 'black', size = 1)+ ylab("Time until emerge from shelter") + xlab('Home site disturbance score') + theme_classic()

#Posterior predictive check
brms::pp_check(emerge.model1.gamma, resp = 'timeuntilemerge')
#this is a more flexible way of doing this check
y = sheltered$time_until_emerge
yrep = posterior_predict(emerge.model1.gamma, resp = "timeuntilemerge", draws = 500)
ppc_dens_overlay(y, yrep[1:50,]) + xlim(0,50) #This xlim part allows you to zoom in to more closely examine how the model is performing
ppc_hist(y, yrep[1:5,])

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/shelter_models")
saveRDS(emerge.model1.gamma, file = 'emerge.model1.gamma')
##use the below code the re-load the model when you need to
#emerge.model1.gamma = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/shelter_models/emerge.model1.gamma")


emerge.tmb <- glmmTMB(time_until_emerge ~ year + sex + life_stage + home_site_disturbance_by_year + FID_m + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = Gamma('log'), data = sheltered)
summary(emerge.tmb) #if you look at the model estimates they should be similar which in this case is good. The variance estimates should also be similar. E.g. sigma (residual variance in the brms model) should be close to = to the residual std.dev, which it is 

#> ---- Extract the variance components and calculate repeatability from gamma ----
library(tidybayes)
get_variables(emerge.model1.gamma)[1:20] 
#Extract the variance components (extract shape, instead of sigma to calculate repeatability from a Gamma distribution)
post.data = emerge.model1.gamma %>% spread_draws(sd_focal_animal__Intercept, shape) 
#Among-individual variance
post.data$Va_focal_animal = post.data$sd_focal_animal__Intercept^2 # 
post.data$Vw = post.data$shape
post.data$Vw = log(1+(1/post.data$Vw)) # this how to calculate the residual variance from a Gamma distribution according to Nakagawa et al. 2017, Journal of Statistical Interface (or something like that)

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_focal_animal = Va_focal_animal/(Va_focal_animal + Vw))

#Focal animal repeatability
hist(post.data$R_focal_animal)
mean(post.data$R_focal_animal)
#[1] 0.3172427
round(rethinking::HPDI(post.data$R_focal_animal, prob = 0.95),3)
# |0.95 0.95| 
#   0.000 0.558  

#> ---- Model in brms using a gaussian distribution ----
emerge.bf.gaus = bf(log_time_to_emerge ~ year + sex + life_stage + home_site_disturbance_by_year + FID_m + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'gaussian')

get_prior(emerge.bf.gaus, data = sheltered)

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,50)", class = "Intercept"),
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors, instead of sigma, we have the shape parameter
  set_prior('cauchy(5,15)', class = "sigma"),
  set_prior("normal(0,50)", class = "b"))

#run the model
emerge.model1.gaus <- brm(emerge.bf.gaus,
                           data = sheltered,
                           family = 'gaussian',
                           cores = 4, 
                           chains = 4, 
                           warmup = 1000, 
                           iter = 10000, 
                           prior = prior1,
                           control = list(adapt_delta = 0.995)) 

model.plots.gaus = plot(conditional_effects(emerge.model1.gaus), plot = F)
model.plots.gaus$FID_m + 
  geom_point(data = sheltered, aes(y = log_time_to_emerge, x = FID_m), alpha = 0.5, inherit.aes = F, color = 'black') + geom_line(color = 'black', size = 1)+ ylab("Time until emerge from shelter") + xlab('Flight initation distance') + theme_classic()
#Not a terrible fit to the data, but I think the gamma performs better

pp_check(emerge.model1.gaus, resp = 'logtimetoemerge')# this looks a bit wonky
summary(emerge.model1.gaus)
emerge.model1.gaus <- add_criterion(emerge.model1.gaus, "waic")
plot(emerge.model1, ask = F)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(emerge.model1.gaus, file = 'emerge.model1.gaus')

emerge.tmb <- glmmTMB(log_time_to_emerge ~ year + sex + life_stage + home_site_disturbance_by_year + FID_m + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'gaussian', data = sheltered)
summary(emerge.tmb) #if you look at the model estimates they should be similar which in this case is good. The variance estimates should also be similar. E.g. sigma (residual variance in the brms model) should be close to = to the residual std.dev, which it is 
mod.resid = resid(emerge.tmb)
qqnorm(mod.resid)
qqline(mod.resid) # not too bad
plot(sheltered$log_time_to_emerge, mod.resid, ylab = 'Residuals', xlab = 'log_emerge_time')
abline(0,0)# not great

#> ---- Extract the variance components and calculate repeatability from gaussian ----
library(tidybayes)
get_variables(emerge.model1.gaus)[1:20] 
post.data = emerge.model1.gaus %>% spread_draws(sd_focal_animal__Intercept, sigma) 
#Among-individual variance
post.data$Va_focal_animal = post.data$sd_focal_animal__Intercept^2 # 
post.data$Vw = post.data$sigma^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_focal_animal = Va_focal_animal/(Va_focal_animal + Vw))

#Focal animal repeatability
hist(post.data$R_focal_animal)
mean(post.data$R_focal_animal)
#[1] 0.1816127 --- note that the repeatability estimate is quite different from the gamma model
round(rethinking::HPDI(post.data$R_focal_animal, prob = 0.95),3)
# |0.95 0.95| 
# 0.000 0.401

#> ---- Model output ----
#Here is an easy way to extract the population-level effects as a dataframe from a model in R. You can easily save this as .txt file and open in Word to make a table for your manuscript
library(sjPlot)
library(sjmisc)

#tab_model is a really cool function for making tables easily. There is a helpful tutorial online in using this function that I would recommend that you check out. You have to save a tab_model as an html file, but this can be opened in word for editing
#https://strengejacke.github.io/sjPlot/articles/tab_mixed.html
#https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html

(emerge.model1.gamma.tab = tab_model(emerge.model1.gamma,
                            transform = NULL,
                            title = NULL, 
                            show.se = TRUE, 
                            show.icc = FALSE, 
                            digits = 3, 
                            show.re.var = TRUE, 
                            pred.labels = c("Intercept", "Year (2019)", "Sex (male)", "Life stage (adult)", "Area disturbance", "Flight initiation distance"), 
                            dv.labels = "Time taken to emerge from refuge", 
                            string.pred = "Coeffcient",
                            string.se = "SE"))


#> ---- Model conditional-effects and graphing  ----
#Extract the conditional effects
cond.plots = plot(conditional_effects(emerge.model1.gamma, robust = FALSE), plot = F)

FID_emerge.plot = cond.plots$FID_m + #geom_point(data = FIDs, aes(y = FID_sqrt, x = start_distance_m), alpha = 0.5, inherit.aes = F, color = 'black')+
  geom_line(size = 1.5, color = "black")+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Time to emerge from shelter (mins)") + 
  xlab("Flight initiation distance (m)") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))

Area_emerge.plot = cond.plots$area_tested_disturbance_by_year + 
  geom_line(size = 1.5, color = "black")+
  #geom_point(data = FIDs, aes(y = FID_sqrt, x = trap_prop), alpha = 0.5, inherit.aes = F, color = 'black')+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Time to emerge from shelter (mins)") + 
  xlab("Area disturbance score") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))

#combine plots into one figure
library(gridExtra)
emergemodel_plots = grid.arrange(FID_emerge.plot, Area_emerge.plot, nrow = 1, ncol = 2)
setwd("~/Desktop/Squirrels/FID_squirrel project/plots")
ggsave(emergemodel_plots, device = "pdf", width = 8.5, height = 4, units = "in", family = figurefont, filename = "emergemodel_plots.pdf")


# #> ---- Multivariate brms ----
# #We might want to model emergence time and FID is the same model and look at the covariation between these traits. To do this we would have to use a multivate mixed-effect approach. This is relatively easy to do in brms

#should only include significant predictor effects in the model to help with model convergence
emerge.bf = bf(time_until_emerge ~ year + sex + life_stage + home_site_disturbance_by_year + trial_num + trap_prop  + (1|home_site_w_spatial) + (1|p|focal_animal) + (1|walker), family = 'Gamma')

fid.bf = bf(FID_sqrt ~ year + sex + trial_num + life_stage + home_site_disturbance_by_year + start_distance_m + trap_prop + conspecifics_5m_num + (1|home_site_w_spatial) + (1|p|focal_animal) + (1|walker), family = 'gaussian')

get_prior(emerge.bf, fid.bf, data = sheltered)

#Setup the prior for each response variable
prior1 <- c(
  set_prior("normal(0,5)", class = "Intercept", resp = 'timeuntilemerge'),
  set_prior("normal(0,20)", class = "Intercept", resp = 'FIDsqrt'),
  set_prior("cauchy(0,5)", class = "sd", resp = 'timeuntilemerge'),
  set_prior("cauchy(5,15)", class = "sd", resp = 'FIDsqrt'),
  set_prior('gamma(0.01, 0.01)', class = "shape", resp = 'timeuntilemerge'),
  set_prior("cauchy(5,15)", class = "sigma", resp = 'FIDsqrt'),
  set_prior("normal(0,5)", class = "b", resp = 'timeuntilemerge'),
  set_prior("normal(0,20)", class = "b", resp = 'FIDsqrt'),
  set_prior("lkj(2)", class = "cor"))

#run the model
shelter.multi.model <- brm(emerge.bf+fid.bf+set_rescor(FALSE),
                     data = sheltered,
                     cores = 4, 
                     chains = 4, 
                     warmup = 1000, 
                     iter = 10000, 
                     prior = prior1,
                     control = list(adapt_delta = 0.999))

summary(shelter.multi.model)
plot(shelter.multi.model, ask = F)
brms::pp_check(shelter.multi.model, resp='timeuntilemerge')
brms::pp_check(shelter.multi.model, resp='FIDsqrt')

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/shelter_models")
saveRDS(shelter.multi.model, file = 'shelter.multi.model')
##use the below code the re-load the model when you need to
#shelter_multi.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/shelter_models/multi.model")


#Look at repeatability of each traits and measure covariation between each trait
get_variables(shelter.multi.model)[1:30]
post.multi.shelter = shelter.multi.model %>% 
  spread_draws(sd_focal_animal__timeuntilemerge_Intercept,
               sd_focal_animal__FIDsqrt_Intercept, 
               sd_walker__timeuntilemerge_Intercept,
               sd_walker__FIDsqrt_Intercept, 
               shape_timeuntilemerge, sigma_FIDsqrt,
               cor_focal_animal__timeuntilemerge_Intercept__FIDsqrt_Intercept,
               sd_home_site_w_spatial__FIDsqrt_Intercept,
               sd_home_site_w_spatial__timeuntilemerge_Intercept)


Va_FID = post.multi.shelter$sd_focal_animal__FIDsqrt_Intercept^2
Va_emerge = post.multi.shelter$sd_focal_animal__timeuntilemerge_Intercept^2
Va_walker_FID = post.multi.shelter$sd_walker__FIDsqrt_Intercept^2
Va_walker_emerge = post.multi.shelter$sd_walker__timeuntilemerge_Intercept^2
Va_homesite_FID = post.multi.shelter$sd_home_site_w_spatial__FIDsqrt_Intercept^2
Va_homesite_emerge = post.multi.shelter$sd_home_site_w_spatial__timeuntilemerge_Intercept^2
Vw_FID = post.multi.shelter$sigma_FIDsqrt^2
Vw_emerge = log(1 + (1/post.multi.shelter$shape_timeuntilemerge))
Radj_FID = Va_FID/(Va_FID + Va_walker_FID + Va_homesite_FID + Vw_FID)
Radj_emerge = Va_emerge/(Va_emerge + Va_walker_emerge + Va_homesite_emerge + Vw_emerge)
Runadj_FID = Va_FID/(Va_FID + Vw_FID)
Runadj_emerge = Va_emerge/(Va_emerge + Vw_emerge)

# extract the correlation
emerge_FID = post.multi.shelter$cor_focal_animal__timeuntilemerge_Intercept__FIDsqrt_Intercept

#calculate the covariation

emerge_FID_cov = emerge_FID * sqrt(Va_FID) * sqrt(Va_emerge)

#For the slope we divide the covariance between the two focal behaviors by the one to be plotted on the x-axis
emerge_FID_slope = emerge_FID_cov/Va_FID

#### ~ Time until emerge repeatability ####

#adj. repeatability
mean(Radj_emerge)
# 0.2571421
rethinking::HPDI(Radj_emerge, prob = 0.95)
# |0.95      0.95| 
# 0.05161904 0.48532600 


#unadj. repeatability
mean(Runadj_emerge)
# 0.3967942
rethinking::HPDI(Runadj_emerge, prob = 0.95)
# |0.95     0.95| 
# 0.1468792 0.6313173 

#### ~ FID repeatability ####

#adj. repeatability

mean(Radj_FID)
# 0.2277527
rethinking::HPDI(Radj_FID, prob = 0.95)
# |0.95      0.95| 
#   0.05657379 0.42692813 


#unadj. repeatability

mean(Runadj_FID)
#0.3275544
rethinking::HPDI(Runadj_FID, prob = 0.95)
# |0.95     0.95| 
# 0.1148058 0.5441513 


#### ~ Correlation between FID and emergence time ####
mean(emerge_FID)
#0.5679535
rethinking::HPDI(emerge_FID, prob = 0.95)
# |0.95     0.95| 
#   0.1296424 0.9629289  

#Covariation between FID and emergence
mean(emerge_FID_cov)
#0.1105065
rethinking::HPDI(emerge_FID_cov, prob = 0.95)
# |0.95       0.95| 
#   0.001135116 0.238238520 


#Just a basic spearman rank correlation test
cor.test(sheltered$FID_sqrt, sheltered$time_until_emerge)

#Extract the BLUPS
BLUPS = multi.model %>% spread_draws(r_focal_animal__timeuntilemerge[animal_ID,], r_focal_animal__FIDsqrt[animal_ID,]) %>% 
  mean_qi

#Plot the correlation
(p1 <- ggplot(BLUPS, aes(x = r_focal_animal__FIDsqrt, 
                       y = r_focal_animal__timeuntilemerge, group = animal_ID)) +
  geom_point(size = 2) +
  geom_abline(intercept = 0, slope = mean(emerge_FID_slope)) +
  labs(x = "Flight initiation distance (BLUP)",
       y = "Shelter emergence time (BLUP)")+
  theme_classic()+
  ggtitle("Behavioural syndrome between FID and shelter emergence")+themeof)


#### Stop and look distance ####
#> ---- Data exploration ----
FIDs %>% 
  filter(stop_look == '1') %>%
  summarise(stop_dist_NAs = sum(is.na(stop_look_distance_m)),
            stop_dura_NAs = sum(is.na(look_duration_sec)))

# stop_dist_NAs stop_dura_NAs
#         11            35


cor.test(FIDs$stop_look_distance_m, FIDs$look_duration_sec, method = 'spearman')
#They are correlated, but not that strongly

#To me it probably makes more sense to use stop_look_distance. 1. we have more data for it and 2. it probably more meaningful in terms of an FID response.

stoplook = FIDs %>%
  filter(stop_look == '1')%>%
  filter(!is.na(stop_look_distance_m))



#STOP LOOK DISTANCE
summary(stoplook$stop_look_distance_m)
#looks like the mean stop distance was about 5 metres and median was about 3.5 metres
hist(stoplook$stop_look_distance_m)
#heavily skewed, probably should use a gamma distribution again.
hist(sqrt(stoplook$stop_look_distance_m))#improve it a bit
hist(log(stoplook$stop_look_distance_m))#probably better

#Gamma distribution contain values that are continuous, cannot be less than zero, and the variance increases with the mean

library(lme4)
#check how the transformation might perform in a model
log_stoplook.model <- lmer(log(stop_look_distance_m) ~ sex + life_stage + area_tested_disturbance_by_year + FID_m + (1|focal_animal), data = stoplook)
drop1(log_stoplook.model, test = 'Chi') 
#note that sex and presence of conspecifics have become more important
summary(log_stoplook.model)
mod.resid = resid(log_stoplook.model)
qqnorm(mod.resid)
qqline(mod.resid) # not great
plot(log(stoplook$stop_look_distance_m), mod.resid, ylab = 'Residuals', xlab = 'Stop look distance (log)')
abline(0,0)# not great

#> ---- Model in brms using a gamma distribution ----
stoplookdist.bf = bf(stop_look_distance_m ~ year + sex + life_stage + home_site_disturbance_by_year + FID_m + trial_num + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = Gamma('log'))

get_prior(stoplookdist.bf, data = stoplook)

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,5)", class = "Intercept"),
  set_prior("cauchy(0,5)", class = "sd"),
  #residual priors, instead of sigma, we have the shape parameter
  set_prior('gamma(0.01, 0.01)', class = "shape"),
  set_prior("normal(0,5)", class = "b"))

#run the model
stoplookdist.model.gamma <- brm(stoplookdist.bf,
                           data = stoplook,
                           family = Gamma(link = 'log'),
                           cores = 4, 
                           chains = 2, 
                           warmup = 1000, 
                           iter = 10000, 
                           prior = prior1,
                           control = list(adapt_delta = 0.995)) 

stoplookdist.model.gamma <- add_criterion(stoplookdist.model.gamma, criterion = 'loo', moment_match = TRUE)

summary(stoplookdist.model.gamma)

model.plots.gamma = plot(conditional_effects(stoplookdist.model.gamma), plot = F)
model.plots.gamma$FID_m + 
  geom_point(data = stoplook, aes(y = stop_look_distance_m, x = FID_m), alpha = 0.5, inherit.aes = F, color = 'black') + geom_line(color = 'black', size = 1)+ ylab("Stop look distance (m)") + xlab('Flight initation distance (m)') + theme_classic()
#This looks like it has fit the data pretty well. 

#A though, does this actually mean anything. If an individual flees from an observer from a further distance, then is not surprising that their stop looks distance will also be further away from the observer (i.e because it already started from a further distance away))

brms::pp_check(stoplookdist.model.gamma, resp = 'stoplookdistancem')
summary(stoplookdist.model.gamma)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/stop_look_models")
saveRDS(stoplookdist.model.gamma, file = 'stoplookdist.model.gamma')
##use the below code the re-load the model when you need to
#stoplookdist.model.gamma = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/stop_look_models/stoplookdist.model.gamma")


#> ---- Extract the variance components and calculate repeatability from gamma ----
library(tidybayes)
get_variables(stoplookdist.model.gamma)[1:20] 
#Extract the variance components (extract shape, instead of sigma to calculate repeatability from a Gamma distribution)
post.stoplook = stoplookdist.model.gamma %>% spread_draws(sd_focal_animal__Intercept, sd_home_site_w_spatial__Intercept,  sd_walker__Intercept, shape) 

#Among-individual variance
post.stoplook$Va_focal_animal = post.stoplook$sd_focal_animal__Intercept^2 # 
post.stoplook$Va_walker = post.stoplook$sd_walker__Intercept^2
post.stoplook$Va_homesite = post.stoplook$sd_home_site_w_spatial__Intercept^2

#Within-individual variance
post.stoplook$Vw = post.stoplook$shape
post.stoplook$Vw = log(1+(1/post.stoplook$Vw)) # this how to calculate the residual variance from a Gamma distribution according to Nakagawa et al. 2017, Journal of Statistical Interface (or something like that)

#Repeatability
post.stoplook = post.stoplook %>%
  dplyr::mutate(Radj_focal_animal = Va_focal_animal/(Va_focal_animal + Va_walker + Va_homesite + Vw),
                Runadj_focal_animal = Va_focal_animal/(Va_focal_animal + Vw))


#### ~ Adjusted repeatability ####
mean(post.stoplook$Radj_focal_animal)
#0.09964151
round(rethinking::HPDI(post.stoplook$Radj_focal_animal, prob = 0.95),3)
# |0.95 0.95| 
# 0.000 0.254 

#### ~ Unadjusted repeatability ####
mean(post.stoplook$Runadj_focal_animal)
#0.1337759 
round(rethinking::HPDI(post.stoplook$Runadj_focal_animal, prob = 0.95),3)
# |0.95 0.95| 
# 0.000 0.327


#> ---- Model output ----
#Here is an easy way to extract the population-level effects as a dataframe from a model in R. You can easily save this as .txt file and open in Word to make a table for your manuscript
library(sjPlot)
library(sjmisc)

#tab_model is a really cool function for making tables easily. There is a helpful tutorial online in using this function that I would recommend that you check out. You have to save a tab_model as an html file, but this can be opened in word for editing
#https://strengejacke.github.io/sjPlot/articles/tab_mixed.html
#https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html

(stoplook.model1.gamma.tab = tab_model(stoplookdist.model.gamma,
                                     transform = NULL,
                                     title = NULL, 
                                     show.se = TRUE, 
                                     show.icc = FALSE, 
                                     digits = 3, 
                                     show.re.var = TRUE, 
                                     pred.labels = c("Intercept", "Year (2019)", "Sex (male)", "Life stage (adult)", "Area disturbance", "Flight initiation distance"), 
                                     dv.labels = "Time taken to emerge from refuge", 
                                     string.pred = "Coeffcient",
                                     string.se = "SE"))


#> ---- Model conditional-effects and graphing  ----
#Extract the conditional effects
cond.plots = plot(conditional_effects(stoplookdist.model.gamma, robust = FALSE), plot = F)

FID_stoplook.plot = cond.plots$FID_m + #geom_point(data = FIDs, aes(y = FID_sqrt, x = start_distance_m), alpha = 0.5, inherit.aes = F, color = 'black')+
  geom_line(size = 1.5, color = "black")+
  geom_ribbon(color  ="blue", fill = "blue", alpha = 0.2)+
  theme_classic() + 
  ylab("Stop look distance (m)") + 
  xlab("Flight initiation distance (m)") + 
  themeof + 
  margin + 
  theme(axis.title.x = element_text(vjust = -1))

#combine plots into one figure
setwd("~/Desktop/Squirrels/FID_squirrel project/plots")
ggsave(FID_stoplook.plot, device = "pdf", width = 6, height = 4, units = "in", family = figurefont, filename = "FID_stoplook.plot.pdf")


#> ---- Multivariate brms ----
stoplook.bf = bf(stop_look_distance_m ~ year + sex + life_stage + home_site_disturbance_by_year + trial_num + trap_prop + conspecifics_5m_num + (1|home_site_w_spatial) + (1|p|focal_animal) + (1|walker), family = 'Gamma')

fid.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + home_site_disturbance_by_year + start_distance_m + trap_prop + conspecifics_5m_num + (1|home_site_w_spatial) + (1|p|focal_animal) + (1|walker), family = 'gaussian')

get_prior(stoplook.bf, fid.bf, data = stoplook)

#Setup the prior for each response variable
prior1 <- c(
  set_prior("normal(0,5)", class = "Intercept", resp = 'stoplookdistancem'),
  set_prior("normal(0,20)", class = "Intercept", resp = 'FIDsqrt'),
  set_prior("cauchy(0,5)", class = "sd", resp = 'stoplookdistancem'),
  set_prior("cauchy(5,15)", class = "sd", resp = 'FIDsqrt'),
  set_prior('gamma(0.01, 0.01)', class = "shape", resp = 'stoplookdistancem'),
  set_prior("cauchy(5,15)", class = "sigma", resp = 'FIDsqrt'),
  set_prior("normal(0,5)", class = "b", resp = 'stoplookdistancem'),
  set_prior("normal(0,20)", class = "b", resp = 'FIDsqrt'),
  set_prior("lkj(2)", class = "cor"))

#run the model
multi.stoplook.model <- brm(stoplook.bf+fid.bf+set_rescor(FALSE),
                   data = stoplook,
                   cores = 4, 
                   chains = 4, 
                   warmup = 1000, 
                   iter = 10000, 
                   prior = prior1,
                   control = list(adapt_delta = 0.998))

multi.stoplook.model <- add_criterion(multi.stoplook.model, criterion = 'loo', moment_match = TRUE)
summary(multi.stoplook.model)


brms::pp_check(multi.stoplook.model, resp='stoplookdistancem')
brms::pp_check(multi.stoplook.model, resp='FIDsqrt')

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/stop_look_models")
saveRDS(multi.stoplook.model, file = 'multi.stoplook.model')
##use the below code the re-load the model when you need to
#multi.stoplook.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/shelter_models/multi.stoplook.model")


#Look at repeatability of each traits and measure covariation between each trait
get_variables(multi.stoplook.model)[1:30]
post.multi.stoplook = multi.stoplook.model %>% 
  spread_draws(sd_focal_animal__stoplookdistancem_Intercept,
               sd_focal_animal__FIDsqrt_Intercept, 
               sd_walker__stoplookdistancem_Intercept,
               sd_walker__FIDsqrt_Intercept, 
               shape_stoplookdistancem, sigma_FIDsqrt,
               cor_focal_animal__stoplookdistancem_Intercept__FIDsqrt_Intercept,
               sd_home_site_w_spatial__FIDsqrt_Intercept,
               sd_home_site_w_spatial__stoplookdistancem_Intercept)


Va_FID = post.multi.stoplook$sd_focal_animal__FIDsqrt_Intercept^2
Va_stoplook = post.multi.stoplook$sd_focal_animal__stoplookdistancem_Intercept^2
Va_walker_FID = post.multi.stoplook$sd_walker__FIDsqrt_Intercept^2
Va_walker_stoplook = post.multi.stoplook$sd_walker__stoplookdistancem_Intercept^2
Va_homesite_FID = post.multi.stoplook$sd_home_site_w_spatial__FIDsqrt_Intercept^2
Va_homesite_stoplook = post.multi.stoplook$sd_home_site_w_spatial__stoplookdistancem_Intercept^2
Vw_FID = post.multi.stoplook$sigma_FIDsqrt^2
Vw_stoplook = log(1 + (1/post.multi.stoplook$shape_stoplookdistancem))
Radj_FID = Va_FID/(Va_FID + Va_walker_FID + Va_homesite_FID + Vw_FID)
Radj_stoplook = Va_stoplook/(Va_stoplook + Va_walker_stoplook + Va_homesite_stoplook + Vw_stoplook)
Runadj_FID = Va_FID/(Va_FID + Vw_FID)
Runadj_stoplook = Va_stoplook/(Va_stoplook + Vw_stoplook)

# extract the correlation
stoplook_FID = post.multi.stoplook$cor_focal_animal__stoplookdistancem_Intercept__FIDsqrt_Intercept

#calculate the covariation

stoplook_FID_cov = stoplook_FID * sqrt(Va_FID) * sqrt(Va_stoplook)

#For the slope we divide the covariance between the two focal behaviors by the one to be plotted on the x-axis
stoplook_FID_slope = stoplook_FID_cov/Va_FID

#### ~ Stop look distance repeatability ####

#adj. repeatability
mean(Radj_stoplook)
# 0.1965624
rethinking::HPDI(Radj_stoplook, prob = 0.95)
# |0.95      0.95| 
# 0.05826055 0.34622077 


#unadj. repeatability
mean(Runadj_stoplook)
# 0.2534193
rethinking::HPDI(Runadj_stoplook, prob = 0.95)
# |0.95      0.95| 
# 0.09111402 0.42458928 

#### ~ FID repeatability ####

#adj. repeatability

mean(Radj_FID)
# 0.2152932
rethinking::HPDI(Radj_FID, prob = 0.95)
# |0.95     0.95| 
#   0.0730972 0.3638252


#unadj. repeatability

mean(Runadj_FID)
#0.4075413
rethinking::HPDI(Runadj_FID, prob = 0.95)
# |0.95     0.95| 
#   0.2484988 0.5649439 


#### ~ Correlation between FID and stoplooknce time ####
mean(stoplook_FID)
#0.736098
rethinking::HPDI(stoplook_FID, prob = 0.95)
# |0.95     0.95| 
#   0.4283257 0.9876666   

#Covariation between FID and stoplooknce
mean(stoplook_FID_cov)
#0.1046791
rethinking::HPDI(stoplook_FID_cov, prob = 0.95)
# |0.95      0.95| 
#   0.03587733 0.18264016 


#Just a basic spearman rank correlation test
cor.test(sheltered$FID_sqrt, sheltered$stop_look_distance_m)

#Extract the BLUPS
BLUPS = multi.stoplook.model %>% spread_draws(r_focal_animal__stoplookdistancem[animal_ID,], r_focal_animal__FIDsqrt[animal_ID,]) %>% 
  mean_qi

#Plot the correlation
(p1 <- ggplot(BLUPS, aes(x = r_focal_animal__FIDsqrt, 
                         y = r_focal_animal__stoplookdistancem, group = animal_ID)) +
    geom_point(size = 2) +
    geom_abline(intercept = 0, slope = mean(stoplook_FID_slope)) +
    labs(x = "Flight initiation distance (BLUP)",
         y = "Stop look distance (BLUP)")+
    theme_classic()+
    ggtitle("Behavioural syndrome between FID and stop look distance")+themeof)

#### Compare variation between low and high sites ####

#Need to first workout the best way of splitting sites between low and high
#One approach may be to split by the median or mean dependening on how the site data is distributed

hist(FIDs$home_site_disturbance_by_year)
summary(FIDs$home_site_disturbance_by_year)
#median is 0.127
#let's go by the median

#make new column to designate sites as either high or low based on their median score
FIDs = FIDs %>%
  mutate(disturb_rating = ifelse(home_site_disturbance_by_year > 0.127, "high", "low"))

FIDs$disturb_rating = ordered(FIDs$disturb_rating, levels = c("low", "high"))
summary(FIDs$disturb_rating)


#> ---- Model 1: Null model ----
disturb.null = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_pres + cover + distance_to_safety_m + start_distance_m + disturb_rating + trap_prop + (1|focal_animal), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
disturb.null.model <- brm(disturb.null,
                        data = FIDs,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99)) 

disturb.null.model <- add_criterion(disturb.null.model, 'loo', moment_match = TRUE)
brms::pp_check(disturb.null.model)
summary(disturb.null.model)
cond.plots = plot(conditional_effects(disturb.null.model), plot = F)
cond.plots$disturb_rating
#as expected low disturbance sites have higher FIDs

## Calculate repeatability ##
get_variables(disturb.null.model)[1:30]
post.disturb.data = disturb.null.model %>% spread_draws(sd_focal_animal__Intercept,sd_home_site__Intercept, sd_walker__Intercept, sigma)

#Among-individual variance
post.disturb.data$Va_focal <- post.disturb.data$sd_focal_animal__Intercept^2

#Among-site variance
post.disturb.data$Va_site <- post.disturb.data$sd_home_site__Intercept^2

#Among-observer variance
post.disturb.data$Va_walker <- post.disturb.data$sd_walker__Intercept^2

#Residual variance
post.disturb.data$Vw <- post.disturb.data$sigma^2

#Repeatability
post.disturb.data = post.disturb.data %>%
  dplyr::mutate(R_focal_unadj = Va_focal/(Va_focal + Vw),
                R_focal_adj = Va_focal/(Va_focal + Va_site + Va_walker + Vw),
                R_site = Va_site/(Va_focal + Va_site + Va_walker + Vw),
                R_walker = Va_walker/(Va_focal + Va_site + Va_walker + Vw))


#Focal animal repeatability (unajusted)
mean(post.disturb.data$R_focal_unadj)
#0.2512295
rethinking::HPDI(post.disturb.data$R_focal_unadj, prob = 0.95)
# |0.95     0.95| 
# 0.1363314 0.3756826

#Focal animal repeatability (adjusted)
mean(post.disturb.data$R_focal_adj)
#0.1765669
rethinking::HPDI(post.disturb.data$R_focal_adj, prob = 0.95)
# |0.95      0.95| 
# 0.07226211 0.28854673 

#Home site repeatability 
mean(post.disturb.data$R_site)
#0.1559759
rethinking::HPDI(post.disturb.data$R_site, prob = 0.95)
# |0.95    0.95| 
#   0.011794 0.312259

#Walker repeatability
mean(post.disturb.data$R_walker)
#0.1476016
rethinking::HPDI(post.disturb.data$R_walker, prob = 0.95)
# |0.95      0.95| 
# 0.01140785 0.34858505 

#> ---- Model 2: only among-individual variance (Va) is calculated separately for each site type ----
disturb.among = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_pres + cover + distance_to_safety_m + start_distance_m + disturb_rating + trap_prop + (0+disturb_rating||focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
disturb.among.model <- brm(disturb.among,
                        data = FIDs,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99)) 

disturb.among.model <- add_criterion(disturb.among.model, 'loo', moment_match = TRUE)
summary(disturb.among.model)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(disturb.among.model, file = 'disturb.among.model')

#load the model
#FIDs.model.among = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.among")

#> ---- Model 3: only within-individual variances (Vw) in calculated seperately for each site ----
disturb.within = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_pres + cover + distance_to_safety_m + start_distance_m + disturb_rating + trap_prop + (1|focal_animal) + (1|walker), sigma ~ 0 + disturb_rating, family = 'Gaussian')

get_prior(disturb.within, data = FIDs)

prior2 <- c(
  #Intercept
  set_prior("normal(0,20)", class = "Intercept"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

#run the model
disturb.within.model <- brm(disturb.within,
                         data = FIDs,
                         family = gaussian,
                         cores = 4, 
                         chains = 4, 
                         warmup = 1000, 
                         iter = 10000, 
                         prior = prior2,
                         control = list(adapt_delta = 0.99)) 

disturb.within.model <- add_criterion(disturb.within.model, 'loo', moment_match = TRUE)
summary(disturb.within.model)
#you will notice in the model summary under Population-level effects that there is now a sigma coefficient estimated for each life_stage

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(disturb.within.model, file = 'disturb.within.model')

#load the model 
#FIDs.model.within = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.within")

#> ---- Model 4: Va and Vw is calculated seperately for each site ----
disturb.allvar= bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_pres + cover + distance_to_safety_m + start_distance_m + disturb_rating + trap_prop + (1|home_site) + (0+disturb_rating||focal_animal), sigma ~ 0 + disturb_rating, family = 'Gaussian')

get_prior(disturb.allvar, data = FIDs)

prior3 <- c(
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"),
  #sd
  set_prior("cauchy(5,15)", class = "sd"))

disturb.allvar.model <- brm(disturb.allvar,
                          data = FIDs,
                          family = gaussian,
                          cores = 4, 
                          chains = 4, 
                          warmup = 1000, 
                          iter = 10000, 
                          prior = prior3,
                          control = list(adapt_delta = 0.99)) 

disturb.allvar.model <- add_criterion(disturb.allvar.model, 'loo', moment_match = TRUE)
summary(disturb.allvar.model)
#Now we have a sigma and sd estimated for both life stages.

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(disturb.allvar.model, file = 'disturb.allvar.model')

#load the model 
#FIDs.model.bothvar = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.bothvar")


#> ---- Model comparison to compare our 4 candidate models ----
model.comp = loo_compare(disturb.null.model, disturb.among.model, disturb.within.model, disturb.allvar.model, criterion = 'loo')
print(model.comp, simplify = F)
#                     elpd_diff se_diff elpd_loo se_elpd_loo p_loo  se_p_loo looic 
# disturb.among.model     0.0       0.0  -334.8     16.0        70.3    5.5    669.5
# disturb.within.model   -0.5       4.6  -335.3     17.3        67.8    5.6    670.6
# disturb.allvar.model   -1.8       0.4  -336.5     16.2        72.2    5.7    673.0
# disturb.null.model     -4.5       6.0  -339.2     16.2        62.2    4.9    678.4
# se_looic
# disturb.among.model    31.9  
# disturb.within.model   34.7  
# disturb.allvar.model   32.4  
# disturb.null.model     32.5

#It looks like there are not variance differences between sites, at least when we use the median to split them up by high and low.

#> ---- Aggregate variance and repeatability for each life_stage ----
get_variables(disturb.allvar.model)[1:30]
post.data = disturb.allvar.model %>% spread_draws(sd_focal_animal__disturb_ratinghigh,sd_focal_animal__disturb_ratinglow, b_sigma_disturb_ratinghigh, b_sigma_disturb_ratinglow)

#Among-individual variance for each life_stage
post.data$Va_high <- post.data$sd_focal_animal__disturb_ratinghigh^2
post.data$Va_low <- post.data$sd_focal_animal__disturb_ratinglow^2

#Within-individual variance for each life stage
post.data$Vw_high = exp(post.data$b_sigma_disturb_ratinghigh)^2
post.data$Vw_low = exp(post.data$b_sigma_disturb_ratinglow)^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_high = Va_high/(Va_high + Vw_high),
                R_low = Va_low/(Va_low + Vw_low))

#High site repeatability
hist(post.data$R_high)
mean(post.data$R_high)
# 0.2481942
rethinking::HPDI(post.data$R_high, prob = 0.95)
# |0.95     0.95| 
#   0.09237501 0.41922985 

#Low site repeatability
hist(post.data$R_low)
mean(post.data$R_low)
# 0.3039674
rethinking::HPDI(post.data$R_low, prob = 0.95)
# |0.95     0.95| 
#   0.07361651 0.53173114  

#Among-individual variance
disturb.Va = post.data %>%
  dplyr::select(starts_with("Va")) %>%
  pivot_longer(cols = starts_with("Va"),
               names_to = 'Disturb_rating',
               names_prefix = "Va_",
               values_to ="Estimate")

(disturb.Va = disturb.Va %>%
    dplyr::group_by(Disturb_rating)%>%
    dplyr::summarise(Va = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())
# Disturb_rating    Va lowerCI upperCI
Disturb_rating    Va lowerCI upperCI
# 1           high 0.097   0.026   0.186
# 2            low 0.129   0.014   0.275

#Within-individual variance
disturb.Vw = post.data %>%
  dplyr::select(starts_with("Vw")) %>%
  pivot_longer(cols = starts_with("Vw"),
               names_to = 'Disturb_rating',
               names_prefix = "Vw_",
               values_to ="Estimate")

(disturb.Vw = disturb.Vw %>%
    dplyr::group_by(Disturb_rating)%>%
    dplyr::summarise(Vw = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())

# Disturb_rating    Vw lowerCI upperCI
# Disturb_rating    Vw lowerCI upperCI
# 1           high 0.284   0.223   0.352
# 2            low 0.275   0.212   0.344

#> ---- Calculate delta (magnitude of differences) in the variance components ----
# We can also calcuate the magnitude of difference (i.e. the effect size) in variance components between both life-stages.
#delta Va and Vw
post.data$delta_Va = with(post.data, Va_high - Va_low)
post.data$delta_Vw = with(post.data, Vw_high - Vw_low)
post.data$delta_R = with(post.data, R_high - R_low)

#Difference in repeatability
mean(post.data$delta_R)
#--0.05577315
rethinking::HPDI(post.data$delta_R, prob = 0.95)
# |0.95      0.95| 
#   --0.3580758  0.2405146

#Difference in among-individual variability
mean(post.data$delta_Va)
# -0.03237488
rethinking::HPDI(post.data$delta_Va, prob = 0.95)
# |0.95        0.95| 
#   -0.2147235  0.1373455 

#Difference in within-individual variability
mean(post.data$delta_Vw)
#0.008576943
rethinking::HPDI(post.data$delta_Vw, prob = 0.95)
# |0.95       0.95| 
# -0.08604639  0.10572344  

#### Using Chelsea's defined score ####
#Sites were separated dependent on how close they were to areas with heavy human traffic. We were interested in whether squirrels from sites with higher human traffic would have larger variance in their FIDs than squirrels from sites with lower human disturbance. 

class(FIDs$high_disturbance)
FIDs$high_disturbance = as.character(as.numeric(FIDs$high_disturbance))


#High disturbance sites = 1, Low disturbance sites = 0 

FIDs %>%
  group_by(high_disturbance)%>%
  summarise(n_indiv = length(unique(focal_animal)),
            mean_FID = mean(FID_m),
            stdev_FID = sd(FID_m))

FIDs %>%
  group_by(high_disturbance, sex)%>%
  summarise(n_indiv = length(unique(focal_animal)))
#sexes seem to be pretty evenly split between sites

#number of trials per disturbance site

#I think we may need to include home_site as random effect in our model because some squirrels depending on year may switch between sites, and not all high disturbance sites are equally disturbed

#> ---- Model 1: Null model ----
disturb.null = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + high_disturbance + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,20)", class = "Intercept"),
  set_prior("cauchy(5,15)", class = "sd"),
  set_prior("cauchy(5,15)", class = "sigma"),
  set_prior("normal(0,20)", class = "b"))

#run the model
disturb.null.model <- brm(disturb.null,
                          data = FIDs,
                          family = gaussian,
                          cores = 4, 
                          chains = 4, 
                          warmup = 1000, 
                          iter = 10000, 
                          prior = prior1,
                          control = list(adapt_delta = 0.99)) 

disturb.null.model <- add_criterion(disturb.null.model, 'loo', moment_match = TRUE)
brms::pp_check(disturb.null.model)
summary(disturb.null.model)
cond.disturb = conditional_effects(disturb.null.model, robust = FALSE)

ggplot(cond.disturb$high_disturbance, aes(x = high_disturbance, y = estimate__)) + 
  geom_point(data = FIDs, aes(x = high_disturbance, y = FID_sqrt), position  = position_jitter(0.25), inherit.aes = F, size = 1.5, color = 'blue', alpha = 0.3)+
  geom_point(size = 4, pch = 21, fill = 'blue')+
  geom_errorbar(aes(ymin = lower__, ymax = upper__), width = 0.25, color = 'blue')+
  xlab("")+
  ylab("Flight initiation distance (m)")+
  theme_classic()+
    themeof

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models")
saveRDS(disturb.null.model, file = 'disturb.null.model')
##use the below code the re-load the model when you need to
#disturb.null.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models/disturb.null.model")

#as expected low disturbance sites have higher FIDs

#> ---- Model 2: only among-individual variance (Va) is calculated separately for each site type ----
disturb.among = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + high_disturbance + trap_prop + (1|home_site_w_spatial) + (0+high_disturbance||focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  set_prior("normal(0,20)", class = "Intercept"),
  set_prior("cauchy(5,15)", class = "sd"),
  set_prior("cauchy(5,15)", class = "sigma"),
  set_prior("normal(0,20)", class = "b"))

#run the model
disturb.among.model <- brm(disturb.among,
                           data = FIDs,
                           family = gaussian,
                           cores = 4, 
                           chains = 4, 
                           warmup = 1000, 
                           iter = 10000, 
                           prior = prior1,
                           control = list(adapt_delta = 0.99)) 

disturb.among.model <- add_criterion(disturb.among.model, 'loo', moment_match = TRUE)
summary(disturb.among.model)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models")
saveRDS(disturb.among.model, file = 'disturb.among.model')
#disturb.among.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models/disturb.among.model")

#> ---- Model 3: only within-individual variances (Vw) in calculated seperately for each site ----
disturb.within = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + high_disturbance + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), sigma ~ 0 + high_disturbance, family = 'Gaussian')

prior2 <- c(
  set_prior("normal(0,20)", class = "Intercept"),
  set_prior("normal(0,20)", class = "b"),
  set_prior("cauchy(5,15)", class = "sd"))

#run the model
disturb.within.model <- brm(disturb.within,
                            data = FIDs,
                            family = gaussian,
                            cores = 4, 
                            chains = 4, 
                            warmup = 1000, 
                            iter = 10000, 
                            prior = prior2,
                            control = list(adapt_delta = 0.99)) 

disturb.within.model <- add_criterion(disturb.within.model, 'loo', moment_match = TRUE)
summary(disturb.within.model)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models")
saveRDS(disturb.within.model, file = 'disturb.within.model')
#disturb.within.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models/disturb.within.model")

#> ---- Model 4: Va and Vw is calculated separately for each site ----
disturb.allvar= bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + start_distance_m + high_disturbance + trap_prop + (1|home_site_w_spatial) + (0+high_disturbance||focal_animal) + (1|walker), sigma ~ 0 + high_disturbance, family = 'Gaussian')

prior3 <- c(
  set_prior("normal(0,20)", class = "b"),
  set_prior("cauchy(5,15)", class = "sd"))

disturb.allvar.model <- brm(disturb.allvar,
                            data = FIDs,
                            family = gaussian,
                            cores = 4, 
                            chains = 4, 
                            warmup = 1000, 
                            iter = 10000, 
                            prior = prior3,
                            control = list(adapt_delta = 0.99)) 

disturb.allvar.model <- add_criterion(disturb.allvar.model, 'loo', moment_match = TRUE)

#save the model
setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models")
saveRDS(disturb.allvar.model, file = 'disturb.allvar.model')
#disturb.allvar.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/site_disturbance_models/disturb.allvar.model")

#> ---- Model comparison to compare our 4 candidate models ----
model.comp = loo_compare(disturb.null.model, disturb.among.model, disturb.within.model, disturb.allvar.model, criterion = 'loo')
print(model.comp, simplify = F)
# elpd_diff se_diff elpd_loo se_elpd_loo p_loo  se_p_loo looic 
# disturb.allvar.model    0.0       0.0  -322.4     17.0        63.4    5.6    644.9
# disturb.among.model    -0.2       2.6  -322.7     16.1        63.7    5.0    645.4
# disturb.within.model   -1.2       2.5  -323.6     17.9        64.0    5.5    647.3
# disturb.null.model     -1.8       2.8  -324.3     17.1        63.6    5.0    648.5
# se_looic
# disturb.allvar.model   34.0  
# disturb.among.model    32.3  
# disturb.within.model   35.8  
# disturb.null.model     34.1

#> ---- Aggregate variance and repeatability for each site ----
get_variables(disturb.allvar.model)[1:30]
post.data = disturb.allvar.model %>% spread_draws(sd_focal_animal__high_disturbance0,sd_focal_animal__high_disturbance1, sd_home_site__Intercept, sd_walker__Intercept, b_sigma_high_disturbance0, b_sigma_high_disturbance1)

#Among-individual variance for each life_stage
post.data$Va_high <- post.data$sd_focal_animal__high_disturbance1^2
post.data$Va_low <- post.data$sd_focal_animal__high_disturbance0^2

#Within-individual variance for each life stage
post.data$Vw_high = exp(post.data$b_sigma_high_disturbance1)^2
post.data$Vw_low = exp(post.data$b_sigma_high_disturbance0)^2

#Repeatability
post.data = post.data %>%
  dplyr::mutate(R_high = Va_high/(Va_high + Vw_high),
                R_low = Va_low/(Va_low + Vw_low))

#High site repeatability
mean(post.data$R_high)
# 0.3923218
rethinking::HPDI(post.data$R_high, prob = 0.95)
# |0.95     0.95| 
# 0.2091947 0.5704514 

#Low site repeatability
mean(post.data$R_low)
# 0.1303906
round(rethinking::HPDI(post.data$R_low, prob = 0.95), 3)
# |0.95 0.95| 
# 0.0   0.3 

#Among-individual variance
disturb.Va = post.data %>%
  dplyr::select(starts_with("Va")) %>%
  pivot_longer(cols = starts_with("Va"),
               names_to = 'Disturb_rating',
               names_prefix = "Va_",
               values_to ="Estimate")

(disturb.Va = disturb.Va %>%
    dplyr::group_by(Disturb_rating)%>%
    dplyr::summarise(Va = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())
# Disturb_rating    Va lowerCI upperCI
# 1           high 0.151   0.051   0.267
# 2            low 0.048   0.000   0.122

#Within-individual variance
disturb.Vw = post.data %>%
  dplyr::select(starts_with("Vw")) %>%
  pivot_longer(cols = starts_with("Vw"),
               names_to = 'Disturb_rating',
               names_prefix = "Vw_",
               values_to ="Estimate")

(disturb.Vw = disturb.Vw %>%
    dplyr::group_by(Disturb_rating)%>%
    dplyr::summarise(Vw = round(mean(Estimate), 3),
                     lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
                     upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
    as.data.frame())

# Disturb_rating    Vw lowerCI upperCI
# 1           high 0.222   0.173   0.273
# 2            low 0.307   0.236   0.385

# We can also calcuate the magnitude of difference (i.e. the effect size) in variance components between both disturbance sites.

#delta Va and Vw
post.data$delta_Va = with(post.data, Va_high - Va_low)
post.data$delta_Vw = with(post.data, Vw_high - Vw_low)
post.data$delta_R = with(post.data, R_high - R_low)

#Difference in repeatability
mean(post.data$delta_R)
#0.2619313
rethinking::HPDI(post.data$delta_R, prob = 0.95)
# |0.95        0.95| 
#   -0.001539563  0.508348572 

#Difference in among-individual variability
mean(post.data$delta_Va)
# 0.1022216
rethinking::HPDI(post.data$delta_Va, prob = 0.95)
# |0.95       0.95| 
#   -0.03686949  0.24888482 


#Difference in within-individual variability
mean(post.data$delta_Vw)
#-0.08510165
rethinking::HPDI(post.data$delta_Vw, prob = 0.95)
# |0.95        0.95| 
#   -0.179943686  0.007511012


#### What determines whether a squirrel shelters or not ####
FIDs = FIDs %>%
  mutate(sheltered = ifelse(time_until_emerge == 0, '0', '1'))
FIDs$sheltered[is.na(FIDs$sheltered)] <- 0
class(FIDs$sheltered)
FIDs$sheltered <- as.numeric(FIDs$sheltered)

FIDs %>% group_by(sheltered) %>% summarise(ntrials = length(day))



#> ---- bernoulli model ----
shelter.bf = bf(sheltered ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + start_distance_m + FID_m + home_site_disturbance_by_year + trap_prop + (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = bernoulli(link='logit'))

get_prior(shelter.bf, data = FIDs)
sheltered.model <- brm(shelter.bf,  
                          data=FIDs, 
                          family = bernoulli(link='logit'),
                          warmup = 1000, 
                          iter = 10000, 
                          chains = 4, 
                          cores=4,
                          seed = 123,
                       control = list(adapt_delta = 0.95))

brms::pp_check(sheltered.model)


setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs")
saveRDS(sheltered.model, file = 'sheltered.model')
##use the below code the re-load the model when you need to
#sheltered.model = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/sheltered.model")


summary(sheltered.model, prob = 0.95)


#Caterpillar plot to check model convergence
mcmc_plot(sheltered.model, type = "trace")

#Check autocorrelation
mcmc_plot(sheltered.model, type = "acf_bar")

mcmc_plot(sheltered.model, type = "areas", prob = 0.95)

#### ~ Repeatability of the decision to flee or flee into shelter ####

#Accroding to Nakagawa for binomial distribution, the within-individual variance can be estimated as pi^2/3

get_variables(sheltered.model)
post.data.shelter = sheltered.model %>% 
  spread_draws(sd_focal_animal__Intercept, 
               sd_home_site_w_spatial__Intercept, 
               sd_walker__Intercept )

post.data.shelter$Va_focal_animal = post.data.shelter$sd_focal_animal__Intercept^2
post.data.shelter$Va_homesite = post.data.shelter$sd_home_site_w_spatial__Intercept^2
post.data.shelter$Va_walker = post.data.shelter$sd_walker__Intercept^2
Vw = (pi^2/3)

shelter.repeat = post.data.shelter %>%
  summarise(Radj = Va_focal_animal / (Va_focal_animal + Va_homesite + Va_walker + Vw),
            Runadj = Va_focal_animal / (Va_focal_animal + Vw))


# Adjusted repeatability
mean(shelter.repeat$Radj)
#0.07546859

round(rethinking::HPDI(shelter.repeat$Radj, prob = 0.95), 3)
# |0.95 0.95| 
# 0.000 0.188

# Unadjusted repeatability
mean(shelter.repeat$Runadj)
#0.08801034

round(rethinking::HPDI(shelter.repeat$Runadj, prob = 0.95),3)
# |0.95 0.95| 
# 0.000 0.215 


#### Conspecific presence ####

### What proportion of FIDs were conspeficis present ###

class(FIDs$conspecifics_pres)
FIDs %>% 
  group_by(conspecifics_pres)%>%
  summarise(trials = length(day),
            total = 267 + 117,
            prop = trials/total)

# conspecifics_pres trials total  prop
# <chr>              <int> <dbl> <dbl>
# 1 0                    267   384 0.695
# 2 1                    117   384 0.305

#~70% were trials in isolation

FIDs$conspecifics_5m_num = as.integer(FIDs$conspecifics_5m_num)

FIDs %>% 
  group_by(conspecifics_5m_num)%>%
  summarise(trials = length(day),
            prop = trials/384)

# conspecifics_5m_num trials    prop
# <chr>                <int>   <dbl>
# 1 0                      267 0.695  
# 2 1                       93 0.242  
# 3 2                       21 0.0547 
# 4 3                        3 0.00781

con_pres = FIDs %>%
  filter(conspecifics_pres == "1")

#Alot of individuals tested with conspecificis didn't have the surrounding conspecifics also tested. Chelsea did not that they only really started recording the FID of all conspeficific near each other in 2019

#For groups where multiple individuals within those groups were tested, I'm going to filter out the individuals with the  longest FID_m as its likely that these individuals influence the FIDs of surrounding individuals

FIDs = FIDs %>%
  group_by(group_id)%>%
  mutate(max_FID = max(FID_m),
            min_FID = min(FID_m),
            mean_FID = mean(FID_m))

#Now im filtering the data to only include individuals within groups with the highest FID. All other individuals within those groups will be removed from the dataset

FIDs_filt = FIDs %>%
  group_by(group_id)%>%
  filter(FID_m == max_FID)

#As a result of the filtering we lost 38 data points which represents ~10% of the data

#now we re-run our final general model with this filtered data

#> ---- Model1: General model ----
FID.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + start_distance_m*home_site_disturbance_by_year + trap_prop+ (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')

#Setup the prior - this is an uninformative prior
prior1 <- c(
  #intercept prior
  set_prior("normal(0,20)", class = "Intercept"),
  #ID random intercept prior
  set_prior("cauchy(5,15)", class = "sd"),
  #residual priors
  set_prior("cauchy(5,15)", class = "sigma"),
  #coefficient priors for population-level effects
  set_prior("normal(0,20)", class = "b"))

#run the model
FIDs_filt.model1 <- brm(FID.bf,
                   data = FIDs_filt,
                   family = gaussian,
                   cores = 4, 
                   chains = 4, 
                   warmup = 1000, 
                   iter = 10000, 
                   prior = prior1,
                   control = list(adapt_delta = 0.99)) 

FIDs_filt.model1 <- add_criterion(FIDs_filt.model1, 'loo', moment_match = TRUE)

summary(FIDs_filt.model1)

setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models")
saveRDS(FIDs_filt.model1, file = 'FIDs_filt.model1')
##use the below code the re-load the model when you need to
#FIDs_filt.model1 = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models/FIDs_filt.model1")

FID2.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m  + start_distance_m + home_site_disturbance_by_year + trap_prop+ (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')


#run the model
FIDs_filt.model2 <- brm(FID2.bf,
                        data = FIDs_filt,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99)) 

FIDs_filt.model2 <- add_criterion(FIDs_filt.model2, 'loo', moment_match = TRUE)

summary(FIDs_filt.model2)

setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models")
saveRDS(FIDs_filt.model2, file = 'FIDs_filt.model2')
##use the below code the re-load the model when you need to
FIDs_filt.model2 = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models/FIDs_filt.model2")
cond.plots = plot(conditional_effects(FIDs_filt.model2, robust = FALSE), plot = F)
cond.plots$conspecifics_5m_num

#keep individuals with the min FID, instead of the max FID
FIDs_filt_min = FIDs %>%
  group_by(group_id)%>%
  filter(FID_m == min_FID)

FID_min.bf = bf(FID_sqrt ~ year + sex + life_stage + trial_num + conspecifics_5m_num + cover + distance_to_safety_m + sheltered + start_distance_m + home_site_disturbance_by_year + trap_prop+ (1|home_site_w_spatial) + (1|focal_animal) + (1|walker), family = 'Gaussian')

#run the model
FIDs_filt_min.model2 <- brm(FID_min.bf,
                        data = FIDs_filt_min,
                        family = gaussian,
                        cores = 4, 
                        chains = 4, 
                        warmup = 1000, 
                        iter = 10000, 
                        prior = prior1,
                        control = list(adapt_delta = 0.99)) 

FIDs_filt_min.model2 <- add_criterion(FIDs_filt_min.model2, 'loo', moment_match = TRUE)

summary(FIDs_filt_min.model2)

setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models")
saveRDS(FIDs_filt_min.model2, file = 'FIDs_filt_min.model2')
##use the below code the re-load the model when you need to
FIDs_filt.model2 = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/filtered_models/FIDs_filt_min.model2")



# #### TRAPPABILITY ####
# #Seperate from the FID response, but we can test repeatability of trappability, or at least, look at its correlation with FID
# 
# #First I'm just going to look at whether proportion of days trapped is correlated to total number of times trapped.
# 
# #Im going to filter the data to only include 1 data point per individuals
# FIDs_indiv = FIDs %>%
#   filter(trial_num == "1")
# 
# cor.test(FIDs_indiv$trap_sum_all, FIDs_indiv$trap_prop, method = 'spearman')
# # 
# # Spearman's rank correlation rho
# # 
# # data:  FIDs_indiv$trap_sum_all and FIDs_indiv$trap_prop
# # S = 9329.2, p-value < 2.2e-16
# # alternative hypothesis: true rho is not equal to 0
# # sample estimates:
# #       rho 
# # 0.9567734 
# 
# #This suggests to me that both columns are telling us the same thing, so I think we can use either metric as trappability score for each squirrel. 
# 
# #So now I'm going to update our previous model to include trappability to see if it has a relationship with FID_m
# 
# FID.model.trap = update(FIDs.model1.5, formula = ~.  + trap_prop - Sheltered - area_tested_disturbance_by_year + home_site_disturbance_by_year, newdata = FIDs, warmup = 1000, iter = 10000, cores = 4)
# summary(FID.model.trap)
# cond.plots = plot(conditional_effects(FID.model.trap, robust = FALSE), plot = F)
# cond.plots$home_site_disturbance_by_year
# cond.plots$trap_prop
# #Bolder individuals come from more disturbed sites and are easier to catch
# cond.plots$year
# class(FIDs$year)
# cond.plots$start_distance_m
# 
# #### Time until trapped analysis ####
# #load in the trapping data 
# ## Load your dataset in ##
# setwd("~/Desktop/Squirrels/FID_squirrel project")
# TRAP <- read.csv("Trapping_data.csv", header = T, strip.white = T)
# 
# str(TRAP)
# TRAP$day = as.character(as.numeric(TRAP$day))
# TRAP$week = as.character(as.numeric(TRAP$week))
# TRAP$year = as.character(as.numeric(TRAP$year))
# TRAP$adult = as.character(as.numeric(TRAP$adult))
# 
# 
# TRAP %>% 
#   summarise(n_indiv = length(unique(focal_animal)))
# # n_indiv
# # 1     335
# 
# TRAP = TRAP %>% 
#   group_by(focal_animal) %>%
#   mutate(trap_trials = length(unique(date)))
# 
# range(TRAP$trap_trials)
# #1 40
# summary(TRAP$trap_trials)
# # Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# # 1.00    7.00   13.00   16.33   25.00   40.00 
# 
# range(TRAP$mins_trapped)
# 
# 
# 
# #> ---- Explore the time until trapping data ----
# hist(TRAP$mins_trapped)
# 
# #some weird negative data, turns out this corresponds to some weird hastags in the csv, so I'm just going to remove these. Which I just deleted from the csv file
# TRAP = TRAP %>%
#   filter(!is.na(mins_trapped), !is.na(day), !is.na(adult), !is.na(sex), !is.na(focal_animal))
# 
# hist(TRAP$mins_trapped) #skewed
# hist(log(TRAP$mins_trapped))
# hist(sqrt(TRAP$mins_trapped)) #hmm not amazing
# 
# #We can also use a Cleveland dotchart
# dotchart(TRAP$mins_trapped)
# dotchart(sqrt(TRAP$mins_trapped))
# 
# 
# #let's run a basic model and look at the distribution of the residuals without the transformation
# Trap.model <- lmer(mins_trapped ~ adult + sex + (1|focal_animal) + (1|day), data = TRAP)
# drop1(Trap.model, test = 'Chi') 
# #note that walker, sex and presence of conspecifics might be important
# mod.resid = resid(Trap.model)
# qqnorm(mod.resid)
# qqline(mod.resid) # not good
# 
# #now with the square-root transformation
# sqrtTRAP.model <- lmer(sqrt(mins_trapped) ~ adult + sex + (1|focal_animal) + (1|day), data = TRAP)
# drop1(sqrtTRAP.model, test = 'Chi') 
# #note that sex and presence of conspecifics have become more important
# summary(sqrtTRAP.model)
# mod.resid = resid(sqrtTRAP.model)
# qqnorm(mod.resid)
# qqline(mod.resid) # WORSE
# 
# # I don't think gaussian is appropriate. I think the problem is that mins_trapped is not a continuous variable because squrirrels are clustered in trapping times (i.e. squirrels have the same mins_trapped, because the traps were checked at specific times). The other problem I hadn't previously considered was that mins_trapped doesn't take into account the day that certain squirrels were not caught. For example, on day 1 and day 2 a squirrel could be captured in the first 5 minutes that the traps are opened, but on day 3 that squirrel is never caught. However, the analysis as it is, would say that this is squirrels trapping success is highly consistent, when really its not that consistent (i.e. because it wasn't caught on day 3).                    
# 
# 
# #### Split variance between life-stage and sex ####
# #Need to create two dataframes one for male and one for female
# 
# males = FIDs%>%
#   filter(sex == 0)
# 
# females = FIDs %>%
#   filter(sex == 1)
# 
# 
# #### MALES ####
# #> ---- Null model ----
# male.null.bf = bf(FID_sqrt ~ year + life_stage + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|focal_animal) + (1|walker), family = 'Gaussian')
# 
# #Setup the prior - this is an uninformative prior
# prior1 <- c(
#   #intercept prior
#   set_prior("normal(0,20)", class = "Intercept"),
#   #ID random intercept prior
#   set_prior("cauchy(5,15)", class = "sd"),
#   #residual priors
#   set_prior("cauchy(5,15)", class = "sigma"),
#   #coefficient priors for population-level effects
#   set_prior("normal(0,20)", class = "b"))
# 
# #run the model
# male.null.model <- brm(male.null.bf,
#                  data = males,
#                  family = gaussian,
#                  cores = 4, 
#                  chains = 4, 
#                  warmup = 1000, 
#                  iter = 10000, 
#                  prior = prior1,
#                  control = list(adapt_delta = 0.995)) 
# 
# male.null.model <- add_criterion(male.null.model, 'loo', moment_match = TRUE)
# summary(male.null.model)
# cond.plots.males = plot(conditional_effects(male.null.model, robust = FALSE), plot = F)
# cond.plots.males$life_stage
# #Males adults had higher FIDs than juvenile males
# 
# #save the model
# setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_age_models")
# saveRDS(male.null.model, file = 'male.null.model')
# 
# 
# #> ---- Model 2: only among-individual variance (Va) is calculated seperately for each life_stage ----
# male.among = bf(FID_sqrt ~ year + life_stage + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (0+life_stage||focal_animal) + (1|walker), family = 'Gaussian')
# 
# #Setup the prior - this is an uninformative prior
# prior1 <- c(
#   set_prior("normal(0,20)", class = "Intercept"),
#   set_prior("cauchy(5,15)", class = "sd"),
#   set_prior("cauchy(5,15)", class = "sigma"),
#   set_prior("normal(0,20)", class = "b"))
# 
# #run the model
# male.among.model <- brm(male.among,
#                         data = males,
#                         family = gaussian,
#                         cores = 4, 
#                         chains = 4, 
#                         warmup = 1000, 
#                         iter = 10000, 
#                         prior = prior1,
#                         control = list(adapt_delta = 0.995)) 
# 
# male.among.model <- add_criterion(male.among.model, 'loo', moment_match = TRUE)
# 
# #save the model
# setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_age_models")
# saveRDS(male.among.model, file = 'male.among.model')
# #load the model
# #FIDs.model.among = readRDS(file = "~/Desktop/Squirrels/FID_squirrel project/model_outputs/FIDs.model.among")
# 
# 
# #> ---- Model 3: only within-individual variances (Vw) in calculated seperately for each life_stage ----
# male.within = bf(FID_sqrt ~ year + life_stage + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (1|focal_animal) + (1|walker), sigma ~ 0+life_stage, family = 'Gaussian')
# 
# prior1 <- c(
#   #Intercept
#   set_prior("normal(0,20)", class = "Intercept"),
#   #coefficient priors for population-level effects
#   set_prior("normal(0,20)", class = "b"),
#   #sd
#   set_prior("cauchy(5,15)", class = "sd"))
# 
# #run the model
# male.within.model <- brm(male.within,
#                          data = males,
#                          family = gaussian,
#                          cores = 4, 
#                          chains = 4, 
#                          warmup = 1000, 
#                          iter = 10000, 
#                          prior = prior1,
#                          control = list(adapt_delta = 0.999)) 
# 
# male.within.model <- add_criterion(male.within.model, 'loo', moment_match = TRUE)
# 
# #save the model
# setwd("~/Desktop/Squirrels/FID_squirrel project/model_outputs/sex_age_models")
# saveRDS(male.within.model, file = 'male.within.model')
# 
# #> ---- Model 4: Va and Vw is calculated seperately for each life_stage ----
# male.bothvar = bf(FID_sqrt ~ year + life_stage + conspecifics_5m_num + start_distance_m + home_site_disturbance_by_year + trap_prop + (0+life_stage||focal_animal) + (1|walker), sigma ~ 0+life_stage, family = 'Gaussian')
# 
# 
# prior1 <- c(
#   #coefficient priors for population-level effects
#   set_prior("normal(0,20)", class = "b"),
#   #sd
#   set_prior("cauchy(5,15)", class = "sd"))
# 
# male.bothvar.model <- brm(male.bothvar,
#                           data = males,
#                           family = gaussian,
#                           cores = 4, 
#                           chains = 4, 
#                           warmup = 1000, 
#                           iter = 10000, 
#                           prior = prior1,
#                           control = list(adapt_delta = 0.99)) 
# 
# male.bothvar.model <- add_criterion(male.bothvar.model, 'loo', moment_match = TRUE)
# 
# #>---- Model comparison ----
# model.comp = loo_compare(male.null.model, male.among.model, male.within.model, male.bothvar.model, criterion = 'loo')
# print(model.comp, simplify = F)
# #                 elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic
# # male.within.model    0.0       0.0   -68.9      6.8        18.7   2.6    137.8  13.5   
# # male.bothvar.model  -1.2       0.9   -70.1      7.1        21.4   3.0    140.2  14.1   
# # male.null.model     -1.5       2.6   -70.4      7.1        17.7   2.9    140.8  14.3   
# # male.among.model    -2.6       2.9   -71.5      7.5        21.1   3.6    143.0  15.1 
# 
# #> ---- Aggregate variance and repeatability for each site ----
# get_variables(male.bothvar.model)[1:30]
# post.data = male.bothvar.model %>% spread_draws(sd_focal_animal__life_stage0,sd_focal_animal__life_stage1, b_sigma_life_stage0,b_sigma_life_stage1, sd_walker__Intercept)
# 
# #Among-individual variance for each life_stage
# post.data$Va_juv <- post.data$sd_focal_animal__life_stage0^2
# post.data$Va_adult <- post.data$sd_focal_animal__life_stage1^2
# 
# #Among-walker variance
# post.data$Va_walker <- post.data$sd_walker__Intercept^2
# 
# #Within-individual variance for each life stage
# post.data$Vw_juv = exp(post.data$b_sigma_life_stage0)^2
# post.data$Vw_adult = exp(post.data$b_sigma_life_stage1)^2
# 
# #Repeatability
# post.data = post.data %>%
#   dplyr::mutate(R_juv = Va_juv/(Va_juv + Vw_juv ),
#                 R_adult = Va_adult/(Va_adult + Vw_adult))
# 
# #Male juvenile repeatability
# mean(post.data$R_juv)
# # 0.1375445
# round(rethinking::HPDI(post.data$R_juv, prob = 0.95),3)
# # |0.95 0.95| 
# # 0.000 0.383
# 
# #Male adult repeatability
# mean(post.data$R_adult)
# # 0.1463872
# round(rethinking::HPDI(post.data$R_adult, prob = 0.95), 3)
# # |0.95 0.95| 
# #   0.000 0.483 
# 
# #Among-individual variance
# male.lifestage.Va = post.data %>%
#   dplyr::select(starts_with("Va")) %>%
#   pivot_longer(cols = starts_with("Va"),
#                names_to = 'life_stage',
#                names_prefix = "Va_",
#                values_to ="Estimate")
# 
# (male.lifestage.Va = male.lifestage.Va %>%
#     dplyr::group_by(life_stage)%>%
#     dplyr::summarise(Va = round(mean(Estimate), 3),
#                      lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
#                      upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
#     as.data.frame())
# # life_stage    Va lowerCI upperCI
# # 1      adult 0.083       0   0.311
# # 2        juv 0.031       0   0.097
# # 3     walker 0.206       0   0.580
# 
# #Within-individual variance
# male.lifestage.Vw = post.data %>%
#   dplyr::select(starts_with("Vw")) %>%
#   pivot_longer(cols = starts_with("Vw"),
#                names_to = 'life_stage',
#                names_prefix = "Vw_",
#                values_to ="Estimate")
# 
# (male.lifestage.Vw = male.lifestage.Vw %>%
#     dplyr::group_by(life_stage)%>%
#     dplyr::summarise(Vw = round(mean(Estimate), 3),
#                      lowerCI = round(rethinking::HPDI(Estimate, prob = 0.95)[1], 3),
#                      upperCI = round(rethinking::HPDI(Estimate, prob = 0.95)[2], 3))%>%
#     as.data.frame())
# 
# # `summarise()` ungrouping output (override with `.groups` argument)
# # life_stage    Vw lowerCI upperCI
# # 1      adult 0.395   0.168    0.68
# # 2        juv 0.172   0.107    0.25
# 
# # We can also calcuate the magnitude of difference (i.e. the effect size) in variance components between both disturbance sites.
# 
# #delta Va and Vw
# post.data$delta_Va = with(post.data, Va_high - Va_low)
# post.data$delta_Vw = with(post.data, Vw_high - Vw_low)
# post.data$delta_R = with(post.data, R_high - R_low)
# 
# #Difference in repeatability
# mean(post.data$delta_R)
# #0.2619313
# rethinking::HPDI(post.data$delta_R, prob = 0.95)
# # |0.95        0.95| 
# #   -0.001539563  0.508348572 
# 
# #Difference in among-individual variability
# mean(post.data$delta_Va)
# # 0.1022216
# rethinking::HPDI(post.data$delta_Va, prob = 0.95)
# # |0.95       0.95| 
# #   -0.03686949  0.24888482 
# 
# 
# #Difference in within-individual variability
# mean(post.data$delta_Vw)
# #-0.08510165
# rethinking::HPDI(post.data$delta_Vw, prob = 0.95)
# # |0.95        0.95| 
# #   -0.179943686  0.007511012
# 
# 
# 
# #> ---- Females ----


###### Remaining graphs ###
#-------------------------------------
##  FID GRAPHS ##
#-------------------------------------

library(ggplot2)
library(ggpubr)
library(readxl)

#Upload data
d <- read_excel("FID_Summary_2018-2019.xlsx")

#Figure 1 graphs
#Graph of human activity & FID
c <- ggplot(d, aes(x = home_site_disturbance_by_year, y = FID_m)) + 
  theme_bw() + labs(x = "Home site human activity score", y = "Flight initiation distance (m)") +
  stat_smooth(method = "lm", formula = y~x, se=TRUE) +
  theme(text = element_text(size = 13.5))

#Graph of FID & trapability
a <- ggplot(d, aes(x = trap_prop, y = FID_m)) + 
  theme_bw() +
  stat_smooth(method = "lm", formula = y~x, se = TRUE) +
  labs(x = "Trappability", y = "Flight initiation distance (m)") +
  theme(text = element_text(size = 13.5))

#Graph FID & group size
b <- ggplot(d, aes(x = factor(conspecifics_5m_num), y = FID_m)) + theme_bw() +
  geom_boxplot() +
  labs(x = "Number of conspecifics", y = "Flight initiation distance (m)") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        text = element_text(size = 13.5))

#Combine individual plots
ggarrange(c, a, b, labels = c("A", "B", "C"), ncol = 1, nrow =3)

#Figure 2 graphs
d2 <- subset(d, time_until_emerge_min != "NA")
d2 <- subset(d2, go_into_burrow == "1" | out_of_site == "1")
#Emergence time & Trappability
e <- ggplot(d2, aes(x = trap_prop, y = as.numeric(time_until_emerge_min))) + 
  theme_bw() +
  stat_smooth(method = "lm", formula = y~x) +
  labs(x = "Trappability", y = "Emergence time (min)") +
  theme(text = element_text(size = 13.5))

#Emergence time & human activity
f <- ggplot(d2, aes(x = home_site_disturbance_by_year, y = time_until_emerge_min)) + 
  stat_smooth(method = "lm", formula = y~x) +
  theme_bw() + labs(x = "Home site human activity score", y = "Emergence time (min)") + 
  theme(text = element_text(size = 13.5))

ggarrange(f, e, labels = c("A", "B"), ncol = 2, nrow =1)
