library(Hmisc)
library(writexl)
library(ggplot2)
library(dplyr)
library(lmerTest)
library(optimx)
library(lme4)
library(PerformanceAnalytics)
library(effects)
library(MuMIn)
library(ggeffects) 
library(splines)
library(glmtoolbox)
library(afex)
library(nloptr)
library(dfoptim)
library(psych)
library(ordinal)
library(dplyr)
library(cowplot)

Setting of the directory

You need to set the directory by selecting the file where the dataset are located.

Load dataset in R

df <- read.table("Behaviors.txt", header = T)

The dataset has 1135 entries corresponding to the number of trial made on the 730 turtle sampled. However, all three behaviors were not always recorded at each trial. Thus, the number of observations for each behavior is less than the number of entries.

Preparation of the data for the mixed models

Verification of the format of each data

We want to make sure that the data are in the good format for the analyses.

# Risk-taking behaviors that will be used as response variable in their respective mixed model.

df$active.defenses <- as.numeric(df$active.defenses)
df$escape <- as.numeric(df$escape)
df$log.escape <- log(df$escape + 1) # log(x+1) transformation of the escape latency to be able to use a Gaussian distribution in the mixed model.
df$emergence <- as.numeric(df$emergence)

# Predictor variables

df$Year <- as.factor(df$Year)
df$JulianDay <- as.numeric(df$JulianDay)
df$Hour.active.defenses <- as.numeric(df$Hour.active.defenses)
df$Hour.Platform <- as.numeric(df$Hour.Platform)
df$Order.trial <- as.numeric(df$Order.trial)
df$CL <- as.numeric(df$CL)
df$Wind <- as.numeric(df$Wind)
df$ID.Temperature <- as.numeric(df$ID.Temperature)
df$Site <- as.factor(df$Site)

df$Distance.channel <- as.numeric(df$Distance.channel)
df$Vessels.activities.2019 <- as.numeric(df$Vessels.activities.2019)

df$House.200 <- as.numeric(df$House.200)
df$House.300 <- as.numeric(df$House.300)
df$House.400 <- as.numeric(df$House.400)

df$Urban.200 <- as.numeric(df$Urban.200)
df$Urban.600 <- as.numeric(df$Urban.600)
df$Urban.900 <- as.numeric(df$Urban.900)

Standardization of the predictor variables

All continuous predictor variables were standardized (mean zero, unit variance) before model selection.

df$JulianDay.scaled <- scale(df$JulianDay, center=TRUE, scale=TRUE)
df$Hour.active.defenses.scaled <- scale(df$Hour.active.defenses, center=TRUE, scale=TRUE)
df$Hour.Platform.scaled <- scale(df$Hour.Platform, center=TRUE, scale=TRUE)
df$Order.trial.scaled <- scale(df$Order.trial, center=TRUE, scale=TRUE)
df$CL.scaled <- scale(df$CL, center=TRUE, scale=TRUE)
df$Wind.scaled <- scale(df$Wind, center=TRUE, scale=TRUE)
df$ID.Temperature.scaled <- scale(df$ID.Temperature, center=TRUE, scale=TRUE)

df$Distance.channel.scaled <- scale(df$Distance.channel, center=TRUE, scale=TRUE)
df$Vessels.activities.2019.scaled <- scale(df$Vessels.activities.2019, center=TRUE, scale=TRUE)

df$House.200.scaled <- scale(df$House.200, center=TRUE, scale=TRUE)
df$House.300.scaled <- scale(df$House.300, center=TRUE, scale=TRUE)
df$House.400.scaled <- scale(df$House.400, center=TRUE, scale=TRUE)

df$Urban.200.scaled <- scale(df$Urban.200, center=TRUE, scale=TRUE)
df$Urban.600.scaled <- scale(df$Urban.600, center=TRUE, scale=TRUE)
df$Urban.900.scaled <- scale(df$Urban.900, center=TRUE, scale=TRUE)

Analyses by separating turtles tested in captivity (tanks) and in the wild (Sampling sites)

Here, we want to see if we obtained similar results from those with the complete dataset by separating turtles tested in captivity and in the wild.

Subset of the database to only keep observations made in tanks

df.tank <- df[df$Tank.Site == 'T', ]
table(df.tank$Site)
## 
##        BR1   BR2-2020         C1        CB1        CL2        LR1        RR1 
##         65          2         12         19         33          0          0 
##       RR10 RR2-2-2019   RR2-2020      RR3-1      RR3-2        RR4        RR5 
##          0          0          0          0          0          0          0 
##        RR6        RR7        RR8        RR9        RS1        SA1        UP6 
##          0          0          0          0         45         57          0 
##        WF1 
##         18
n_distinct(df.tank$ID)
## [1] 122

Now, we only have observations from eight sites mainly located in the south section of the canal reducing the variation in human disturbance compared to the entire dataset.

Subset of the database to only keep observations made on sampling sites

df.site <- df[df$Tank.Site == 'S', ]
table(df.site$Site)
## 
##        BR1   BR2-2020         C1        CB1        CL2        LR1        RR1 
##         53         30         28         55         21         51         62 
##       RR10 RR2-2-2019   RR2-2020      RR3-1      RR3-2        RR4        RR5 
##         30         15         50         25         36         32         46 
##        RR6        RR7        RR8        RR9        RS1        SA1        UP6 
##         78         63         37         40         29         20         42 
##        WF1 
##         41
n_distinct(df.site$ID)
## [1] 730

Histograms of the predictors quantifying human disturbance according to the dataset used

graph.vessel <- ggplot() + 
  geom_histogram(aes(x=df$Vessels.activities.2019),fill="black", alpha=0.9) + 
  geom_histogram(aes(x=df.site$Vessels.activities.2019), fill="chartreuse4") + geom_histogram(aes(x=df.tank$Vessels.activities.2019), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Mean daily number of vessel crossings") 
   
graph.channel <- ggplot() + 
  geom_histogram(aes(x=df$Distance.channel),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.site$Distance.channel), fill="chartreuse4") + 
  geom_histogram(aes(x=df.tank$Distance.channel), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Shortest distance to the navigation channel")   

graph.house <- ggplot() + 
  geom_histogram(aes(x=df$House.200),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.site$House.200), fill="chartreuse4") +
  geom_histogram(aes(x=df.tank$House.200), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Number of houses within 200 m") 

graph.total <- plot_grid(graph.channel, graph.vessel, graph.house, ncol = 3, labels = "AUTO")
graph.total

Here, we compare the frequency of the variables quantifying human disturbance: A) the shortest aquatic distance to the navigation channel in meters; B) the mean daily number of vessel crossings, and C) the number of houses with access to the canal within 200 m of the sampling site, according to the dataset used (black: all the observations from the entire dataset, dark green: only the behavioral measurements performed at sampling sites, light green: only behavioral measurements performed in the controlled setting).

We can observed drastic changes in the frequency and diversity of human disturbance level observed between the different databases used.

Sum of active defensive behaviors

Only in tanks

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                   GVIF df GVIF^(1/(2*df))
## House.200.scaled               10.0773  1          3.1745
## Urban.900.scaled                2.2025  1          1.4841
## Distance.channel.scaled         2.7524  1          1.6590
## Vessels.activities.2019.scaled  6.8818  1          2.6233
## Order.trial.scaled              2.0016  1          1.4148
## Sex                             1.2304  1          1.1092
## Year                            8.3392  1          2.8878
## JulianDay.scaled                4.3675  1          2.0899
## Hour.active.defenses.scaled     3.3399  1          1.8275
## ID.Temperature.scaled           3.9508  1          1.9877
## CL.scaled                       1.1598  1          1.0769
mod.active.defenses.vif <- lm(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                  GVIF df GVIF^(1/(2*df))
## Urban.900.scaled               1.5731  1          1.2542
## Distance.channel.scaled        2.3267  1          1.5254
## Vessels.activities.2019.scaled 2.5494  1          1.5967
## Order.trial.scaled             1.4001  1          1.1833
## Sex                            1.1650  1          1.0794
## JulianDay.scaled               3.4714  1          1.8632
## Hour.active.defenses.scaled    2.3268  1          1.5254
## ID.Temperature.scaled          3.6080  1          1.8995
## CL.scaled                      1.1560  1          1.0752

Compared to the model with the complete dataset, number of houses with 200m and year were deleted given that the GVIF^(1/(2*df)) were over two. Urban areas within 900m was kept here. (Tank.Site was deleted given that all the tests are made in tanks)

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.active.defenses <- df.tank[, c(23,16,17,6,3,4,12,8)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)

We observed a Pearson correlation coefficient over 0.8 between Julian Day and Turtle temperature. Given that we have more missing observations in turtle temperature, We deleted this variable to only keep Julian Day.

Verification of the assumptions with the initial model
mod.ad.full <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, REML = TRUE, na.action=na.exclude)

summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ Urban.900.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled +  
##     (1 | Site) + (1 | ID)
##    Data: df.tank
## 
## REML criterion at convergence: 416.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6200 -0.6151 -0.1467  0.5279  2.4626 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.15279  0.3909  
##  Site     (Intercept) 0.08543  0.2923  
##  Residual             0.62227  0.7888  
## Number of obs: 157, groups:  ID, 113; Site, 8
## 
## Fixed effects:
##                                Estimate Std. Error       df t value Pr(>|t|)   
## (Intercept)                     0.93313    1.04563  2.17984   0.892  0.45958   
## Urban.900.scaled                0.24489    1.68026  2.24032   0.146  0.89620   
## Distance.channel.scaled        -0.19265    0.21574  5.83063  -0.893  0.40722   
## Vessels.activities.2019.scaled  0.30490    0.17266  2.56215   1.766  0.19104   
## Order.trial.scaled              0.01229    0.09945 52.01929   0.124  0.90212   
## SexM                            0.52788    0.17167 96.58996   3.075  0.00274 **
## JulianDay.scaled               -0.24101    0.27407  2.67864  -0.879  0.45093   
## Hour.active.defenses.scaled     0.11448    0.12211 35.15761   0.938  0.35488   
## CL.scaled                       0.07369    0.08538 81.58201   0.863  0.39063   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) U.900. Dstn.. V..201 Ordr.. SexM   JlnDy. Hr.c..
## Urbn.900.sc  0.937                                                 
## Dstnc.chnn. -0.567 -0.501                                          
## Vssl..2019.  0.241  0.286 -0.516                                   
## Ordr.trl.sc  0.094 -0.010 -0.102  0.063                            
## SexM        -0.061  0.046 -0.056  0.056 -0.008                     
## JulnDy.scld -0.370 -0.113  0.246 -0.059 -0.159 -0.001              
## Hr.ctv.dfn.  0.042  0.177  0.010  0.261 -0.226 -0.040 -0.052       
## CL.scaled    0.047  0.056 -0.060  0.039  0.026  0.302 -0.089  0.083
plot(mod.ad.full) 

hist(residuals(mod.ad.full)) 

qqnorm(resid(mod.ad.full))

Model selection

Random variables
Creation of the different mixed models
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy), data = df.tank, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|ID), data = df.tank,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)

The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.

Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.tank
## Models:
## mod.ad.null: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                   npar   AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null         11 420.9 454.52 -199.45    398.9                    
## mod.ad.dummy.site   12 422.9 459.58 -199.45    398.9     0  1          1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.tank
## Models:
## mod.ad.null: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                 npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.ad.null       11 420.90 454.52 -199.45   398.90                     
## mod.ad.dummy.ID   12 421.83 458.51 -198.92   397.83 1.0681  1     0.3014
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.tank
## Models:
## mod.ad.ID: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.ID        11 419.83 453.45 -198.92   397.83                    
## mod.ad.ID.site   12 421.83 458.51 -198.92   397.83     0  1          1

The test is considered significant when chisq is over 3.84

No random variables need to be kept.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.ad.full <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: active.defenses ~ Urban.900.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled +      (1 | ID)
##    Data: df.tank
## 
##      AIC      BIC   logLik deviance df.resid 
##    419.8    453.5   -198.9    397.8      146 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6253 -0.6351 -0.1908  0.5570  2.6353 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.1306   0.3614  
##  Residual             0.6145   0.7839  
## Number of obs: 157, groups:  ID, 113
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      1.15984    0.57998  98.25710   2.000  0.04828
## Urban.900.scaled                 0.48429    0.94153  99.21310   0.514  0.60814
## Distance.channel.scaled         -0.09417    0.15667 146.43706  -0.601  0.54873
## Vessels.activities.2019.scaled   0.25128    0.10417 152.81309   2.412  0.01704
## Order.trial.scaled               0.06659    0.09313  71.48239   0.715  0.47689
## SexM                             0.50194    0.16591 102.57981   3.025  0.00314
## JulianDay.scaled                -0.19298    0.17077 150.08412  -1.130  0.26026
## Hour.active.defenses.scaled      0.03546    0.09950 148.33901   0.356  0.72207
## CL.scaled                        0.07100    0.08310  85.89838   0.854  0.39528
##                                  
## (Intercept)                    * 
## Urban.900.scaled                 
## Distance.channel.scaled          
## Vessels.activities.2019.scaled * 
## Order.trial.scaled               
## SexM                           **
## JulianDay.scaled                 
## Hour.active.defenses.scaled      
## CL.scaled                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) U.900. Dstn.. V..201 Ordr.. SexM   JlnDy. Hr.c..
## Urbn.900.sc  0.883                                                 
## Dstnc.chnn. -0.515 -0.316                                          
## Vssl..2019.  0.165  0.263 -0.446                                   
## Ordr.trl.sc  0.114 -0.027 -0.241  0.210                            
## SexM        -0.055  0.127 -0.040  0.081  0.005                     
## JulnDy.scld -0.409 -0.061  0.423  0.040 -0.246 -0.018              
## Hr.ctv.dfn.  0.134  0.370  0.060  0.377 -0.089 -0.040  0.029       
## CL.scaled    0.110  0.126 -0.079  0.069  0.029  0.299 -0.146  0.108
final.model
mod.ad.final <- lmer(active.defenses ~ Vessels.activities.2019.scaled + Sex + (1|ID), data = df.tank, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ Vessels.activities.2019.scaled + Sex + (1 |      ID)
##    Data: df.tank
## 
## REML criterion at convergence: 566.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1023 -0.5995 -0.1612  0.5621  2.6267 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.2087   0.4569  
##  Residual             0.5321   0.7294  
## Number of obs: 225, groups:  ID, 117
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.74886    0.12008 114.63114   6.237 7.71e-09
## Vessels.activities.2019.scaled   0.26246    0.06598 103.18791   3.978 0.000129
## SexM                             0.45228    0.14149 107.47018   3.196 0.001827
##                                   
## (Intercept)                    ***
## Vessels.activities.2019.scaled ***
## SexM                           ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) V..201
## Vssl..2019. -0.267       
## SexM        -0.818  0.113
Calculation of the 95% confidence intervals
confint(mod.ad.final, level = 0.95, method = "Wald")
##                                    2.5 %    97.5 %
## .sig01                                NA        NA
## .sigma                                NA        NA
## (Intercept)                    0.5135155 0.9842101
## Vessels.activities.2019.scaled 0.1331450 0.3917788
## SexM                           0.1749553 0.7295997
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.ad.final)
##            R2m       R2c
## [1,] 0.1184081 0.3668126

marginal: only for the fixed effect. conditional: both fixed and random effect.

Only on sampling sites

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.active.defenses.vif <- lm(active.defenses ~ Urban.900.scaled + House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                  GVIF df GVIF^(1/(2*df))
## Urban.900.scaled               4.0764  1          2.0190
## House.200.scaled               3.2587  1          1.8052
## Distance.channel.scaled        1.6502  1          1.2846
## Vessels.activities.2019.scaled 1.8376  1          1.3556
## Order.trial.scaled             1.2559  1          1.1207
## Sex                            1.2966  1          1.1387
## Year                           1.5506  1          1.2452
## JulianDay.scaled               2.7134  1          1.6472
## Hour.active.defenses.scaled    1.1434  1          1.0693
## ID.Temperature.scaled          2.4248  1          1.5572
## CL.scaled                      1.3136  1          1.1461
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.200.scaled               1.5196  1          1.2327
## Distance.channel.scaled        1.6352  1          1.2787
## Vessels.activities.2019.scaled 1.8142  1          1.3469
## Order.trial.scaled             1.2092  1          1.0996
## Sex                            1.2391  1          1.1131
## Year                           1.5504  1          1.2452
## JulianDay.scaled               2.5473  1          1.5960
## Hour.active.defenses.scaled    1.1420  1          1.0686
## ID.Temperature.scaled          2.4046  1          1.5507
## CL.scaled                      1.3118  1          1.1453

Here, urban areas within 900m was deleted given that the GVIF^(1/(2*df)) were over two as we did with the complete dataset (Tank.Site was deleted given that all the tests were performed at the sampling sites)

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.active.defenses <- df.site[, c(18,16,17,6,3,4,12,8)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)

No correlation over 0.8

Verification of the assumptions with the initial model
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, REML = TRUE, na.action=na.exclude)

summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled +  
##     CL.scaled + (1 | Site) + (1 | ID)
##    Data: df.site
## 
## REML criterion at convergence: 1925.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.46216 -0.52423 -0.05754  0.48215  2.83807 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.44217  0.6650  
##  Site     (Intercept) 0.03788  0.1946  
##  Residual             0.32012  0.5658  
## Number of obs: 755, groups:  ID, 619; Site, 21
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.81725    0.08787  50.39952   9.301 1.61e-12
## House.200.scaled                -0.13508    0.05843  13.35051  -2.312 0.037336
## Distance.channel.scaled         -0.15796    0.05887  14.09975  -2.683 0.017752
## Vessels.activities.2019.scaled   0.19627    0.07084  11.82436   2.771 0.017150
## Order.trial.scaled               0.01177    0.03559 278.08647   0.331 0.741028
## SexM                             0.27443    0.07949 589.32876   3.452 0.000595
## Year2020                         0.08629    0.08493 415.91301   1.016 0.310215
## JulianDay.scaled                 0.02272    0.07306  38.17390   0.311 0.757483
## Hour.active.defenses.scaled      0.01218    0.03953 475.38650   0.308 0.758194
## ID.Temperature.scaled           -0.03639    0.04956 302.16070  -0.734 0.463311
## CL.scaled                        0.12225    0.04213 586.38619   2.902 0.003850
##                                   
## (Intercept)                    ***
## House.200.scaled               *  
## Distance.channel.scaled        *  
## Vessels.activities.2019.scaled *  
## Order.trial.scaled                
## SexM                           ***
## Year2020                          
## JulianDay.scaled                  
## Hour.active.defenses.scaled       
## ID.Temperature.scaled             
## CL.scaled                      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 Ordr.. SexM   Yr2020 JlnDy. Hr.c..
## Hs.200.scld  0.073                                                        
## Dstnc.chnn.  0.191  0.107                                                 
## Vssl..2019. -0.080 -0.047 -0.515                                          
## Ordr.trl.sc  0.236 -0.048  0.022  0.052                                   
## SexM        -0.597  0.042 -0.025  0.009 -0.054                            
## Year2020    -0.413 -0.189 -0.219  0.178 -0.172  0.060                     
## JulnDy.scld  0.167  0.232  0.281 -0.278 -0.054  0.068  0.026              
## Hr.ctv.dfn.  0.076  0.066  0.093 -0.121  0.085  0.017  0.140  0.145       
## ID.Tmprtr.s -0.014  0.076 -0.117 -0.049  0.001  0.000 -0.268 -0.503 -0.061
## CL.scaled   -0.281 -0.032 -0.039  0.009 -0.065  0.394  0.081  0.108  0.071
##             ID.Tm.
## Hs.200.scld       
## Dstnc.chnn.       
## Vssl..2019.       
## Ordr.trl.sc       
## SexM              
## Year2020          
## JulnDy.scld       
## Hr.ctv.dfn.       
## ID.Tmprtr.s       
## CL.scaled   -0.052
plot(mod.ad.full) 

hist(residuals(mod.ad.full)) 

qqnorm(resid(mod.ad.full))

Model selection

Random variables
Creation of the different mixed models
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.site, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.site
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                   npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## mod.ad.null         13 1955.7 2015.9 -964.86   1929.7                       
## mod.ad.dummy.site   14 1953.9 2018.7 -962.94   1925.9 3.8399  1    0.05005 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.site
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                 npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod.ad.null       13 1955.7 2015.9 -964.86   1929.7                         
## mod.ad.dummy.ID   14 1911.8 1976.6 -941.90   1883.8 45.917  1  1.234e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.site
## Models:
## mod.ad.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## mod.ad.ID        13 1909.8 1969.9 -941.90   1883.8                       
## mod.ad.ID.site   14 1908.5 1973.3 -940.24   1880.5 3.3134  1    0.06872 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As we obtained with the complete dataset, we need to keep sampling site and turtle identities as random variables.

Predictor variables
initial model

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled +  
##     CL.scaled + (1 | Site) + (1 | ID)
##    Data: df.site
## 
##      AIC      BIC   logLik deviance df.resid 
##   1908.5   1973.3   -940.2   1880.5      741 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.49139 -0.54075 -0.05307  0.50035  2.85811 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.44019  0.6635  
##  Site     (Intercept) 0.01778  0.1334  
##  Residual             0.31777  0.5637  
## Number of obs: 755, groups:  ID, 619; Site, 21
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.81579    0.08070 101.68270  10.109  < 2e-16
## House.200.scaled                -0.13364    0.04898  20.32292  -2.728 0.012827
## Distance.channel.scaled         -0.15209    0.04969  22.76527  -3.061 0.005581
## Vessels.activities.2019.scaled   0.18135    0.05866  17.55168   3.092 0.006435
## Order.trial.scaled               0.01243    0.03513 284.07101   0.354 0.723741
## SexM                             0.27855    0.07876 595.63502   3.537 0.000436
## Year2020                         0.10433    0.08234 356.28703   1.267 0.205963
## JulianDay.scaled                 0.05262    0.06546  49.40021   0.804 0.425358
## Hour.active.defenses.scaled      0.02182    0.03868 479.84232   0.564 0.573047
## ID.Temperature.scaled           -0.03850    0.04767 264.41829  -0.808 0.420016
## CL.scaled                        0.12167    0.04160 588.96080   2.925 0.003581
##                                   
## (Intercept)                    ***
## House.200.scaled               *  
## Distance.channel.scaled        ** 
## Vessels.activities.2019.scaled ** 
## Order.trial.scaled                
## SexM                           ***
## Year2020                          
## JulianDay.scaled                  
## Hour.active.defenses.scaled       
## ID.Temperature.scaled             
## CL.scaled                      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 Ordr.. SexM   Yr2020 JlnDy. Hr.c..
## Hs.200.scld  0.092                                                        
## Dstnc.chnn.  0.228  0.121                                                 
## Vssl..2019. -0.120 -0.073 -0.514                                          
## Ordr.trl.sc  0.250 -0.061  0.019  0.061                                   
## SexM        -0.645  0.053 -0.028  0.014 -0.056                            
## Year2020    -0.449 -0.215 -0.255  0.228 -0.165  0.066                     
## JulnDy.scld  0.153  0.224  0.293 -0.272 -0.076  0.083  0.021              
## Hr.ctv.dfn.  0.074  0.073  0.103 -0.138  0.081  0.020  0.132  0.127       
## ID.Tmprtr.s -0.008  0.081 -0.125 -0.073  0.012 -0.007 -0.275 -0.546 -0.062
## CL.scaled   -0.298 -0.035 -0.038  0.006 -0.067  0.393  0.084  0.140  0.076
##             ID.Tm.
## Hs.200.scld       
## Dstnc.chnn.       
## Vssl..2019.       
## Ordr.trl.sc       
## SexM              
## Year2020          
## JulnDy.scld       
## Hr.ctv.dfn.       
## ID.Tmprtr.s       
## CL.scaled   -0.080
final model
mod.ad.final <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Sex + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Sex + CL.scaled + (1 | Site) +  
##     (1 | ID)
##    Data: df.site
## 
## REML criterion at convergence: 2195.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3777 -0.5191 -0.0760  0.4741  3.2385 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.41587  0.6449  
##  Site     (Intercept) 0.05549  0.2356  
##  Residual             0.33961  0.5828  
## Number of obs: 866, groups:  ID, 714; Site, 22
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.85717    0.07674  43.33695  11.170 2.43e-14
## House.200.scaled                -0.14502    0.06002  19.60251  -2.416  0.02558
## Distance.channel.scaled         -0.11579    0.05957  19.01611  -1.944  0.06686
## Vessels.activities.2019.scaled   0.16997    0.06799  16.68831   2.500  0.02318
## SexM                             0.32133    0.07311 694.71790   4.395 1.28e-05
## CL.scaled                        0.12601    0.03915 694.80055   3.219  0.00135
##                                   
## (Intercept)                    ***
## House.200.scaled               *  
## Distance.channel.scaled        .  
## Vessels.activities.2019.scaled *  
## SexM                           ***
## CL.scaled                      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 SexM  
## Hs.200.scld -0.027                            
## Dstnc.chnn.  0.033  0.028                     
## Vssl..2019.  0.013  0.090 -0.451              
## SexM        -0.612  0.022 -0.023  0.023       
## CL.scaled   -0.267 -0.054 -0.050  0.033  0.373

The predictors are the same as the final model with the complete dataset. However, distance to the navigeation channel is marginaly significant, when we REML = TRUE

Calculation of the 95% confidence intervals
confint(mod.ad.final, level = 0.95, method = "Wald")
##                                      2.5 %        97.5 %
## .sig01                                  NA            NA
## .sig02                                  NA            NA
## .sigma                                  NA            NA
## (Intercept)                     0.70676987  1.0075717832
## House.200.scaled               -0.26265045 -0.0273826641
## Distance.channel.scaled        -0.23253615  0.0009588677
## Vessels.activities.2019.scaled  0.03669861  0.3032321795
## SexM                            0.17804502  0.4646220562
## CL.scaled                       0.04928730  0.2027395520
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.ad.final)
##            R2m       R2c
## [1,] 0.0837322 0.6162881

marginal: only for the fixed effect. conditional: both fixed and random effect.

Escape latency

Only in tanks

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.escape.vif)
##                                   GVIF df GVIF^(1/(2*df))
## House.400.scaled                2.3308  1          1.5267
## Urban.200.scaled                5.0789  1          2.2536
## Distance.channel.scaled        11.8394  1          3.4408
## Vessels.activities.2019.scaled  8.6569  1          2.9423
## Order.trial.scaled              1.1997  1          1.0953
## Sex                             1.1115  1          1.0543
## Year                            9.1332  1          3.0221
## JulianDay.scaled                6.4938  1          2.5483
## Hour.Platform.scaled            2.5549  1          1.5984
## ID.Temperature.scaled           5.9269  1          2.4345
## CL.scaled                       1.1104  1          1.0538
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled +  Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.escape.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.400.scaled               2.0351  1          1.4266
## Urban.200.scaled               1.0732  1          1.0360
## Vessels.activities.2019.scaled 1.9497  1          1.3963
## Order.trial.scaled             1.1729  1          1.0830
## Sex                            1.1071  1          1.0522
## Year                           2.2731  1          1.5077
## Hour.Platform.scaled           2.0452  1          1.4301
## ID.Temperature.scaled          1.5382  1          1.2402
## CL.scaled                      1.0704  1          1.0346

Compared to the complete dataset, distance to the channel and Julian Day were deleted given that the GVIF^(1/(2*df)) were over two. (Tank.Site and Wind was deleted given that all the tests are made in tanks)

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.escape <- df.tank[, c(20,21,17,6,10,12,8)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)

No correlation over 0.8

Verification of the assumptions with the initial model
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, REML = TRUE, na.action=na.exclude)
## boundary (singular) fit: see help('isSingular')
summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Year + Hour.Platform.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##    Data: df.tank
## 
## REML criterion at convergence: 648.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.23771 -0.52877 -0.02912  0.49838  2.34029 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.5137   0.7167  
##  Site     (Intercept) 0.0000   0.0000  
##  Residual             0.6923   0.8321  
## Number of obs: 220, groups:  ID, 117; Site, 8
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      5.879652   0.593778 102.406961   9.902
## House.400.scaled                 0.460594   0.452085 169.702156   1.019
## Urban.200.scaled                 3.236814   1.569999  98.058242   2.062
## Vessels.activities.2019.scaled  -0.121615   0.115378 125.146793  -1.054
## Order.trial.scaled              -0.485148   0.081520 144.248383  -5.951
## SexM                            -0.432503   0.199407 105.078607  -2.169
## Year2020                        -0.200102   0.256568 132.934496  -0.780
## Hour.Platform.scaled            -0.234133   0.125693 191.197817  -1.863
## ID.Temperature.scaled           -0.009594   0.151128 198.195560  -0.063
## CL.scaled                        0.308844   0.105586 115.189939   2.925
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## House.400.scaled                0.30974    
## Urban.200.scaled                0.04189 *  
## Vessels.activities.2019.scaled  0.29389    
## Order.trial.scaled             1.93e-08 ***
## SexM                            0.03234 *  
## Year2020                        0.43683    
## Hour.Platform.scaled            0.06403 .  
## ID.Temperature.scaled           0.94944    
## CL.scaled                       0.00415 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.400. U.200. V..201 Ordr.. SexM   Yr2020 Hr.Pl. ID.Tm.
## Hs.400.scld  0.188                                                        
## Urbn.200.sc  0.889 -0.086                                                 
## Vssl..2019.  0.093  0.103  0.057                                          
## Ordr.trl.sc -0.092 -0.184  0.030 -0.088                                   
## SexM        -0.293 -0.016 -0.110  0.096 -0.028                            
## Year2020    -0.286  0.096 -0.148 -0.551  0.125 -0.068                     
## Hr.Pltfrm.s  0.074  0.519  0.079  0.241  0.031 -0.077 -0.058              
## ID.Tmprtr.s  0.086 -0.135  0.053  0.036  0.026  0.057 -0.436  0.058       
## CL.scaled   -0.083  0.011 -0.048 -0.007  0.029  0.251  0.006  0.068  0.104
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
plot(mod.escape.full) 

hist(residuals(mod.escape.full)) 

qqnorm(resid(mod.escape.full))

Model selection

Random variables
Creation of the different mixed models
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.tank, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)

The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.

Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.tank
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                       npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null         12 674.13 714.85 -325.06   650.13                    
## mod.escape.dummy.site   13 676.13 720.24 -325.06   650.13     0  1          1
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.tank
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.null       12 674.13 714.85 -325.06   650.13                     
## mod.escape.dummy.ID   13 657.83 701.95 -315.92   631.83 18.296  1  1.891e-05
##                        
## mod.escape.null        
## mod.escape.dummy.ID ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.tank
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                    npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.ID        12 655.83 696.55 -315.92   631.83                    
## mod.escape.ID.site   13 657.83 701.95 -315.92   631.83     0  1          1

The test is considered significant when chisq is over 3.84

Only turtle identity will be kept.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank,  na.action=na.exclude, REML = FALSE)
summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Year + Hour.Platform.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | ID)
##    Data: df.tank
## 
##      AIC      BIC   logLik deviance df.resid 
##    655.8    696.6   -315.9    631.8      208 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.25364 -0.54334 -0.02882  0.50638  2.35909 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4558   0.6751  
##  Residual             0.6805   0.8249  
## Number of obs: 220, groups:  ID, 117
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      5.87987    0.57180 109.46879  10.283  < 2e-16
## House.400.scaled                 0.45833    0.43873 182.09936   1.045  0.29756
## Urban.200.scaled                 3.23333    1.51095 104.63865   2.140  0.03468
## Vessels.activities.2019.scaled  -0.12243    0.11143 134.60421  -1.099  0.27386
## Order.trial.scaled              -0.48370    0.08043 148.14495  -6.014 1.35e-08
## SexM                            -0.43516    0.19209 111.83199  -2.265  0.02541
## Year2020                        -0.19496    0.24800 141.90108  -0.786  0.43310
## Hour.Platform.scaled            -0.23768    0.12336 199.85420  -1.927  0.05542
## ID.Temperature.scaled           -0.01540    0.14714 207.47927  -0.105  0.91675
## CL.scaled                        0.30741    0.10185 123.12806   3.018  0.00309
##                                   
## (Intercept)                    ***
## House.400.scaled                  
## Urban.200.scaled               *  
## Vessels.activities.2019.scaled    
## Order.trial.scaled             ***
## SexM                           *  
## Year2020                          
## Hour.Platform.scaled           .  
## ID.Temperature.scaled             
## CL.scaled                      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.400. U.200. V..201 Ordr.. SexM   Yr2020 Hr.Pl. ID.Tm.
## Hs.400.scld  0.189                                                        
## Urbn.200.sc  0.888 -0.084                                                 
## Vssl..2019.  0.095  0.111  0.058                                          
## Ordr.trl.sc -0.095 -0.188  0.030 -0.091                                   
## SexM        -0.294 -0.018 -0.110  0.096 -0.029                            
## Year2020    -0.287  0.094 -0.149 -0.550  0.130 -0.069                     
## Hr.Pltfrm.s  0.076  0.527  0.080  0.247  0.028 -0.079 -0.057              
## ID.Tmprtr.s  0.088 -0.137  0.054  0.035  0.020  0.058 -0.440  0.052       
## CL.scaled   -0.082  0.011 -0.048 -0.007  0.029  0.249  0.006  0.068  0.103
final.model
mod.escape.final <- lmer(log.escape ~ Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Hour.Platform.scaled + CL.scaled + (1|ID), data = df.tank,  na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log.escape ~ Urban.200.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Hour.Platform.scaled + CL.scaled +      (1 | ID)
##    Data: df.tank
## 
## REML criterion at convergence: 708.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.13022 -0.54397 -0.01645  0.51404  2.48086 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4210   0.6488  
##  Residual             0.7858   0.8865  
## Number of obs: 238, groups:  ID, 117
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      5.56921    0.53791 117.44608  10.353  < 2e-16
## Urban.200.scaled                 3.30664    1.50242 117.65409   2.201  0.02970
## Vessels.activities.2019.scaled  -0.18531    0.08859 124.33279  -2.092  0.03849
## Order.trial.scaled              -0.39888    0.07563 173.92799  -5.274 3.92e-07
## SexM                            -0.43131    0.19030 114.12791  -2.266  0.02531
## Hour.Platform.scaled            -0.27239    0.09880 199.22598  -2.757  0.00637
## CL.scaled                        0.29634    0.10133 128.53753   2.924  0.00408
##                                   
## (Intercept)                    ***
## Urban.200.scaled               *  
## Vessels.activities.2019.scaled *  
## Order.trial.scaled             ***
## SexM                           *  
## Hour.Platform.scaled           ** 
## CL.scaled                      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) U.200. V..201 Ordr.. SexM   Hr.Pl.
## Urbn.200.sc  0.942                                   
## Vssl..2019. -0.136 -0.029                            
## Ordr.trl.sc  0.001  0.009 -0.016                     
## SexM        -0.330 -0.121  0.086 -0.019              
## Hr.Pltfrm.s -0.061  0.117  0.193  0.021 -0.099       
## CL.scaled   -0.081 -0.045  0.022  0.017  0.237  0.059
plot(allEffects(mod.escape.final))

Calculation of the 95% confidence intervals
confint(mod.escape.final, level = 0.95, method = "Wald")
##                                      2.5 %      97.5 %
## .sig01                                  NA          NA
## .sigma                                  NA          NA
## (Intercept)                     4.51491392  6.62350186
## Urban.200.scaled                0.36194199  6.25133389
## Vessels.activities.2019.scaled -0.35893569 -0.01168287
## Order.trial.scaled             -0.54710737 -0.25066060
## SexM                           -0.80429309 -0.05832559
## Hour.Platform.scaled           -0.46602875 -0.07875158
## CL.scaled                       0.09772721  0.49495065
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.escape.final)
##            R2m       R2c
## [1,] 0.2240638 0.4947299

marginal: only for the fixed effect. conditional: both fixed and random effect.

Only on sampling sites

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.site, na.action=na.exclude)
gvif(mod.escape.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.400.scaled               2.0022  1          1.4150
## Urban.200.scaled               1.6555  1          1.2867
## Distance.channel.scaled        2.3954  1          1.5477
## Vessels.activities.2019.scaled 2.6112  1          1.6159
## Order.trial.scaled             1.3866  1          1.1775
## JulianDay.scaled               2.3034  1          1.5177
## Sex                            1.1794  1          1.0860
## Year                           1.4838  1          1.2181
## Hour.Platform.scaled           1.2113  1          1.1006
## ID.Temperature.scaled          1.4818  1          1.2173
## CL.scaled                      1.2999  1          1.1401
## Wind.scaled                    1.1720  1          1.0826

All the GVIF are under 2. Tank.Site was deleted given that all the tests were made at the sampling sites.

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.escape <- df.site[, c(20,21,16,17,6,10,12,8,3,11)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)

No correlation over 0.8

Verification of the assumptions with the initial model
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.site, REML = TRUE, na.action=na.exclude)

summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled +  
##     Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
##    Data: df.site
## 
## REML criterion at convergence: 1934.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.03424 -0.52458 -0.05845  0.45745  3.07636 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.42063  0.6486  
##  Site     (Intercept) 0.03604  0.1898  
##  Residual             0.70938  0.8422  
## Number of obs: 644, groups:  ID, 543; Site, 18
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      3.400933   0.119522  66.006726  28.454
## House.400.scaled                 0.002386   0.091342  11.498662   0.026
## Urban.200.scaled                 0.087840   0.084148  16.247817   1.044
## Distance.channel.scaled          0.007919   0.076304  16.342015   0.104
## Vessels.activities.2019.scaled  -0.037176   0.088738  13.699813  -0.419
## Order.trial.scaled              -0.158465   0.049737 504.473734  -3.186
## JulianDay.scaled                 0.176294   0.107227  22.658179   1.644
## SexM                             0.108173   0.098128 535.461380   1.102
## Year2020                        -0.158271   0.115948 147.736816  -1.365
## Hour.Platform.scaled            -0.002209   0.056370 378.302089  -0.039
## ID.Temperature.scaled           -0.187366   0.067248 147.978684  -2.786
## CL.scaled                        0.140614   0.053337 502.692658   2.636
## Wind.scaled                     -0.109935   0.051837 395.937705  -2.121
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## House.400.scaled                0.97961    
## Urban.200.scaled                0.31183    
## Distance.channel.scaled         0.91860    
## Vessels.activities.2019.scaled  0.68175    
## Order.trial.scaled              0.00153 ** 
## JulianDay.scaled                0.11396    
## SexM                            0.27080    
## Year2020                        0.17432    
## Hour.Platform.scaled            0.96876    
## ID.Temperature.scaled           0.00603 ** 
## CL.scaled                       0.00864 ** 
## Wind.scaled                     0.03456 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
plot(mod.escape.full) 

hist(residuals(mod.escape.full)) 

qqnorm(resid(mod.escape.full))

Model selection

Random variables
Creation of the different mixed models
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy), data = df.site, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|Site), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|ID), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.site
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | Site)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.null         15 1937.2 2004.2 -953.61   1907.2                     
## mod.escape.dummy.site   16 1938.9 2010.4 -953.44   1906.9 0.3241  1     0.5691
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.site
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | ID)
##                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.null       15 1937.2 2004.2 -953.61   1907.2                     
## mod.escape.dummy.ID   16 1918.8 1990.3 -943.39   1886.8 20.439  1  6.155e-06
##                        
## mod.escape.null        
## mod.escape.dummy.ID ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.site
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
##                    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.ID        15 1916.8 1983.8 -943.39   1886.8                     
## mod.escape.ID.site   16 1918.3 1989.8 -943.16   1886.3 0.4443  1      0.505

Sampling sites identity is not significant. We do not need to keep it.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site,  na.action=na.exclude, REML = FALSE)
summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled +  
##     Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + Wind.scaled + (1 | ID)
##    Data: df.site
## 
##      AIC      BIC   logLik deviance df.resid 
##   1916.8   1983.8   -943.4   1886.8      629 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.07632 -0.54111 -0.07514  0.47475  3.10382 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4110   0.6411  
##  Residual             0.7143   0.8451  
## Number of obs: 644, groups:  ID, 543
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      3.417888   0.103880 609.245182  32.902
## House.400.scaled                -0.002958   0.063208 513.669111  -0.047
## Urban.200.scaled                 0.070422   0.062404 353.897227   1.128
## Distance.channel.scaled          0.004135   0.056676 575.817746   0.073
## Vessels.activities.2019.scaled  -0.035589   0.063228 592.476520  -0.563
## Order.trial.scaled              -0.164068   0.048554 553.003321  -3.379
## JulianDay.scaled                 0.150289   0.081617 611.265644   1.841
## SexM                             0.116433   0.096102 550.984282   1.212
## Year2020                        -0.181213   0.103722 641.340659  -1.747
## Hour.Platform.scaled             0.006046   0.053214 615.188829   0.114
## ID.Temperature.scaled           -0.215403   0.059974 641.873134  -3.592
## CL.scaled                        0.149622   0.051533 571.436808   2.903
## Wind.scaled                     -0.139766   0.049118 580.221881  -2.845
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## House.400.scaled               0.962696    
## Urban.200.scaled               0.259882    
## Distance.channel.scaled        0.941866    
## Vessels.activities.2019.scaled 0.573735    
## Order.trial.scaled             0.000778 ***
## JulianDay.scaled               0.066046 .  
## SexM                           0.226205    
## Year2020                       0.081099 .  
## Hour.Platform.scaled           0.909576    
## ID.Temperature.scaled          0.000354 ***
## CL.scaled                      0.003834 ** 
## Wind.scaled                    0.004591 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
final model
mod.escape.final <- lmer(log.escape ~ Order.trial.scaled + Year + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site,  na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log.escape ~ Order.trial.scaled + Year + ID.Temperature.scaled +  
##     CL.scaled + Wind.scaled + (1 | ID)
##    Data: df.site
## 
## REML criterion at convergence: 2058.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.13146 -0.52103 -0.06498  0.48825  2.97272 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4024   0.6344  
##  Residual             0.7467   0.8641  
## Number of obs: 691, groups:  ID, 580
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)             3.53021    0.06775 639.52682  52.109  < 2e-16 ***
## Order.trial.scaled     -0.13070    0.03957 656.39902  -3.303  0.00101 ** 
## Year2020               -0.23873    0.08632 678.62445  -2.766  0.00583 ** 
## ID.Temperature.scaled  -0.19515    0.04956 683.11625  -3.938 9.06e-05 ***
## CL.scaled               0.10017    0.04249 594.66475   2.358  0.01872 *  
## Wind.scaled            -0.13891    0.04573 660.63698  -3.037  0.00248 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Ordr.. Yr2020 ID.Tm. CL.scl
## Ordr.trl.sc  0.208                            
## Year2020    -0.662 -0.217                     
## ID.Tmprtr.s -0.265  0.022 -0.044              
## CL.scaled   -0.104 -0.012  0.063  0.096       
## Wind.scaled -0.191 -0.067 -0.157  0.116  0.024

As the final model with the complete dataset. No variables quantifying human disturbance are significant.

Calculation of the 95% confidence intervals
confint(mod.escape.final, level = 0.95, method = "Wald")
##                             2.5 %      97.5 %
## .sig01                         NA          NA
## .sigma                         NA          NA
## (Intercept)            3.39743466  3.66299504
## Order.trial.scaled    -0.20824636 -0.05314408
## Year2020              -0.40790409 -0.06955282
## ID.Temperature.scaled -0.29227447 -0.09801996
## CL.scaled              0.01689495  0.18344015
## Wind.scaled           -0.22855108 -0.04927326
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.escape.final)
##             R2m      R2c
## [1,] 0.08020707 0.402319

marginal: only for the fixed effect. conditional: both fixed and random effect.

Emergence of the head after escaping

Only in tanks

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                                   GVIF df GVIF^(1/(2*df))
## House.300.scaled               18.2975  1          4.2776
## Urban.600.scaled               25.9356  1          5.0927
## Distance.channel.scaled        11.3962  1          3.3758
## Vessels.activities.2019.scaled 11.1929  1          3.3456
## Order.trial.scaled              1.1148  1          1.0558
## Sex                             1.1244  1          1.0604
## Year                           10.8123  1          3.2882
## JulianDay.scaled               10.2275  1          3.1980
## Hour.Platform.scaled            2.6746  1          1.6354
## ID.Temperature.scaled           6.0944  1          2.4687
## CL.scaled                       1.1335  1          1.0647
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                           GVIF df GVIF^(1/(2*df))
## House.300.scaled        1.5662  1          1.2515
## Distance.channel.scaled 1.4032  1          1.1846
## Order.trial.scaled      1.1285  1          1.0623
## Sex                     1.0813  1          1.0399
## Year                    1.8545  1          1.3618
## Hour.Platform.scaled    1.6671  1          1.2912
## ID.Temperature.scaled   2.1969  1          1.4822
## CL.scaled               1.0780  1          1.0383

Urban areas within 600m, Vessels crossings and Julian Day were deleted given that the GVIF^(1/(2*df)) was over two. Tank.Site was deleted given all the tests were performed in the tanks.

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.emergence <- df.tank[, c(19,6,10,12,8,16)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)

No correlation over 0.8

Verification of the assumptions with the initial model
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 1.7172 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled +  
##     Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + (1 | ID)
##    Data: df.tank
## 
##      AIC      BIC   logLik deviance df.resid 
##    202.3    235.7    -91.2    182.3      197 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.37486 -0.29479 -0.17205 -0.08341  2.56695 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 4.727    2.174   
## Number of obs: 207, groups:  ID, 114
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)              -4.4970     1.6286  -2.761  0.00576 **
## House.300.scaled          1.2822     1.6497   0.777  0.43700   
## Distance.channel.scaled  -0.3232     0.5369  -0.602  0.54719   
## Order.trial.scaled       -0.4320     0.3516  -1.229  0.21920   
## SexM                      0.7879     0.7763   1.015  0.31017   
## Year2020                  2.1911     0.9759   2.245  0.02475 * 
## Hour.Platform.scaled      0.5739     0.4885   1.175  0.24013   
## ID.Temperature.scaled    -1.0509     0.6774  -1.551  0.12081   
## CL.scaled                 0.0717     0.4031   0.178  0.85882   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. Ordr.. SexM   Yr2020 Hr.Pl. ID.Tm.
## Hs.300.scld  0.208                                                 
## Dstnc.chnn. -0.122 -0.101                                          
## Ordr.trl.sc  0.126 -0.202 -0.030                                   
## SexM        -0.396 -0.065 -0.012 -0.054                            
## Year2020    -0.689  0.169 -0.014 -0.097  0.062                     
## Hr.Pltfrm.s -0.290  0.436  0.016  0.063 -0.094  0.298              
## ID.Tmprtr.s  0.446 -0.231 -0.330  0.215 -0.020 -0.581 -0.133       
## CL.scaled    0.026 -0.046 -0.037  0.078  0.230 -0.071  0.083  0.168
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 1.7172 (tol = 0.002, component 1)

The model do not converge. We will determine each optimizer we need to use to make the model converge.

gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 1.71524 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0771633 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00705438 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef)) 
##                               (Intercept) House.300.scaled
## bobyqa                         -10.131803         2.607682
## Nelder_Mead                     -4.497529         1.279237
## nlminbwrap                     -10.131797         2.607676
## nmkbw                          -10.312362         2.569589
## optimx.L-BFGS-B                -10.131834         2.607835
## nloptwrap.NLOPT_LN_NELDERMEAD  -10.130717         2.608733
## nloptwrap.NLOPT_LN_BOBYQA      -10.127401         2.614272
##                               Distance.channel.scaled Order.trial.scaled
## bobyqa                                     0.04567428         -0.6127357
## Nelder_Mead                               -0.32304131         -0.4323083
## nlminbwrap                                 0.04567145         -0.6127291
## nmkbw                                      0.08144834         -0.6215233
## optimx.L-BFGS-B                            0.04568636         -0.6126979
## nloptwrap.NLOPT_LN_NELDERMEAD              0.04556557         -0.6127748
## nloptwrap.NLOPT_LN_BOBYQA                  0.04319892         -0.6120725
##                                    SexM Year2020 Hour.Platform.scaled
## bobyqa                        0.4571711 3.118528            1.2261513
## Nelder_Mead                   0.7868883 2.190561            0.5743365
## nlminbwrap                    0.4571552 3.118536            1.2261509
## nmkbw                         0.4048173 3.116476            1.2503968
## optimx.L-BFGS-B               0.4569680 3.118884            1.2262064
## nloptwrap.NLOPT_LN_NELDERMEAD 0.4565541 3.118425            1.2262440
## nloptwrap.NLOPT_LN_BOBYQA     0.4583994 3.118709            1.2266148
##                               ID.Temperature.scaled    CL.scaled
## bobyqa                                    -1.985905 -0.008885313
## Nelder_Mead                               -1.049771  0.072620773
## nlminbwrap                                -1.985914 -0.008883853
## nmkbw                                     -2.026999 -0.036137320
## optimx.L-BFGS-B                           -1.985988 -0.008909886
## nloptwrap.NLOPT_LN_NELDERMEAD             -1.985703 -0.009157880
## nloptwrap.NLOPT_LN_BOBYQA                 -1.984661 -0.004728427
sapply(gm_all,logLik) 
##                        bobyqa                   Nelder_Mead 
##                     -88.02733                     -91.16247 
##                    nlminbwrap                         nmkbw 
##                     -88.02733                     -88.02514 
##               optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD 
##                     -88.02733                     -88.02733 
##     nloptwrap.NLOPT_LN_BOBYQA 
##                     -88.02735
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
plot(mod.emergence.full) 

hist(residuals(mod.emergence.full)) 

qqnorm(resid(mod.emergence.full))

Model selection

Random variables
Creation of the different mixed models
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)

## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.tank
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                          npar    AIC    BIC  logLik deviance Chisq Df
## mod.emergence.null         10 215.92 249.25 -97.961   195.92         
## mod.emergence.dummy.site   11 217.92 254.58 -97.961   195.92     0  1
##                          Pr(>Chisq)
## mod.emergence.null                 
## mod.emergence.dummy.site          1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.tank
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.emergence.null       10 215.92 249.25 -97.961   195.92                     
## mod.emergence.dummy.ID   11 203.59 240.25 -90.795   181.59 14.331  1  0.0001533
##                           
## mod.emergence.null        
## mod.emergence.dummy.ID ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.tank
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                       npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.ID        10 196.06 229.38 -88.027   176.06                    
## mod.emergence.ID.site   11 198.06 234.72 -88.027   176.06     0  1          1

Sampling sites identity is not significant. We do not need to keep it. Also, we have convergence problems with site in addition to turtle identity even with the use of an optimizer.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled +  
##     Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + (1 | ID)
##    Data: df.tank
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    196.1    229.4    -88.0    176.1      197 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.25990 -0.03546 -0.01491 -0.00501  2.75620 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 93.65    9.678   
## Number of obs: 207, groups:  ID, 114
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)             -10.13183    3.13622  -3.231  0.00124 **
## House.300.scaled          2.60784    3.32274   0.785  0.43254   
## Distance.channel.scaled   0.04569    0.92617   0.049  0.96066   
## Order.trial.scaled       -0.61270    0.48392  -1.266  0.20547   
## SexM                      0.45697    1.58836   0.288  0.77358   
## Year2020                  3.11888    1.89991   1.642  0.10067   
## Hour.Platform.scaled      1.22621    0.80198   1.529  0.12627   
## ID.Temperature.scaled    -1.98599    1.02362  -1.940  0.05236 . 
## CL.scaled                -0.00891    0.84521  -0.011  0.99159   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. Ordr.. SexM   Yr2020 Hr.Pl. ID.Tm.
## Hs.300.scld  0.428                                                 
## Dstnc.chnn. -0.177 -0.066                                          
## Ordr.trl.sc -0.018 -0.100 -0.054                                   
## SexM        -0.276  0.038  0.007  0.008                            
## Year2020    -0.624 -0.069 -0.058  0.049  0.026                     
## Hr.Pltfrm.s -0.434  0.230  0.038  0.122 -0.064  0.343              
## ID.Tmprtr.s  0.272 -0.152 -0.260  0.174 -0.006 -0.433 -0.060       
## CL.scaled   -0.052 -0.031 -0.037  0.028  0.223  0.048  0.032  0.114
final model
mod.emergence.final <- glmer(emergence ~  + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: emergence ~ +(1 | ID)
##    Data: df.tank
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    197.9    204.7    -97.0    193.9      217 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.33880 -0.02252 -0.02196 -0.02147  1.48242 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 109.1    10.45   
## Number of obs: 219, groups:  ID, 119
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -7.532      1.154  -6.525 6.79e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No variable is significant. The final model is a null model.

Calculation of the 95% confidence intervals
confint(mod.emergence.final, level = 0.95, method = "Wald")
##                 2.5 %    97.5 %
## .sig01             NA        NA
## (Intercept) -9.794052 -5.269458
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.emergence.final)
##             R2m          R2c
## theoretical   0 9.707410e-01
## delta         0 2.423606e-14

marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.

Only on sampling sites

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.300.scaled               3.4151  1          1.8480
## Urban.600.scaled               4.4203  1          2.1025
## Distance.channel.scaled        1.5106  1          1.2291
## Vessels.activities.2019.scaled 1.6971  1          1.3027
## Order.trial.scaled             1.3182  1          1.1481
## Sex                            1.2127  1          1.1012
## Year                           1.4330  1          1.1971
## JulianDay.scaled               2.2220  1          1.4906
## Hour.Platform.scaled           1.0899  1          1.0440
## ID.Temperature.scaled          1.8619  1          1.3645
## CL.scaled                      1.2265  1          1.1075
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.300.scaled               1.3813  1          1.1753
## Distance.channel.scaled        1.5128  1          1.2300
## Vessels.activities.2019.scaled 1.7017  1          1.3045
## Order.trial.scaled             1.2552  1          1.1204
## Sex                            1.1767  1          1.0848
## Year                           1.4012  1          1.1837
## JulianDay.scaled               2.0201  1          1.4213
## Hour.Platform.scaled           1.0880  1          1.0431
## ID.Temperature.scaled          1.8255  1          1.3511
## CL.scaled                      1.2126  1          1.1012

Urban areas within 600m was deleted given that the GVIF^(1/(2*df)) was over two. Tank.Site was deleted given that all the observations were performed at the sampling sites.

Calculation of Pearson correlation coefficients
# Visualization of the correlations
cor.pearson.emergence <- df.site[, c(19,6,3,10,12,8,16,17)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)

No correlation over 0.8

Verification of the assumptions with the initial model
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.site, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0346845 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | ID) + (1 | Site)
##    Data: df.site
## 
##      AIC      BIC   logLik deviance df.resid 
##    918.0    978.8   -446.0    892.0      784 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6742 -0.5640 -0.3769  0.7739  3.8883 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 0.70974  0.8425  
##  Site   (Intercept) 0.08752  0.2958  
## Number of obs: 797, groups:  ID, 664; Site, 21
## 
## Fixed effects:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.658972   0.232527  -2.834 0.004598 ** 
## House.300.scaled               -0.201521   0.135152  -1.491 0.135943    
## Distance.channel.scaled        -0.169129   0.145818  -1.160 0.246103    
## Vessels.activities.2019.scaled  0.183161   0.141140   1.298 0.194380    
## Order.trial.scaled              0.374200   0.107775   3.472 0.000517 ***
## SexM                           -0.210585   0.215950  -0.975 0.329483    
## Year2020                       -0.051072   0.236524  -0.216 0.829043    
## JulianDay.scaled                0.471121   0.173768   2.711 0.006704 ** 
## Hour.Platform.scaled           -0.003869   0.120376  -0.032 0.974358    
## ID.Temperature.scaled           0.102556   0.134758   0.761 0.446631    
## CL.scaled                      -0.325814   0.119957  -2.716 0.006606 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. V..201 Ordr.. SexM   Yr2020 JlnDy. Hr.Pl.
## Hs.300.scld  0.054                                                        
## Dstnc.chnn.  0.272  0.199                                                 
## Vssl..2019. -0.223 -0.161 -0.500                                          
## Ordr.trl.sc  0.155 -0.154 -0.025  0.038                                   
## SexM        -0.603  0.070 -0.020 -0.001 -0.086                            
## Year2020    -0.533 -0.114 -0.269  0.265 -0.215  0.052                     
## JulnDy.scld  0.096  0.296  0.281 -0.342 -0.066  0.054 -0.076              
## Hr.Pltfrm.s  0.200 -0.052  0.045 -0.069  0.222  0.011 -0.039  0.096       
## ID.Tmprtr.s -0.033  0.136 -0.113 -0.050  0.041  0.012 -0.231 -0.436 -0.101
## CL.scaled   -0.163 -0.016 -0.022 -0.017 -0.164  0.363  0.031  0.040  0.039
##             ID.Tm.
## Hs.300.scld       
## Dstnc.chnn.       
## Vssl..2019.       
## Ordr.trl.sc       
## SexM              
## Year2020          
## JulnDy.scld       
## Hr.Pltfrm.s       
## ID.Tmprtr.s       
## CL.scaled   -0.060
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0346845 (tol = 0.002, component 1)
Testing for the best optimizer
gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0308026 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw : [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00263954 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef)) 
##                               (Intercept) House.300.scaled
## bobyqa                         -0.6565887       -0.2018033
## Nelder_Mead                    -0.6582006       -0.2015575
## nlminbwrap                     -0.6565884       -0.2018023
## nmkbw                          -0.6566706       -0.2018181
## optimx.L-BFGS-B                -0.6565343       -0.2017890
## nloptwrap.NLOPT_LN_NELDERMEAD  -0.6565220       -0.2017892
## nloptwrap.NLOPT_LN_BOBYQA      -0.6567184       -0.2017706
##                               Distance.channel.scaled
## bobyqa                                     -0.1681996
## Nelder_Mead                                -0.1691471
## nlminbwrap                                 -0.1681997
## nmkbw                                      -0.1681419
## optimx.L-BFGS-B                            -0.1681665
## nloptwrap.NLOPT_LN_NELDERMEAD              -0.1681539
## nloptwrap.NLOPT_LN_BOBYQA                  -0.1681984
##                               Vessels.activities.2019.scaled Order.trial.scaled
## bobyqa                                             0.1816207          0.3738560
## Nelder_Mead                                        0.1830299          0.3738660
## nlminbwrap                                         0.1816225          0.3738563
## nmkbw                                              0.1815766          0.3738302
## optimx.L-BFGS-B                                    0.1815875          0.3738489
## nloptwrap.NLOPT_LN_NELDERMEAD                      0.1815746          0.3738593
## nloptwrap.NLOPT_LN_BOBYQA                          0.1816178          0.3738183
##                                     SexM    Year2020 JulianDay.scaled
## bobyqa                        -0.2104532 -0.05595727        0.4749174
## Nelder_Mead                   -0.2110695 -0.05193873        0.4711612
## nlminbwrap                    -0.2104545 -0.05595727        0.4749210
## nmkbw                         -0.2103401 -0.05595822        0.4748682
## optimx.L-BFGS-B               -0.2104473 -0.05594978        0.4749722
## nloptwrap.NLOPT_LN_NELDERMEAD -0.2104210 -0.05603676        0.4749007
## nloptwrap.NLOPT_LN_BOBYQA     -0.2104047 -0.05598458        0.4748766
##                               Hour.Platform.scaled ID.Temperature.scaled
## bobyqa                                -0.004316513             0.1005477
## Nelder_Mead                           -0.004215164             0.1027812
## nlminbwrap                            -0.004315633             0.1005480
## nmkbw                                 -0.004260918             0.1006100
## optimx.L-BFGS-B                       -0.004297641             0.1005027
## nloptwrap.NLOPT_LN_NELDERMEAD         -0.004206986             0.1005645
## nloptwrap.NLOPT_LN_BOBYQA             -0.004546246             0.1006092
##                                CL.scaled
## bobyqa                        -0.3250463
## Nelder_Mead                   -0.3261493
## nlminbwrap                    -0.3250481
## nmkbw                         -0.3250179
## optimx.L-BFGS-B               -0.3250286
## nloptwrap.NLOPT_LN_NELDERMEAD -0.3250195
## nloptwrap.NLOPT_LN_BOBYQA     -0.3250503
sapply(gm_all,logLik) 
##                        bobyqa                   Nelder_Mead 
##                     -445.9923                     -445.9930 
##                    nlminbwrap                         nmkbw 
##                     -445.9923                     -445.9923 
##               optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD 
##                     -445.9923                     -445.9923 
##     nloptwrap.NLOPT_LN_BOBYQA 
##                     -445.9923

We decided to use optimx.L-BFGS-B optimizer to include in our initial model.

mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
plot(mod.emergence.full) 

hist(residuals(mod.emergence.full)) 

qqnorm(resid(mod.emergence.full))

Model selection

Random variables
Creation of the different mixed models
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)

## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.site
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                          npar    AIC    BIC  logLik deviance  Chisq Df
## mod.emergence.null         12 925.52 981.69 -450.76   901.52          
## mod.emergence.dummy.site   13 923.55 984.41 -448.78   897.55 3.9684  1
##                          Pr(>Chisq)  
## mod.emergence.null                   
## mod.emergence.dummy.site    0.04636 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.site
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.emergence.null       12 925.52 981.69 -450.76   901.52                     
## mod.emergence.dummy.ID   13 919.43 980.28 -446.71   893.43 8.0941  1   0.004441
##                          
## mod.emergence.null       
## mod.emergence.dummy.ID **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.site
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.emergence.ID        12 917.18 973.35 -446.59   893.18                     
## mod.emergence.ID.site   13 917.98 978.84 -445.99   891.98 1.1975  1     0.2738

Sampling sites identity is not significant and it is added to turtle identity. We do not need to keep it.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | ID)
##    Data: df.site
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    917.2    973.4   -446.6    893.2      785 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8174 -0.5437 -0.3671  0.7573  4.1442 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 0.9313   0.965   
## Number of obs: 797, groups:  ID, 664
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.68198    0.22651  -3.011 0.002605 ** 
## House.300.scaled               -0.20668    0.11946  -1.730 0.083613 .  
## Distance.channel.scaled        -0.15655    0.12775  -1.225 0.220398    
## Vessels.activities.2019.scaled  0.18662    0.11960   1.560 0.118686    
## Order.trial.scaled              0.39987    0.10724   3.729 0.000193 ***
## SexM                           -0.21632    0.21945  -0.986 0.324255    
## Year2020                       -0.01116    0.22710  -0.049 0.960805    
## JulianDay.scaled                0.48296    0.15748   3.067 0.002163 ** 
## Hour.Platform.scaled            0.01518    0.11627   0.131 0.896131    
## ID.Temperature.scaled           0.10227    0.12733   0.803 0.421894    
## CL.scaled                      -0.36329    0.11896  -3.054 0.002260 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. V..201 Ordr.. SexM   Yr2020 JlnDy. Hr.Pl.
## Hs.300.scld  0.050                                                        
## Dstnc.chnn.  0.299  0.197                                                 
## Vssl..2019. -0.276 -0.183 -0.511                                          
## Ordr.trl.sc  0.152 -0.180 -0.065  0.054                                   
## SexM        -0.627  0.095 -0.013  0.000 -0.080                            
## Year2020    -0.553 -0.116 -0.320  0.336 -0.248  0.063                     
## JulnDy.scld  0.044  0.267  0.272 -0.299 -0.088  0.071 -0.081              
## Hr.Pltfrm.s  0.196 -0.081  0.035 -0.062  0.215  0.014 -0.048  0.053       
## ID.Tmprtr.s -0.043  0.138 -0.123 -0.084  0.055  0.005 -0.217 -0.458 -0.106
## CL.scaled   -0.149 -0.017  0.019 -0.039 -0.142  0.362  0.060  0.063  0.074
##             ID.Tm.
## Hs.300.scld       
## Dstnc.chnn.       
## Vssl..2019.       
## Ordr.trl.sc       
## SexM              
## Year2020          
## JulnDy.scld       
## Hr.Pltfrm.s       
## ID.Tmprtr.s       
## CL.scaled   -0.087
final model
mod.emergence.final <- glmer(emergence ~ Order.trial.scaled + JulianDay.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ Order.trial.scaled + JulianDay.scaled + ID.Temperature.scaled +  
##     CL.scaled + (1 | ID)
##    Data: df.site
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    980.4   1008.8   -484.2    968.4      838 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5253 -0.5548 -0.3726  0.7717  3.4169 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 1.061    1.03    
## Number of obs: 844, groups:  ID, 701
## 
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.86585    0.12402  -6.982 2.92e-12 ***
## Order.trial.scaled     0.22926    0.09111   2.516  0.01187 *  
## JulianDay.scaled       0.54278    0.13612   3.988 6.68e-05 ***
## ID.Temperature.scaled  0.25691    0.11325   2.268  0.02330 *  
## CL.scaled             -0.27323    0.10183  -2.683  0.00729 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Ordr.. JlnDy. ID.Tm.
## Ordr.trl.sc -0.100                     
## JulnDy.scld -0.056 -0.170              
## ID.Tmprtr.s -0.281  0.112 -0.471       
## CL.scaled    0.194 -0.104  0.054 -0.091
Calculation of the 95% confidence intervals
confint(mod.emergence.final, level = 0.95, method = "Wald")
##                             2.5 %      97.5 %
## .sig01                         NA          NA
## (Intercept)           -1.10892321 -0.62278386
## Order.trial.scaled     0.05067511  0.40783862
## JulianDay.scaled       0.27599121  0.80957608
## ID.Temperature.scaled  0.03494129  0.47887987
## CL.scaled             -0.47280808 -0.07364491
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.emergence.final)
##                   R2m       R2c
## theoretical 0.1479719 0.3557077
## delta       0.1179110 0.2834449

marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.

Analyses with only turtles tested more than once

Creation of the databases

Deletion of turtles with only one observation.

df.double <- df[df$ID %in% df$ID[duplicated(df$ID)],]
n_distinct(df.double$ID)
## [1] 221

We have 221 turtles tested more than once. By only keeping the turtles tested more than once, we are passing from 1135 observations to 626 observations. Not all entries have an observations for the three behaviors at the same time. Thus, we need to create a dataset for each behavior by deleting the missing observations for each of them.

#Sum of active defensive behaviors
df.double.ad <- df.double[complete.cases(df.double[ , 9]),]
df.double.ad <- df.double.ad[df.double.ad$ID %in% df.double.ad$ID[duplicated(df.double.ad$ID)],]
n_distinct(df.double.ad$ID)
## [1] 221
#Escape latency
df.double.escape <- df.double[complete.cases(df.double[ , 13]),]
df.double.escape <- df.double.escape[df.double.escape$ID %in% df.double.escape$ID[duplicated(df.double.escape$ID)],]
n_distinct(df.double.escape$ID)
## [1] 218
#Emergence of the turtle after escaping
df.double.emergence <- df.double[complete.cases(df.double[ , 14]),]
df.double.emergence <- df.double.emergence[df.double.emergence$ID %in% df.double.emergence$ID[duplicated(df.double.emergence$ID)],]
n_distinct(df.double.emergence$ID)
## [1] 215
table(df.double.escape$Site)
## 
##        BR1   BR2-2020         C1        CB1        CL2        LR1        RR1 
##         95         13         23         35         50          8         50 
##       RR10 RR2-2-2019   RR2-2020      RR3-1      RR3-2        RR4        RR5 
##          4         11         34         14         26          0          6 
##        RR6        RR7        RR8        RR9        RS1        SA1        UP6 
##         27         15          5         12         67         77          4 
##        WF1 
##         37
table(df.double.emergence$Site)
## 
##        BR1   BR2-2020         C1        CB1        CL2        LR1        RR1 
##         93         13         22         33         50          8         48 
##       RR10 RR2-2-2019   RR2-2020      RR3-1      RR3-2        RR4        RR5 
##          4         11         34         14         25          0          6 
##        RR6        RR7        RR8        RR9        RS1        SA1        UP6 
##         29         15          5         12         64         56          4 
##        WF1 
##         36
table(df.double.ad$Site)
## 
##        BR1   BR2-2020         C1        CB1        CL2        LR1        RR1 
##         95         13         23         40         50          8         53 
##       RR10 RR2-2-2019   RR2-2020      RR3-1      RR3-2        RR4        RR5 
##          4         11         34         14         26          0          6 
##        RR6        RR7        RR8        RR9        RS1        SA1        UP6 
##         29         17          5         12         68         59          4 
##        WF1 
##         37

All the sampling sites from the complete datset have turtles tested more than once, except RR4.

Histograms of the predictors quantifying human disturbance according to the dataset used

graph.vessel <- ggplot() + 
  geom_histogram(aes(x=df$Vessels.activities.2019),fill="black", alpha=0.9) + 
  geom_histogram(aes(x=df.double$Vessels.activities.2019), fill="chartreuse4") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Mean daily number of vessel crossings") 

graph.channel <- ggplot() + 
  geom_histogram(aes(x=df$Distance.channel),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.double$Distance.channel), fill="chartreuse4") + 
 theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Shortest distance to the navigation channel")   

graph.house <- ggplot() + 
  geom_histogram(aes(x=df$House.200),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.double$House.200), fill="chartreuse4") +
  theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Number of houses within 200 m") 

library(cowplot)
graph.total <- plot_grid(graph.channel, graph.vessel, graph.house, ncol = 3, labels = "AUTO")
graph.total

Here, we compare the frequency of the variables quantifying human disturbance: A) the shortest aquatic distance to the navigation channel in meters; B) the mean daily number of vessel crossings, and C) the number of houses with access to the canal within 200 m of the sampling site, according to the dataset used (black: all the observations from the entire dataset, green: only the behavioral measurements from turtles testes more than once).

Sum of active defensive behaviors

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.ad, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.200.scaled               5.1772  1          2.2753
## Urban.900.scaled               6.2641  1          2.5028
## Distance.channel.scaled        1.7054  1          1.3059
## Vessels.activities.2019.scaled 1.9751  1          1.4054
## Order.trial.scaled             1.2305  1          1.1093
## Sex                            1.5154  1          1.2310
## Tank.Site                      3.8994  1          1.9747
## Year                           1.9873  1          1.4097
## JulianDay.scaled               3.5956  1          1.8962
## Hour.active.defenses.scaled    2.4747  1          1.5731
## ID.Temperature.scaled          1.8566  1          1.3626
## CL.scaled                      1.3744  1          1.1723
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.ad, na.action=na.exclude)
gvif(mod.active.defenses.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.200.scaled               2.5259  1          1.5893
## Distance.channel.scaled        1.6998  1          1.3038
## Vessels.activities.2019.scaled 1.7866  1          1.3366
## Order.trial.scaled             1.2180  1          1.1036
## Sex                            1.3514  1          1.1625
## Tank.Site                      3.8994  1          1.9747
## Year                           1.9604  1          1.4001
## JulianDay.scaled               3.3888  1          1.8409
## Hour.active.defenses.scaled    2.4522  1          1.5660
## ID.Temperature.scaled          1.8380  1          1.3557
## CL.scaled                      1.3742  1          1.1723

Urban areas within 900 m was deleted given that the GVIF^(1/(2*df)) was over two.

Calculation of Pearson correlation coefficients

# Visualization of the correlations
cor.pearson.active.defenses <- df.double.ad[, c(18,23,6,3,4,12,8,16,17)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)

The Pearson correlation coefficient of 0.8 between House.200 and Urban.900 confirm the deletion of Urban.900 with the calculation of the GVIF^(1/(2*df)).

Verification of the assumptions with the initial model

mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.ad, REML = TRUE, na.action=na.exclude)

summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##    Data: df.double.ad
## 
## REML criterion at convergence: 959.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7344 -0.5517 -0.1151  0.5299  2.7636 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4082   0.6389  
##  Site     (Intercept) 0.0496   0.2227  
##  Residual             0.3155   0.5617  
## Number of obs: 399, groups:  ID, 211; Site, 21
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.96570    0.13431  53.72823   7.190 2.08e-09
## House.200.scaled                -0.15600    0.08419  13.68989  -1.853  0.08555
## Distance.channel.scaled         -0.20993    0.08464  14.82270  -2.480  0.02563
## Vessels.activities.2019.scaled   0.25597    0.10268  10.41442   2.493  0.03097
## Order.trial.scaled               0.03759    0.03747 222.80396   1.003  0.31674
## SexM                             0.35314    0.13152 188.95551   2.685  0.00789
## Tank.SiteT                      -0.13295    0.17491 172.03545  -0.760  0.44822
## Year2020                        -0.20388    0.12137 167.87540  -1.680  0.09484
## JulianDay.scaled                -0.07544    0.09756  18.33891  -0.773  0.44923
## Hour.active.defenses.scaled      0.02671    0.05271 269.08360   0.507  0.61275
## ID.Temperature.scaled            0.04869    0.06461  99.88162   0.754  0.45288
## CL.scaled                        0.17529    0.06738 183.56842   2.602  0.01004
##                                   
## (Intercept)                    ***
## House.200.scaled               .  
## Distance.channel.scaled        *  
## Vessels.activities.2019.scaled *  
## Order.trial.scaled                
## SexM                           ** 
## Tank.SiteT                        
## Year2020                       .  
## JulianDay.scaled                  
## Hour.active.defenses.scaled       
## ID.Temperature.scaled             
## CL.scaled                      *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 Ordr.. SexM   Tnk.ST Yr2020 JlnDy.
## Hs.200.scld -0.085                                                        
## Dstnc.chnn.  0.057  0.142                                                 
## Vssl..2019.  0.093 -0.022 -0.505                                          
## Ordr.trl.sc -0.005  0.032 -0.033  0.122                                   
## SexM        -0.653  0.182 -0.078  0.059  0.001                            
## Tank.SiteT  -0.266  0.021 -0.148 -0.026  0.208  0.045                     
## Year2020    -0.239 -0.260 -0.026 -0.105 -0.271 -0.078 -0.387              
## JulnDy.scld -0.022  0.343  0.231 -0.207 -0.143  0.097 -0.467  0.197       
## Hr.ctv.dfn.  0.147  0.068  0.062  0.007  0.107 -0.043 -0.609  0.238  0.155
## ID.Tmprtr.s  0.014  0.080 -0.150 -0.027  0.011  0.027  0.394 -0.302 -0.412
## CL.scaled   -0.261 -0.047 -0.046  0.101  0.016  0.343  0.056 -0.078  0.050
##             Hr.c.. ID.Tm.
## Hs.200.scld              
## Dstnc.chnn.              
## Vssl..2019.              
## Ordr.trl.sc              
## SexM                     
## Tank.SiteT               
## Year2020                 
## JulnDy.scld              
## Hr.ctv.dfn.              
## ID.Tmprtr.s -0.015       
## CL.scaled   -0.036  0.002
plot(mod.ad.full) 

hist(residuals(mod.ad.full)) 

qqnorm(resid(mod.ad.full))

Model selection

Random variables

Creation of the different mixed models
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.double.ad, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.double.ad, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.double.ad, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.ad, na.action=na.exclude, REML = FALSE)

The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.

Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.double.ad
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                   npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null         14 1008.7 1064.5 -490.35    980.7                    
## mod.ad.dummy.site   15 1010.7 1070.5 -490.35    980.7     0  1          1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.double.ad
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                 npar     AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)    
## mod.ad.null       14 1008.70 1064.5 -490.35   980.70                        
## mod.ad.dummy.ID   15  947.72 1007.6 -458.86   917.72 62.98  1  2.088e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.double.ad
## Models:
## mod.ad.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.ID        14 945.72 1001.6 -458.86   917.72                    
## mod.ad.ID.site   15 947.72 1007.6 -458.86   917.72     0  1          1

The test is considered significant when chisq is over 3.84

Only turtle identity will be kept as a random variable.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full) 
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled +  
##     ID.Temperature.scaled + CL.scaled + (1 | ID)
##    Data: df.double.ad
## 
##      AIC      BIC   logLik deviance df.resid 
##    945.7   1001.6   -458.9    917.7      385 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.79108 -0.53650 -0.09793  0.53562  2.75285 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4063   0.6374  
##  Residual             0.3136   0.5600  
## Number of obs: 399, groups:  ID, 211
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.99390    0.11984 265.28283   8.293 5.59e-15
## House.200.scaled                -0.16773    0.06670 208.95640  -2.515 0.012672
## Distance.channel.scaled         -0.22794    0.06804 204.39522  -3.350 0.000963
## Vessels.activities.2019.scaled   0.29346    0.07673 232.15000   3.825 0.000168
## Order.trial.scaled               0.04266    0.03569 312.46630   1.195 0.232800
## SexM                             0.31457    0.12583 199.14779   2.500 0.013228
## Tank.SiteT                      -0.02854    0.16170 382.88556  -0.176 0.860013
## Year2020                        -0.18281    0.11228 396.77606  -1.628 0.104301
## JulianDay.scaled                -0.11814    0.07653 299.65326  -1.544 0.123692
## Hour.active.defenses.scaled      0.02421    0.05086 330.13083   0.476 0.634344
## ID.Temperature.scaled            0.07757    0.05812 379.58873   1.335 0.182732
## CL.scaled                        0.16711    0.06562 188.19014   2.547 0.011678
##                                   
## (Intercept)                    ***
## House.200.scaled               *  
## Distance.channel.scaled        ***
## Vessels.activities.2019.scaled ***
## Order.trial.scaled                
## SexM                           *  
## Tank.SiteT                        
## Year2020                          
## JulianDay.scaled                  
## Hour.active.defenses.scaled       
## ID.Temperature.scaled             
## CL.scaled                      *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 Ordr.. SexM   Tnk.ST Yr2020 JlnDy.
## Hs.200.scld -0.117                                                        
## Dstnc.chnn.  0.050  0.149                                                 
## Vssl..2019.  0.162 -0.008 -0.491                                          
## Ordr.trl.sc -0.031  0.033 -0.080  0.186                                   
## SexM        -0.689  0.255 -0.085  0.071  0.006                            
## Tank.SiteT  -0.327  0.019 -0.188 -0.114  0.165  0.074                     
## Year2020    -0.256 -0.329  0.022 -0.171 -0.198 -0.106 -0.365              
## JulnDy.scld -0.039  0.370  0.253 -0.119 -0.174  0.090 -0.503  0.214       
## Hr.ctv.dfn.  0.173  0.081  0.059  0.080  0.142 -0.063 -0.620  0.223  0.140
## ID.Tmprtr.s  0.026  0.101 -0.202 -0.049  0.020  0.057  0.368 -0.339 -0.405
## CL.scaled   -0.279 -0.073 -0.052  0.131  0.009  0.345  0.078 -0.087  0.032
##             Hr.c.. ID.Tm.
## Hs.200.scld              
## Dstnc.chnn.              
## Vssl..2019.              
## Ordr.trl.sc              
## SexM                     
## Tank.SiteT               
## Year2020                 
## JulnDy.scld              
## Hr.ctv.dfn.              
## ID.Tmprtr.s -0.031       
## CL.scaled   -0.040  0.022
final.model
mod.ad.final <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     ID.Temperature.scaled + CL.scaled + (1 | ID)
##    Data: df.double.ad
## 
## REML criterion at convergence: 1361.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.38654 -0.62432 -0.09817  0.56221  2.80205 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.3194   0.5651  
##  Residual             0.4221   0.6497  
## Number of obs: 561, groups:  ID, 215
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      0.88864    0.08714 206.58701  10.198  < 2e-16
## House.200.scaled                -0.15392    0.04871 192.66811  -3.160  0.00183
## Distance.channel.scaled         -0.14479    0.06045 220.21573  -2.395  0.01745
## Vessels.activities.2019.scaled   0.18377    0.06160 193.27807   2.983  0.00322
## Order.trial.scaled               0.05845    0.02852 477.18753   2.049  0.04098
## SexM                             0.33800    0.10978 196.40374   3.079  0.00238
## ID.Temperature.scaled            0.11145    0.04409 537.90670   2.528  0.01177
## CL.scaled                        0.13982    0.05858 203.37653   2.387  0.01791
##                                   
## (Intercept)                    ***
## House.200.scaled               ** 
## Distance.channel.scaled        *  
## Vessels.activities.2019.scaled ** 
## Order.trial.scaled             *  
## SexM                           ** 
## ID.Temperature.scaled          *  
## CL.scaled                      *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.200. Dstn.. V..201 Ordr.. SexM   ID.Tm.
## Hs.200.scld -0.185                                          
## Dstnc.chnn.  0.014  0.084                                   
## Vssl..2019. -0.037  0.015 -0.542                            
## Ordr.trl.sc -0.138 -0.026 -0.019  0.030                     
## SexM        -0.816  0.182 -0.107  0.089 -0.016              
## ID.Tmprtr.s -0.032  0.100 -0.146 -0.087 -0.197  0.084       
## CL.scaled   -0.290 -0.201 -0.062  0.148 -0.008  0.318  0.034
Calculation of the 95% confidence intervals
confint(mod.ad.final, level = 0.95, method = "Wald")
##                                       2.5 %      97.5 %
## .sig01                                   NA          NA
## .sigma                                   NA          NA
## (Intercept)                     0.717840684  1.05943287
## House.200.scaled               -0.249395513 -0.05844638
## Distance.channel.scaled        -0.263267334 -0.02630938
## Vessels.activities.2019.scaled  0.063044184  0.30449727
## Order.trial.scaled              0.002546783  0.11435758
## SexM                            0.122832090  0.55316600
## ID.Temperature.scaled           0.025025958  0.19786843
## CL.scaled                       0.025004563  0.25464168
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.ad.final)
##           R2m       R2c
## [1,] 0.109186 0.4928679

marginal: only for the fixed effect. conditional: both fixed and random effect.

Escape latency

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Tank.Site + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.double.escape, na.action=na.exclude)
gvif(mod.escape.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.400.scaled               2.4177  1          1.5549
## Urban.200.scaled               2.0373  1          1.4273
## Distance.channel.scaled        2.0279  1          1.4240
## Vessels.activities.2019.scaled 2.1394  1          1.4627
## Order.trial.scaled             1.2156  1          1.1025
## Sex                            1.2303  1          1.1092
## Year                           1.4837  1          1.2181
## Tank.Site                      4.9140  1          2.2168
## JulianDay.scaled               3.3188  1          1.8218
## Hour.Platform.scaled           2.3715  1          1.5400
## ID.Temperature.scaled          1.5853  1          1.2591
## CL.scaled                      1.3132  1          1.1459
## Wind.scaled                    2.5983  1          1.6119
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.double.escape, na.action=na.exclude)
gvif(mod.escape.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.400.scaled               2.4121  1          1.5531
## Urban.200.scaled               2.0233  1          1.4224
## Distance.channel.scaled        1.9596  1          1.3999
## Vessels.activities.2019.scaled 2.1290  1          1.4591
## Order.trial.scaled             1.2150  1          1.1023
## Sex                            1.2275  1          1.1079
## Year                           1.4698  1          1.2124
## JulianDay.scaled               3.1470  1          1.7740
## Hour.Platform.scaled           1.6506  1          1.2848
## ID.Temperature.scaled          1.4847  1          1.2185
## CL.scaled                      1.3042  1          1.1420
## Wind.scaled                    1.6589  1          1.2880

We deleted Tank.Site given that the GVIF^(1/(2*df)) was over two.

Calculation of Pearson correlation coefficients

# Visualization of the correlations
cor.pearson.escape <- df.double.escape[, c(20,21,6,3,10,12,8,16,17,11)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)

Verification of the assumptions with the initial model

mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.double.escape, REML = TRUE, na.action=na.exclude)

summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
##    Data: df.double.escape
## 
## REML criterion at convergence: 1444.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1969 -0.6070 -0.0417  0.5045  3.2380 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.52919  0.7275  
##  Site     (Intercept) 0.02688  0.1640  
##  Residual             0.73644  0.8582  
## Number of obs: 481, groups:  ID, 196; Site, 19
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      3.19093    0.16937  46.96034  18.840  < 2e-16
## House.400.scaled                 0.09007    0.12955  13.59285   0.695   0.4986
## Urban.200.scaled                 0.20958    0.09627   9.67590   2.177   0.0554
## Distance.channel.scaled          0.21748    0.09849  16.12511   2.208   0.0421
## Vessels.activities.2019.scaled  -0.15503    0.10953   7.47936  -1.415   0.1972
## Order.trial.scaled              -0.29096    0.04956 388.36311  -5.870 9.33e-09
## SexM                            -0.10902    0.15319 150.86753  -0.712   0.4778
## Year2020                         0.26273    0.15773  62.56523   1.666   0.1008
## JulianDay.scaled                 0.42383    0.13360  15.25573   3.172   0.0062
## Hour.Platform.scaled            -0.01431    0.05757 253.59142  -0.249   0.8039
## ID.Temperature.scaled           -0.11100    0.08604  73.74765  -1.290   0.2011
## CL.scaled                        0.08780    0.08086 173.52075   1.086   0.2790
## Wind.scaled                     -0.16415    0.06627 291.08805  -2.477   0.0138
##                                   
## (Intercept)                    ***
## House.400.scaled                  
## Urban.200.scaled               .  
## Distance.channel.scaled        *  
## Vessels.activities.2019.scaled    
## Order.trial.scaled             ***
## SexM                              
## Year2020                          
## JulianDay.scaled               ** 
## Hour.Platform.scaled              
## ID.Temperature.scaled             
## CL.scaled                         
## Wind.scaled                    *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
plot(mod.escape.full) 

hist(residuals(mod.escape.full)) 

qqnorm(resid(mod.escape.full))

Model selection

Random variables

Creation of the different mixed models
## Null mixed model
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy), data = df.double.escape, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)

## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|Site), data = df.double.escape, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|ID), data = df.double.escape, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)

## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape,  na.action=na.exclude, REML = FALSE)

## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.double.escape, na.action=na.exclude, REML = FALSE)

The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.

Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.double.escape
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | Site)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.null         15 1481.4 1544.1 -725.72   1451.4                     
## mod.escape.dummy.site   16 1483.3 1550.1 -725.66   1451.3 0.1206  1     0.7284
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.double.escape
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | ID)
##                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.escape.null       15 1481.4 1544.1 -725.72   1451.4                     
## mod.escape.dummy.ID   16 1434.5 1501.3 -701.25   1402.5 48.944  1  2.634e-12
##                        
## mod.escape.null        
## mod.escape.dummy.ID ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.double.escape
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
##                    npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.ID        15 1432.5 1495.1 -701.25   1402.5                    
## mod.escape.ID.site   16 1434.5 1501.3 -701.25   1402.5     0  1          1

The test is considered significant when chisq is over 3.84

Only turtle identity will be kept.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape,  na.action=na.exclude, REML = FALSE)

summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: 
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +  
##     Vessels.activities.2019.scaled + Order.trial.scaled + Sex +  
##     Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled +  
##     CL.scaled + Wind.scaled + (1 | ID)
##    Data: df.double.escape
## 
##      AIC      BIC   logLik deviance df.resid 
##   1432.5   1495.1   -701.2   1402.5      466 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2232 -0.6156 -0.0512  0.5064  3.2580 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4960   0.7043  
##  Residual             0.7321   0.8556  
## Number of obs: 481, groups:  ID, 196
## 
## Fixed effects:
##                                 Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                      3.17772    0.15558 239.66937  20.425  < 2e-16
## House.400.scaled                 0.10897    0.11261 198.87413   0.968  0.33439
## Urban.200.scaled                 0.20547    0.08165 211.94975   2.516  0.01260
## Distance.channel.scaled          0.22969    0.08675 195.02223   2.648  0.00877
## Vessels.activities.2019.scaled  -0.16066    0.09050 188.33151  -1.775  0.07747
## Order.trial.scaled              -0.28677    0.04867 436.83513  -5.892 7.63e-09
## SexM                            -0.08576    0.14709 178.14486  -0.583  0.56058
## Year2020                         0.22789    0.14649 325.02237   1.556  0.12075
## JulianDay.scaled                 0.43899    0.11651 226.72118   3.768  0.00021
## Hour.Platform.scaled            -0.02489    0.05563 460.77839  -0.447  0.65477
## ID.Temperature.scaled           -0.11494    0.08046 465.86473  -1.429  0.15381
## CL.scaled                        0.08680    0.07838 186.28421   1.107  0.26956
## Wind.scaled                     -0.17356    0.06446 448.01732  -2.692  0.00736
##                                   
## (Intercept)                    ***
## House.400.scaled                  
## Urban.200.scaled               *  
## Distance.channel.scaled        ** 
## Vessels.activities.2019.scaled .  
## Order.trial.scaled             ***
## SexM                              
## Year2020                          
## JulianDay.scaled               ***
## Hour.Platform.scaled              
## ID.Temperature.scaled             
## CL.scaled                         
## Wind.scaled                    ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
final.model
mod.escape.final <- lmer(log.escape ~ Urban.200.scaled + Order.trial.scaled + JulianDay.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape,  na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log.escape ~ Urban.200.scaled + Order.trial.scaled + JulianDay.scaled +  
##     CL.scaled + Wind.scaled + (1 | ID)
##    Data: df.double.escape
## 
## REML criterion at convergence: 1618.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.39559 -0.61219 -0.06053  0.54123  3.11180 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 0.4678   0.6840  
##  Residual             0.7913   0.8896  
## Number of obs: 543, groups:  ID, 202
## 
## Fixed effects:
##                     Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          3.30917    0.07195 235.61868  45.995  < 2e-16 ***
## Urban.200.scaled     0.24196    0.07259 209.06938   3.333  0.00102 ** 
## Order.trial.scaled  -0.26885    0.04347 477.11367  -6.184 1.34e-09 ***
## JulianDay.scaled     0.33947    0.08571 243.62059   3.961 9.82e-05 ***
## CL.scaled            0.14548    0.06330 209.04497   2.298  0.02253 *  
## Wind.scaled         -0.15321    0.05068 483.67081  -3.023  0.00263 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) U.200. Ordr.. JlnDy. CL.scl
## Urbn.200.sc -0.226                            
## Ordr.trl.sc -0.222 -0.182                     
## JulnDy.scld -0.376  0.563 -0.187              
## CL.scaled   -0.045 -0.115  0.008  0.103       
## Wind.scaled  0.133  0.160 -0.440  0.266 -0.055
Calculation of the 95% confidence intervals
confint(mod.escape.final, level = 0.95, method = "Wald")
##                          2.5 %      97.5 %
## .sig01                      NA          NA
## .sigma                      NA          NA
## (Intercept)         3.16816101  3.45018404
## Urban.200.scaled    0.09968258  0.38423802
## Order.trial.scaled -0.35405874 -0.18364043
## JulianDay.scaled    0.17147967  0.50747005
## CL.scaled           0.02141824  0.26953839
## Wind.scaled        -0.25253317 -0.05388696
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.escape.final)
##            R2m     R2c
## [1,] 0.1460005 0.46328

marginal: only for the fixed effect. conditional: both fixed and random effect.

Emergence of the head after escaping

Verification of model assumptions

Multicollinearity: Generalized variance inflation factor (GVIF^(1/(2*df)))

GVIF^(1/(2*df)) over two indicate the presence of multicollinearity

mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Tank.Site + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.emergence, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                                   GVIF df GVIF^(1/(2*df))
## House.300.scaled                7.8026  1          2.7933
## Urban.600.scaled               10.2844  1          3.2069
## Distance.channel.scaled         1.7805  1          1.3344
## Vessels.activities.2019.scaled  2.0218  1          1.4219
## Order.trial.scaled              1.2107  1          1.1003
## Sex                             1.3053  1          1.1425
## Year                            1.4926  1          1.2217
## Tank.Site                       3.1891  1          1.7858
## JulianDay.scaled                2.3481  1          1.5324
## Hour.Platform.scaled            2.2332  1          1.4944
## ID.Temperature.scaled           1.7556  1          1.3250
## CL.scaled                       1.2567  1          1.1210
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.emergence, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
##                                  GVIF df GVIF^(1/(2*df))
## House.300.scaled               1.9292  1          1.3890
## Distance.channel.scaled        1.7571  1          1.3256
## Vessels.activities.2019.scaled 1.9590  1          1.3996
## Order.trial.scaled             1.2102  1          1.1001
## Sex                            1.2209  1          1.1049
## Tank.Site                      3.1018  1          1.7612
## Year                           1.4882  1          1.2199
## JulianDay.scaled               2.1570  1          1.4687
## Hour.Platform.scaled           2.2185  1          1.4895
## ID.Temperature.scaled          1.6455  1          1.2828
## CL.scaled                      1.2470  1          1.1167

Urban areas within 600m was deleted given that the GVIF^(1/(2*df)) was over two.

Calculation of Pearson correlation coefficients

# Visualization of the correlations
cor.pearson.emergence <- df.double.emergence[, c(19,22,6,3,10,12,8,16,17)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)

The Pearson correlation coefficient over 0.8 between Urban areas within 600m and number of houses within 300m confirmed the deletion.

Verification of the assumptions with the initial model

mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.double.emergence, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0597096 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +  
##     Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled +  
##     (1 | ID) + (1 | Site)
##    Data: df.double.emergence
## 
##      AIC      BIC   logLik deviance df.resid 
##    574.1    633.9   -273.0    546.1      515 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9251 -0.3956 -0.2373  0.4351  2.9867 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 2.6800   1.6371  
##  Site   (Intercept) 0.5229   0.7231  
## Number of obs: 529, groups:  ID, 209; Site, 21
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.57697    0.46645  -1.237   0.2161    
## House.300.scaled               -0.27675    0.35107  -0.788   0.4305    
## Distance.channel.scaled        -0.27959    0.29474  -0.949   0.3428    
## Vessels.activities.2019.scaled  0.51196    0.33349   1.535   0.1247    
## Order.trial.scaled              0.07448    0.15409   0.483   0.6288    
## SexM                            0.22499    0.42826   0.525   0.5993    
## Tank.SiteT                     -2.90817    0.64776  -4.490 7.14e-06 ***
## Year2020                       -0.24392    0.46219  -0.528   0.5977    
## JulianDay.scaled                0.95437    0.34523   2.764   0.0057 ** 
## Hour.Platform.scaled            0.09197    0.22069   0.417   0.6769    
## ID.Temperature.scaled          -0.05166    0.25219  -0.205   0.8377    
## CL.scaled                       0.02028    0.22268   0.091   0.9274    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. V..201 Ordr.. SexM   Tnk.ST Yr2020 JlnDy.
## Hs.300.scld  0.054                                                        
## Dstnc.chnn.  0.101  0.151                                                 
## Vssl..2019.  0.058  0.021 -0.497                                          
## Ordr.trl.sc -0.044 -0.082 -0.046  0.056                                   
## SexM        -0.589  0.169 -0.077  0.082 -0.025                            
## Tank.SiteT  -0.207 -0.041 -0.084 -0.096  0.172 -0.021                     
## Year2020    -0.408 -0.212 -0.084 -0.105 -0.207 -0.051 -0.094              
## JulnDy.scld -0.029  0.351  0.203 -0.182 -0.188  0.098 -0.468  0.013       
## Hr.Pltfrm.s  0.236  0.088  0.030  0.056  0.197 -0.021 -0.614 -0.007  0.151
## ID.Tmprtr.s  0.072  0.085 -0.172  0.020  0.036  0.015  0.348 -0.228 -0.404
## CL.scaled   -0.216 -0.036 -0.011  0.049  0.003  0.324  0.051 -0.119  0.097
##             Hr.Pl. ID.Tm.
## Hs.300.scld              
## Dstnc.chnn.              
## Vssl..2019.              
## Ordr.trl.sc              
## SexM                     
## Tank.SiteT               
## Year2020                 
## JulnDy.scld              
## Hr.Pltfrm.s              
## ID.Tmprtr.s -0.036       
## CL.scaled   -0.042 -0.039
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0597096 (tol = 0.002, component 1)
Testing for the best optimizer
gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0366235 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00203983 (tol = 0.002, component 1)
## [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00236751 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef)) 
##                               (Intercept) House.300.scaled
## bobyqa                         -0.5703545       -0.2731547
## Nelder_Mead                    -0.5776692       -0.2809434
## nlminbwrap                     -0.5703548       -0.2731633
## nmkbw                          -0.5705922       -0.2733797
## optimx.L-BFGS-B                -0.5703964       -0.2732323
## nloptwrap.NLOPT_LN_NELDERMEAD  -0.5705662       -0.2732201
## nloptwrap.NLOPT_LN_BOBYQA      -0.5704294       -0.2729278
##                               Distance.channel.scaled
## bobyqa                                     -0.2793841
## Nelder_Mead                                -0.2806871
## nlminbwrap                                 -0.2793867
## nmkbw                                      -0.2794297
## optimx.L-BFGS-B                            -0.2793669
## nloptwrap.NLOPT_LN_NELDERMEAD              -0.2795616
## nloptwrap.NLOPT_LN_BOBYQA                  -0.2791320
##                               Vessels.activities.2019.scaled Order.trial.scaled
## bobyqa                                             0.5165569         0.07778553
## Nelder_Mead                                        0.5154441         0.07581981
## nlminbwrap                                         0.5165607         0.07778218
## nmkbw                                              0.5166040         0.07775609
## optimx.L-BFGS-B                                    0.5165086         0.07779155
## nloptwrap.NLOPT_LN_NELDERMEAD                      0.5167676         0.07783627
## nloptwrap.NLOPT_LN_BOBYQA                          0.5165501         0.07783208
##                                    SexM Tank.SiteT   Year2020 JulianDay.scaled
## bobyqa                        0.2235015  -2.916581 -0.2552020        0.9607171
## Nelder_Mead                   0.2291159  -2.903567 -0.2546789        0.9564602
## nlminbwrap                    0.2234918  -2.916589 -0.2551919        0.9607245
## nmkbw                         0.2235574  -2.916467 -0.2551090        0.9606228
## optimx.L-BFGS-B               0.2234028  -2.916398 -0.2551102        0.9606051
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2235494  -2.916669 -0.2550607        0.9607448
## nloptwrap.NLOPT_LN_BOBYQA     0.2235864  -2.916952 -0.2552062        0.9610121
##                               Hour.Platform.scaled ID.Temperature.scaled
## bobyqa                                  0.09735240           -0.04527925
## Nelder_Mead                             0.09061758           -0.04467571
## nlminbwrap                              0.09735059           -0.04528315
## nmkbw                                   0.09716383           -0.04536702
## optimx.L-BFGS-B                         0.09730574           -0.04525554
## nloptwrap.NLOPT_LN_NELDERMEAD           0.09739978           -0.04529126
## nloptwrap.NLOPT_LN_BOBYQA               0.09742754           -0.04567319
##                                CL.scaled
## bobyqa                        0.01697201
## Nelder_Mead                   0.02125493
## nlminbwrap                    0.01696731
## nmkbw                         0.01698761
## optimx.L-BFGS-B               0.01694443
## nloptwrap.NLOPT_LN_NELDERMEAD 0.01699645
## nloptwrap.NLOPT_LN_BOBYQA     0.01706011
sapply(gm_all,logLik) 
##                        bobyqa                   Nelder_Mead 
##                     -273.0322                     -273.0332 
##                    nlminbwrap                         nmkbw 
##                     -273.0322                     -273.0322 
##               optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD 
##                     -273.0322                     -273.0322 
##     nloptwrap.NLOPT_LN_BOBYQA 
##                     -273.0322

We decided to use optimx.L-BFGS-B optimizer to include in our initial model.

mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.double.emergence, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), family = binomial, na.action=na.exclude)

summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +  
##     Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled +  
##     (1 | ID) + (1 | Site)
##    Data: df.double.emergence
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    574.1    633.9   -273.0    546.1      515 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9279 -0.3948 -0.2357  0.4347  2.9939 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 2.7080   1.6456  
##  Site   (Intercept) 0.5328   0.7299  
## Number of obs: 529, groups:  ID, 209; Site, 21
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.57040    0.46818  -1.218  0.22310    
## House.300.scaled               -0.27323    0.35298  -0.774  0.43889    
## Distance.channel.scaled        -0.27937    0.29628  -0.943  0.34572    
## Vessels.activities.2019.scaled  0.51651    0.33560   1.539  0.12379    
## Order.trial.scaled              0.07779    0.15439   0.504  0.61436    
## SexM                            0.22340    0.42973   0.520  0.60316    
## Tank.SiteT                     -2.91640    0.64979  -4.488 7.18e-06 ***
## Year2020                       -0.25511    0.46414  -0.550  0.58256    
## JulianDay.scaled                0.96061    0.34753   2.764  0.00571 ** 
## Hour.Platform.scaled            0.09731    0.22129   0.440  0.66014    
## ID.Temperature.scaled          -0.04526    0.25277  -0.179  0.85791    
## CL.scaled                       0.01694    0.22343   0.076  0.93955    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. V..201 Ordr.. SexM   Tnk.ST Yr2020 JlnDy.
## Hs.300.scld  0.054                                                        
## Dstnc.chnn.  0.101  0.151                                                 
## Vssl..2019.  0.057  0.021 -0.497                                          
## Ordr.trl.sc -0.044 -0.082 -0.046  0.056                                   
## SexM        -0.589  0.169 -0.076  0.082 -0.025                            
## Tank.SiteT  -0.208 -0.041 -0.084 -0.097  0.171 -0.020                     
## Year2020    -0.408 -0.211 -0.085 -0.105 -0.208 -0.051 -0.093              
## JulnDy.scld -0.028  0.352  0.203 -0.181 -0.186  0.098 -0.468  0.011       
## Hr.Pltfrm.s  0.236  0.088  0.030  0.056  0.197 -0.020 -0.616 -0.008  0.152
## ID.Tmprtr.s  0.071  0.085 -0.171  0.020  0.036  0.015  0.347 -0.229 -0.402
## CL.scaled   -0.215 -0.035 -0.010  0.049  0.003  0.324  0.053 -0.118  0.097
##             Hr.Pl. ID.Tm.
## Hs.300.scld              
## Dstnc.chnn.              
## Vssl..2019.              
## Ordr.trl.sc              
## SexM                     
## Tank.SiteT               
## Year2020                 
## JulnDy.scld              
## Hr.Pltfrm.s              
## ID.Tmprtr.s -0.036       
## CL.scaled   -0.042 -0.038
plot(mod.emergence.full) 

hist(residuals(mod.emergence.full)) 

qqnorm(resid(mod.emergence.full))

Model selection

Random variables

Creation of the different mixed models
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled +  Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled +  Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled +  Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)

## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled +  Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)

## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled +  Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
Likelihood ratio tests between the mixed models

The less complex model is always first.

anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.double.emergence
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
##                          npar    AIC    BIC  logLik deviance  Chisq Df
## mod.emergence.null         13 614.21 669.73 -294.11   588.21          
## mod.emergence.dummy.site   14 602.88 662.67 -287.44   574.88 13.329  1
##                          Pr(>Chisq)    
## mod.emergence.null                     
## mod.emergence.dummy.site  0.0002614 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.double.emergence
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
##                        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.emergence.null       13 614.21 669.73 -294.11   588.21                     
## mod.emergence.dummy.ID   14 576.50 636.29 -274.25   548.50 39.709  1  2.948e-10
##                           
## mod.emergence.null        
## mod.emergence.dummy.ID ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.double.emergence
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
##                       npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod.emergence.ID        13 574.49 630.01 -274.24   548.49                     
## mod.emergence.ID.site   14 574.06 633.86 -273.03   546.06 2.4239  1     0.1195

The test is considered significant when chisq is over 3.84

We do not need to keep sampling site identity as a random variable.

Predictor variables

We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.

initial model
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +  
##     Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +  
##     Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled +      (1 | ID)
##    Data: df.double.emergence
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    574.5    630.0   -274.2    548.5      516 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8941 -0.3865 -0.2438  0.4490  3.1677 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 3.263    1.806   
## Number of obs: 529, groups:  ID, 209
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.59974    0.43860  -1.367  0.17151    
## House.300.scaled               -0.36068    0.29042  -1.242  0.21427    
## Distance.channel.scaled        -0.34887    0.24819  -1.406  0.15983    
## Vessels.activities.2019.scaled  0.58629    0.25755   2.276  0.02282 *  
## Order.trial.scaled              0.08994    0.15152   0.594  0.55280    
## SexM                            0.20905    0.43345   0.482  0.62960    
## Tank.SiteT                     -2.90224    0.62366  -4.654 3.26e-06 ***
## Year2020                       -0.02535    0.41231  -0.061  0.95098    
## JulianDay.scaled                0.80274    0.26871   2.987  0.00281 ** 
## Hour.Platform.scaled            0.11054    0.20980   0.527  0.59827    
## ID.Temperature.scaled           0.06824    0.21781   0.313  0.75405    
## CL.scaled                      -0.09072    0.22025  -0.412  0.68040    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) H.300. Dstn.. V..201 Ordr.. SexM   Tnk.ST Yr2020 JlnDy.
## Hs.300.scld  0.043                                                        
## Dstnc.chnn.  0.081  0.122                                                 
## Vssl..2019.  0.117  0.087 -0.501                                          
## Ordr.trl.sc -0.071 -0.086 -0.074  0.060                                   
## SexM        -0.625  0.225 -0.094  0.122 -0.029                            
## Tank.SiteT  -0.243 -0.045 -0.092 -0.220  0.168 -0.002                     
## Year2020    -0.466 -0.213  0.029 -0.282 -0.201 -0.051 -0.077              
## JulnDy.scld -0.054  0.335  0.199 -0.065 -0.223  0.080 -0.524  0.125       
## Hr.Pltfrm.s  0.259  0.119  0.013  0.160  0.194 -0.040 -0.599 -0.044  0.150
## ID.Tmprtr.s  0.073  0.146 -0.243  0.031 -0.001  0.057  0.297 -0.328 -0.323
## CL.scaled   -0.226 -0.113 -0.036  0.074  0.024  0.315  0.095 -0.062 -0.002
##             Hr.Pl. ID.Tm.
## Hs.300.scld              
## Dstnc.chnn.              
## Vssl..2019.              
## Ordr.trl.sc              
## SexM                     
## Tank.SiteT               
## Year2020                 
## JulnDy.scld              
## Hr.Pltfrm.s              
## ID.Tmprtr.s -0.051       
## CL.scaled   -0.038  0.046
final model
mod.emergence.final <- glmer(emergence ~ Vessels.activities.2019.scaled + Tank.Site + JulianDay.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## emergence ~ Vessels.activities.2019.scaled + Tank.Site + JulianDay.scaled +  
##     (1 | ID)
##    Data: df.double.emergence
## Control: 
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    619.1    641.0   -304.6    609.1      577 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7942 -0.3722 -0.2399  0.4728  3.1463 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 4.089    2.022   
## Number of obs: 582, groups:  ID, 215
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -0.5568     0.2251  -2.474   0.0134 *  
## Vessels.activities.2019.scaled   0.4822     0.2102   2.294   0.0218 *  
## Tank.SiteT                      -2.8945     0.4209  -6.877 6.10e-12 ***
## JulianDay.scaled                 1.0130     0.2244   4.515 6.34e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) V..201 Tnk.ST
## Vssl..2019.  0.012              
## Tank.SiteT  -0.143 -0.257       
## JulnDy.scld -0.198 -0.101 -0.456
Calculation of the 95% confidence intervals
confint(mod.emergence.final, level = 0.95, method = "Wald")
##                                      2.5 %     97.5 %
## .sig01                                  NA         NA
## (Intercept)                    -0.99798497 -0.1156176
## Vessels.activities.2019.scaled  0.07017862  0.8941339
## Tank.SiteT                     -3.71937426 -2.0695618
## JulianDay.scaled                0.57321285  1.4527686
Calculation of the marginal and conditional variance explained by the final model
r.squaredGLMM(mod.emergence.final)
##                   R2m       R2c
## theoretical 0.1869336 0.6375022
## delta       0.1633519 0.5570811

marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.