library(Hmisc)
library(writexl)
library(ggplot2)
library(dplyr)
library(lmerTest)
library(optimx)
library(lme4)
library(PerformanceAnalytics)
library(effects)
library(MuMIn)
library(ggeffects)
library(splines)
library(glmtoolbox)
library(afex)
library(nloptr)
library(dfoptim)
library(psych)
library(ordinal)
library(dplyr)
library(cowplot)
You need to set the directory by selecting the file where the dataset are located.
df <- read.table("Behaviors.txt", header = T)
The dataset has 1135 entries corresponding to the number of trial made on the 730 turtle sampled. However, all three behaviors were not always recorded at each trial. Thus, the number of observations for each behavior is less than the number of entries.
We want to make sure that the data are in the good format for the analyses.
# Risk-taking behaviors that will be used as response variable in their respective mixed model.
df$active.defenses <- as.numeric(df$active.defenses)
df$escape <- as.numeric(df$escape)
df$log.escape <- log(df$escape + 1) # log(x+1) transformation of the escape latency to be able to use a Gaussian distribution in the mixed model.
df$emergence <- as.numeric(df$emergence)
# Predictor variables
df$Year <- as.factor(df$Year)
df$JulianDay <- as.numeric(df$JulianDay)
df$Hour.active.defenses <- as.numeric(df$Hour.active.defenses)
df$Hour.Platform <- as.numeric(df$Hour.Platform)
df$Order.trial <- as.numeric(df$Order.trial)
df$CL <- as.numeric(df$CL)
df$Wind <- as.numeric(df$Wind)
df$ID.Temperature <- as.numeric(df$ID.Temperature)
df$Site <- as.factor(df$Site)
df$Distance.channel <- as.numeric(df$Distance.channel)
df$Vessels.activities.2019 <- as.numeric(df$Vessels.activities.2019)
df$House.200 <- as.numeric(df$House.200)
df$House.300 <- as.numeric(df$House.300)
df$House.400 <- as.numeric(df$House.400)
df$Urban.200 <- as.numeric(df$Urban.200)
df$Urban.600 <- as.numeric(df$Urban.600)
df$Urban.900 <- as.numeric(df$Urban.900)
All continuous predictor variables were standardized (mean zero, unit variance) before model selection.
df$JulianDay.scaled <- scale(df$JulianDay, center=TRUE, scale=TRUE)
df$Hour.active.defenses.scaled <- scale(df$Hour.active.defenses, center=TRUE, scale=TRUE)
df$Hour.Platform.scaled <- scale(df$Hour.Platform, center=TRUE, scale=TRUE)
df$Order.trial.scaled <- scale(df$Order.trial, center=TRUE, scale=TRUE)
df$CL.scaled <- scale(df$CL, center=TRUE, scale=TRUE)
df$Wind.scaled <- scale(df$Wind, center=TRUE, scale=TRUE)
df$ID.Temperature.scaled <- scale(df$ID.Temperature, center=TRUE, scale=TRUE)
df$Distance.channel.scaled <- scale(df$Distance.channel, center=TRUE, scale=TRUE)
df$Vessels.activities.2019.scaled <- scale(df$Vessels.activities.2019, center=TRUE, scale=TRUE)
df$House.200.scaled <- scale(df$House.200, center=TRUE, scale=TRUE)
df$House.300.scaled <- scale(df$House.300, center=TRUE, scale=TRUE)
df$House.400.scaled <- scale(df$House.400, center=TRUE, scale=TRUE)
df$Urban.200.scaled <- scale(df$Urban.200, center=TRUE, scale=TRUE)
df$Urban.600.scaled <- scale(df$Urban.600, center=TRUE, scale=TRUE)
df$Urban.900.scaled <- scale(df$Urban.900, center=TRUE, scale=TRUE)
Here, we want to see if we obtained similar results from those with the complete dataset by separating turtles tested in captivity and in the wild.
df.tank <- df[df$Tank.Site == 'T', ]
table(df.tank$Site)
##
## BR1 BR2-2020 C1 CB1 CL2 LR1 RR1
## 65 2 12 19 33 0 0
## RR10 RR2-2-2019 RR2-2020 RR3-1 RR3-2 RR4 RR5
## 0 0 0 0 0 0 0
## RR6 RR7 RR8 RR9 RS1 SA1 UP6
## 0 0 0 0 45 57 0
## WF1
## 18
n_distinct(df.tank$ID)
## [1] 122
Now, we only have observations from eight sites mainly located in the south section of the canal reducing the variation in human disturbance compared to the entire dataset.
df.site <- df[df$Tank.Site == 'S', ]
table(df.site$Site)
##
## BR1 BR2-2020 C1 CB1 CL2 LR1 RR1
## 53 30 28 55 21 51 62
## RR10 RR2-2-2019 RR2-2020 RR3-1 RR3-2 RR4 RR5
## 30 15 50 25 36 32 46
## RR6 RR7 RR8 RR9 RS1 SA1 UP6
## 78 63 37 40 29 20 42
## WF1
## 41
n_distinct(df.site$ID)
## [1] 730
graph.vessel <- ggplot() +
geom_histogram(aes(x=df$Vessels.activities.2019),fill="black", alpha=0.9) +
geom_histogram(aes(x=df.site$Vessels.activities.2019), fill="chartreuse4") + geom_histogram(aes(x=df.tank$Vessels.activities.2019), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Mean daily number of vessel crossings")
graph.channel <- ggplot() +
geom_histogram(aes(x=df$Distance.channel),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.site$Distance.channel), fill="chartreuse4") +
geom_histogram(aes(x=df.tank$Distance.channel), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Shortest distance to the navigation channel")
graph.house <- ggplot() +
geom_histogram(aes(x=df$House.200),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.site$House.200), fill="chartreuse4") +
geom_histogram(aes(x=df.tank$House.200), fill="chartreuse") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Number of houses within 200 m")
graph.total <- plot_grid(graph.channel, graph.vessel, graph.house, ncol = 3, labels = "AUTO")
graph.total
Here, we compare the frequency of the variables quantifying human disturbance: A) the shortest aquatic distance to the navigation channel in meters; B) the mean daily number of vessel crossings, and C) the number of houses with access to the canal within 200 m of the sampling site, according to the dataset used (black: all the observations from the entire dataset, dark green: only the behavioral measurements performed at sampling sites, light green: only behavioral measurements performed in the controlled setting).
We can observed drastic changes in the frequency and diversity of human disturbance level observed between the different databases used.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## House.200.scaled 10.0773 1 3.1745
## Urban.900.scaled 2.2025 1 1.4841
## Distance.channel.scaled 2.7524 1 1.6590
## Vessels.activities.2019.scaled 6.8818 1 2.6233
## Order.trial.scaled 2.0016 1 1.4148
## Sex 1.2304 1 1.1092
## Year 8.3392 1 2.8878
## JulianDay.scaled 4.3675 1 2.0899
## Hour.active.defenses.scaled 3.3399 1 1.8275
## ID.Temperature.scaled 3.9508 1 1.9877
## CL.scaled 1.1598 1 1.0769
mod.active.defenses.vif <- lm(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## Urban.900.scaled 1.5731 1 1.2542
## Distance.channel.scaled 2.3267 1 1.5254
## Vessels.activities.2019.scaled 2.5494 1 1.5967
## Order.trial.scaled 1.4001 1 1.1833
## Sex 1.1650 1 1.0794
## JulianDay.scaled 3.4714 1 1.8632
## Hour.active.defenses.scaled 2.3268 1 1.5254
## ID.Temperature.scaled 3.6080 1 1.8995
## CL.scaled 1.1560 1 1.0752
Compared to the model with the complete dataset, number of houses with 200m and year were deleted given that the GVIF^(1/(2*df)) were over two. Urban areas within 900m was kept here. (Tank.Site was deleted given that all the tests are made in tanks)
# Visualization of the correlations
cor.pearson.active.defenses <- df.tank[, c(23,16,17,6,3,4,12,8)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)
We observed a Pearson correlation coefficient over 0.8 between Julian Day and Turtle temperature. Given that we have more missing observations in turtle temperature, We deleted this variable to only keep Julian Day.
mod.ad.full <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, REML = TRUE, na.action=na.exclude)
summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ Urban.900.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled +
## (1 | Site) + (1 | ID)
## Data: df.tank
##
## REML criterion at convergence: 416.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6200 -0.6151 -0.1467 0.5279 2.4626
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.15279 0.3909
## Site (Intercept) 0.08543 0.2923
## Residual 0.62227 0.7888
## Number of obs: 157, groups: ID, 113; Site, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.93313 1.04563 2.17984 0.892 0.45958
## Urban.900.scaled 0.24489 1.68026 2.24032 0.146 0.89620
## Distance.channel.scaled -0.19265 0.21574 5.83063 -0.893 0.40722
## Vessels.activities.2019.scaled 0.30490 0.17266 2.56215 1.766 0.19104
## Order.trial.scaled 0.01229 0.09945 52.01929 0.124 0.90212
## SexM 0.52788 0.17167 96.58996 3.075 0.00274 **
## JulianDay.scaled -0.24101 0.27407 2.67864 -0.879 0.45093
## Hour.active.defenses.scaled 0.11448 0.12211 35.15761 0.938 0.35488
## CL.scaled 0.07369 0.08538 81.58201 0.863 0.39063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) U.900. Dstn.. V..201 Ordr.. SexM JlnDy. Hr.c..
## Urbn.900.sc 0.937
## Dstnc.chnn. -0.567 -0.501
## Vssl..2019. 0.241 0.286 -0.516
## Ordr.trl.sc 0.094 -0.010 -0.102 0.063
## SexM -0.061 0.046 -0.056 0.056 -0.008
## JulnDy.scld -0.370 -0.113 0.246 -0.059 -0.159 -0.001
## Hr.ctv.dfn. 0.042 0.177 0.010 0.261 -0.226 -0.040 -0.052
## CL.scaled 0.047 0.056 -0.060 0.039 0.026 0.302 -0.089 0.083
plot(mod.ad.full)
hist(residuals(mod.ad.full))
qqnorm(resid(mod.ad.full))
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy), data = df.tank, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.
The less complex model is always first.
anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.tank
## Models:
## mod.ad.null: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 11 420.9 454.52 -199.45 398.9
## mod.ad.dummy.site 12 422.9 459.58 -199.45 398.9 0 1 1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.tank
## Models:
## mod.ad.null: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 11 420.90 454.52 -199.45 398.90
## mod.ad.dummy.ID 12 421.83 458.51 -198.92 397.83 1.0681 1 0.3014
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.tank
## Models:
## mod.ad.ID: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.ID 11 419.83 453.45 -198.92 397.83
## mod.ad.ID.site 12 421.83 458.51 -198.92 397.83 0 1 1
The test is considered significant when chisq is over 3.84
No random variables need to be kept.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.ad.full <- lmer(active.defenses ~ Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: active.defenses ~ Urban.900.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## JulianDay.scaled + Hour.active.defenses.scaled + CL.scaled + (1 | ID)
## Data: df.tank
##
## AIC BIC logLik deviance df.resid
## 419.8 453.5 -198.9 397.8 146
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6253 -0.6351 -0.1908 0.5570 2.6353
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.1306 0.3614
## Residual 0.6145 0.7839
## Number of obs: 157, groups: ID, 113
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.15984 0.57998 98.25710 2.000 0.04828
## Urban.900.scaled 0.48429 0.94153 99.21310 0.514 0.60814
## Distance.channel.scaled -0.09417 0.15667 146.43706 -0.601 0.54873
## Vessels.activities.2019.scaled 0.25128 0.10417 152.81309 2.412 0.01704
## Order.trial.scaled 0.06659 0.09313 71.48239 0.715 0.47689
## SexM 0.50194 0.16591 102.57981 3.025 0.00314
## JulianDay.scaled -0.19298 0.17077 150.08412 -1.130 0.26026
## Hour.active.defenses.scaled 0.03546 0.09950 148.33901 0.356 0.72207
## CL.scaled 0.07100 0.08310 85.89838 0.854 0.39528
##
## (Intercept) *
## Urban.900.scaled
## Distance.channel.scaled
## Vessels.activities.2019.scaled *
## Order.trial.scaled
## SexM **
## JulianDay.scaled
## Hour.active.defenses.scaled
## CL.scaled
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) U.900. Dstn.. V..201 Ordr.. SexM JlnDy. Hr.c..
## Urbn.900.sc 0.883
## Dstnc.chnn. -0.515 -0.316
## Vssl..2019. 0.165 0.263 -0.446
## Ordr.trl.sc 0.114 -0.027 -0.241 0.210
## SexM -0.055 0.127 -0.040 0.081 0.005
## JulnDy.scld -0.409 -0.061 0.423 0.040 -0.246 -0.018
## Hr.ctv.dfn. 0.134 0.370 0.060 0.377 -0.089 -0.040 0.029
## CL.scaled 0.110 0.126 -0.079 0.069 0.029 0.299 -0.146 0.108
mod.ad.final <- lmer(active.defenses ~ Vessels.activities.2019.scaled + Sex + (1|ID), data = df.tank, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ Vessels.activities.2019.scaled + Sex + (1 | ID)
## Data: df.tank
##
## REML criterion at convergence: 566.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1023 -0.5995 -0.1612 0.5621 2.6267
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.2087 0.4569
## Residual 0.5321 0.7294
## Number of obs: 225, groups: ID, 117
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.74886 0.12008 114.63114 6.237 7.71e-09
## Vessels.activities.2019.scaled 0.26246 0.06598 103.18791 3.978 0.000129
## SexM 0.45228 0.14149 107.47018 3.196 0.001827
##
## (Intercept) ***
## Vessels.activities.2019.scaled ***
## SexM **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) V..201
## Vssl..2019. -0.267
## SexM -0.818 0.113
confint(mod.ad.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 0.5135155 0.9842101
## Vessels.activities.2019.scaled 0.1331450 0.3917788
## SexM 0.1749553 0.7295997
r.squaredGLMM(mod.ad.final)
## R2m R2c
## [1,] 0.1184081 0.3668126
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.active.defenses.vif <- lm(active.defenses ~ Urban.900.scaled + House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## Urban.900.scaled 4.0764 1 2.0190
## House.200.scaled 3.2587 1 1.8052
## Distance.channel.scaled 1.6502 1 1.2846
## Vessels.activities.2019.scaled 1.8376 1 1.3556
## Order.trial.scaled 1.2559 1 1.1207
## Sex 1.2966 1 1.1387
## Year 1.5506 1 1.2452
## JulianDay.scaled 2.7134 1 1.6472
## Hour.active.defenses.scaled 1.1434 1 1.0693
## ID.Temperature.scaled 2.4248 1 1.5572
## CL.scaled 1.3136 1 1.1461
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## House.200.scaled 1.5196 1 1.2327
## Distance.channel.scaled 1.6352 1 1.2787
## Vessels.activities.2019.scaled 1.8142 1 1.3469
## Order.trial.scaled 1.2092 1 1.0996
## Sex 1.2391 1 1.1131
## Year 1.5504 1 1.2452
## JulianDay.scaled 2.5473 1 1.5960
## Hour.active.defenses.scaled 1.1420 1 1.0686
## ID.Temperature.scaled 2.4046 1 1.5507
## CL.scaled 1.3118 1 1.1453
Here, urban areas within 900m was deleted given that the GVIF^(1/(2*df)) were over two as we did with the complete dataset (Tank.Site was deleted given that all the tests were performed at the sampling sites)
# Visualization of the correlations
cor.pearson.active.defenses <- df.site[, c(18,16,17,6,3,4,12,8)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)
No correlation over 0.8
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, REML = TRUE, na.action=na.exclude)
summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled +
## CL.scaled + (1 | Site) + (1 | ID)
## Data: df.site
##
## REML criterion at convergence: 1925.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.46216 -0.52423 -0.05754 0.48215 2.83807
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.44217 0.6650
## Site (Intercept) 0.03788 0.1946
## Residual 0.32012 0.5658
## Number of obs: 755, groups: ID, 619; Site, 21
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.81725 0.08787 50.39952 9.301 1.61e-12
## House.200.scaled -0.13508 0.05843 13.35051 -2.312 0.037336
## Distance.channel.scaled -0.15796 0.05887 14.09975 -2.683 0.017752
## Vessels.activities.2019.scaled 0.19627 0.07084 11.82436 2.771 0.017150
## Order.trial.scaled 0.01177 0.03559 278.08647 0.331 0.741028
## SexM 0.27443 0.07949 589.32876 3.452 0.000595
## Year2020 0.08629 0.08493 415.91301 1.016 0.310215
## JulianDay.scaled 0.02272 0.07306 38.17390 0.311 0.757483
## Hour.active.defenses.scaled 0.01218 0.03953 475.38650 0.308 0.758194
## ID.Temperature.scaled -0.03639 0.04956 302.16070 -0.734 0.463311
## CL.scaled 0.12225 0.04213 586.38619 2.902 0.003850
##
## (Intercept) ***
## House.200.scaled *
## Distance.channel.scaled *
## Vessels.activities.2019.scaled *
## Order.trial.scaled
## SexM ***
## Year2020
## JulianDay.scaled
## Hour.active.defenses.scaled
## ID.Temperature.scaled
## CL.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 Ordr.. SexM Yr2020 JlnDy. Hr.c..
## Hs.200.scld 0.073
## Dstnc.chnn. 0.191 0.107
## Vssl..2019. -0.080 -0.047 -0.515
## Ordr.trl.sc 0.236 -0.048 0.022 0.052
## SexM -0.597 0.042 -0.025 0.009 -0.054
## Year2020 -0.413 -0.189 -0.219 0.178 -0.172 0.060
## JulnDy.scld 0.167 0.232 0.281 -0.278 -0.054 0.068 0.026
## Hr.ctv.dfn. 0.076 0.066 0.093 -0.121 0.085 0.017 0.140 0.145
## ID.Tmprtr.s -0.014 0.076 -0.117 -0.049 0.001 0.000 -0.268 -0.503 -0.061
## CL.scaled -0.281 -0.032 -0.039 0.009 -0.065 0.394 0.081 0.108 0.071
## ID.Tm.
## Hs.200.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Year2020
## JulnDy.scld
## Hr.ctv.dfn.
## ID.Tmprtr.s
## CL.scaled -0.052
plot(mod.ad.full)
hist(residuals(mod.ad.full))
qqnorm(resid(mod.ad.full))
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.site, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
The less complex model is always first.
anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.site
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 13 1955.7 2015.9 -964.86 1929.7
## mod.ad.dummy.site 14 1953.9 2018.7 -962.94 1925.9 3.8399 1 0.05005 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.site
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 13 1955.7 2015.9 -964.86 1929.7
## mod.ad.dummy.ID 14 1911.8 1976.6 -941.90 1883.8 45.917 1 1.234e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.site
## Models:
## mod.ad.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.ID 13 1909.8 1969.9 -941.90 1883.8
## mod.ad.ID.site 14 1908.5 1973.3 -940.24 1880.5 3.3134 1 0.06872 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As we obtained with the complete dataset, we need to keep sampling site and turtle identities as random variables.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled +
## CL.scaled + (1 | Site) + (1 | ID)
## Data: df.site
##
## AIC BIC logLik deviance df.resid
## 1908.5 1973.3 -940.2 1880.5 741
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.49139 -0.54075 -0.05307 0.50035 2.85811
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.44019 0.6635
## Site (Intercept) 0.01778 0.1334
## Residual 0.31777 0.5637
## Number of obs: 755, groups: ID, 619; Site, 21
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.81579 0.08070 101.68270 10.109 < 2e-16
## House.200.scaled -0.13364 0.04898 20.32292 -2.728 0.012827
## Distance.channel.scaled -0.15209 0.04969 22.76527 -3.061 0.005581
## Vessels.activities.2019.scaled 0.18135 0.05866 17.55168 3.092 0.006435
## Order.trial.scaled 0.01243 0.03513 284.07101 0.354 0.723741
## SexM 0.27855 0.07876 595.63502 3.537 0.000436
## Year2020 0.10433 0.08234 356.28703 1.267 0.205963
## JulianDay.scaled 0.05262 0.06546 49.40021 0.804 0.425358
## Hour.active.defenses.scaled 0.02182 0.03868 479.84232 0.564 0.573047
## ID.Temperature.scaled -0.03850 0.04767 264.41829 -0.808 0.420016
## CL.scaled 0.12167 0.04160 588.96080 2.925 0.003581
##
## (Intercept) ***
## House.200.scaled *
## Distance.channel.scaled **
## Vessels.activities.2019.scaled **
## Order.trial.scaled
## SexM ***
## Year2020
## JulianDay.scaled
## Hour.active.defenses.scaled
## ID.Temperature.scaled
## CL.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 Ordr.. SexM Yr2020 JlnDy. Hr.c..
## Hs.200.scld 0.092
## Dstnc.chnn. 0.228 0.121
## Vssl..2019. -0.120 -0.073 -0.514
## Ordr.trl.sc 0.250 -0.061 0.019 0.061
## SexM -0.645 0.053 -0.028 0.014 -0.056
## Year2020 -0.449 -0.215 -0.255 0.228 -0.165 0.066
## JulnDy.scld 0.153 0.224 0.293 -0.272 -0.076 0.083 0.021
## Hr.ctv.dfn. 0.074 0.073 0.103 -0.138 0.081 0.020 0.132 0.127
## ID.Tmprtr.s -0.008 0.081 -0.125 -0.073 0.012 -0.007 -0.275 -0.546 -0.062
## CL.scaled -0.298 -0.035 -0.038 0.006 -0.067 0.393 0.084 0.140 0.076
## ID.Tm.
## Hs.200.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Year2020
## JulnDy.scld
## Hr.ctv.dfn.
## ID.Tmprtr.s
## CL.scaled -0.080
mod.ad.final <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Sex + CL.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Sex + CL.scaled + (1 | Site) +
## (1 | ID)
## Data: df.site
##
## REML criterion at convergence: 2195.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3777 -0.5191 -0.0760 0.4741 3.2385
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.41587 0.6449
## Site (Intercept) 0.05549 0.2356
## Residual 0.33961 0.5828
## Number of obs: 866, groups: ID, 714; Site, 22
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.85717 0.07674 43.33695 11.170 2.43e-14
## House.200.scaled -0.14502 0.06002 19.60251 -2.416 0.02558
## Distance.channel.scaled -0.11579 0.05957 19.01611 -1.944 0.06686
## Vessels.activities.2019.scaled 0.16997 0.06799 16.68831 2.500 0.02318
## SexM 0.32133 0.07311 694.71790 4.395 1.28e-05
## CL.scaled 0.12601 0.03915 694.80055 3.219 0.00135
##
## (Intercept) ***
## House.200.scaled *
## Distance.channel.scaled .
## Vessels.activities.2019.scaled *
## SexM ***
## CL.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 SexM
## Hs.200.scld -0.027
## Dstnc.chnn. 0.033 0.028
## Vssl..2019. 0.013 0.090 -0.451
## SexM -0.612 0.022 -0.023 0.023
## CL.scaled -0.267 -0.054 -0.050 0.033 0.373
The predictors are the same as the final model with the complete dataset. However, distance to the navigeation channel is marginaly significant, when we REML = TRUE
confint(mod.ad.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sig02 NA NA
## .sigma NA NA
## (Intercept) 0.70676987 1.0075717832
## House.200.scaled -0.26265045 -0.0273826641
## Distance.channel.scaled -0.23253615 0.0009588677
## Vessels.activities.2019.scaled 0.03669861 0.3032321795
## SexM 0.17804502 0.4646220562
## CL.scaled 0.04928730 0.2027395520
r.squaredGLMM(mod.ad.final)
## R2m R2c
## [1,] 0.0837322 0.6162881
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.escape.vif)
## GVIF df GVIF^(1/(2*df))
## House.400.scaled 2.3308 1 1.5267
## Urban.200.scaled 5.0789 1 2.2536
## Distance.channel.scaled 11.8394 1 3.4408
## Vessels.activities.2019.scaled 8.6569 1 2.9423
## Order.trial.scaled 1.1997 1 1.0953
## Sex 1.1115 1 1.0543
## Year 9.1332 1 3.0221
## JulianDay.scaled 6.4938 1 2.5483
## Hour.Platform.scaled 2.5549 1 1.5984
## ID.Temperature.scaled 5.9269 1 2.4345
## CL.scaled 1.1104 1 1.0538
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, na.action=na.exclude)
gvif(mod.escape.vif)
## GVIF df GVIF^(1/(2*df))
## House.400.scaled 2.0351 1 1.4266
## Urban.200.scaled 1.0732 1 1.0360
## Vessels.activities.2019.scaled 1.9497 1 1.3963
## Order.trial.scaled 1.1729 1 1.0830
## Sex 1.1071 1 1.0522
## Year 2.2731 1 1.5077
## Hour.Platform.scaled 2.0452 1 1.4301
## ID.Temperature.scaled 1.5382 1 1.2402
## CL.scaled 1.0704 1 1.0346
Compared to the complete dataset, distance to the channel and Julian Day were deleted given that the GVIF^(1/(2*df)) were over two. (Tank.Site and Wind was deleted given that all the tests are made in tanks)
# Visualization of the correlations
cor.pearson.escape <- df.tank[, c(20,21,17,6,10,12,8)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)
No correlation over 0.8
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, REML = TRUE, na.action=na.exclude)
## boundary (singular) fit: see help('isSingular')
summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Year + Hour.Platform.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## Data: df.tank
##
## REML criterion at convergence: 648.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.23771 -0.52877 -0.02912 0.49838 2.34029
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.5137 0.7167
## Site (Intercept) 0.0000 0.0000
## Residual 0.6923 0.8321
## Number of obs: 220, groups: ID, 117; Site, 8
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 5.879652 0.593778 102.406961 9.902
## House.400.scaled 0.460594 0.452085 169.702156 1.019
## Urban.200.scaled 3.236814 1.569999 98.058242 2.062
## Vessels.activities.2019.scaled -0.121615 0.115378 125.146793 -1.054
## Order.trial.scaled -0.485148 0.081520 144.248383 -5.951
## SexM -0.432503 0.199407 105.078607 -2.169
## Year2020 -0.200102 0.256568 132.934496 -0.780
## Hour.Platform.scaled -0.234133 0.125693 191.197817 -1.863
## ID.Temperature.scaled -0.009594 0.151128 198.195560 -0.063
## CL.scaled 0.308844 0.105586 115.189939 2.925
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## House.400.scaled 0.30974
## Urban.200.scaled 0.04189 *
## Vessels.activities.2019.scaled 0.29389
## Order.trial.scaled 1.93e-08 ***
## SexM 0.03234 *
## Year2020 0.43683
## Hour.Platform.scaled 0.06403 .
## ID.Temperature.scaled 0.94944
## CL.scaled 0.00415 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.400. U.200. V..201 Ordr.. SexM Yr2020 Hr.Pl. ID.Tm.
## Hs.400.scld 0.188
## Urbn.200.sc 0.889 -0.086
## Vssl..2019. 0.093 0.103 0.057
## Ordr.trl.sc -0.092 -0.184 0.030 -0.088
## SexM -0.293 -0.016 -0.110 0.096 -0.028
## Year2020 -0.286 0.096 -0.148 -0.551 0.125 -0.068
## Hr.Pltfrm.s 0.074 0.519 0.079 0.241 0.031 -0.077 -0.058
## ID.Tmprtr.s 0.086 -0.135 0.053 0.036 0.026 0.057 -0.436 0.058
## CL.scaled -0.083 0.011 -0.048 -0.007 0.029 0.251 0.006 0.068 0.104
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
plot(mod.escape.full)
hist(residuals(mod.escape.full))
qqnorm(resid(mod.escape.full))
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.tank, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.
The less complex model is always first.
anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.tank
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 12 674.13 714.85 -325.06 650.13
## mod.escape.dummy.site 13 676.13 720.24 -325.06 650.13 0 1 1
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.tank
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 12 674.13 714.85 -325.06 650.13
## mod.escape.dummy.ID 13 657.83 701.95 -315.92 631.83 18.296 1 1.891e-05
##
## mod.escape.null
## mod.escape.dummy.ID ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.tank
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.ID 12 655.83 696.55 -315.92 631.83
## mod.escape.ID.site 13 657.83 701.95 -315.92 631.83 0 1 1
The test is considered significant when chisq is over 3.84
Only turtle identity will be kept.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = FALSE)
summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Year + Hour.Platform.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | ID)
## Data: df.tank
##
## AIC BIC logLik deviance df.resid
## 655.8 696.6 -315.9 631.8 208
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.25364 -0.54334 -0.02882 0.50638 2.35909
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4558 0.6751
## Residual 0.6805 0.8249
## Number of obs: 220, groups: ID, 117
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.87987 0.57180 109.46879 10.283 < 2e-16
## House.400.scaled 0.45833 0.43873 182.09936 1.045 0.29756
## Urban.200.scaled 3.23333 1.51095 104.63865 2.140 0.03468
## Vessels.activities.2019.scaled -0.12243 0.11143 134.60421 -1.099 0.27386
## Order.trial.scaled -0.48370 0.08043 148.14495 -6.014 1.35e-08
## SexM -0.43516 0.19209 111.83199 -2.265 0.02541
## Year2020 -0.19496 0.24800 141.90108 -0.786 0.43310
## Hour.Platform.scaled -0.23768 0.12336 199.85420 -1.927 0.05542
## ID.Temperature.scaled -0.01540 0.14714 207.47927 -0.105 0.91675
## CL.scaled 0.30741 0.10185 123.12806 3.018 0.00309
##
## (Intercept) ***
## House.400.scaled
## Urban.200.scaled *
## Vessels.activities.2019.scaled
## Order.trial.scaled ***
## SexM *
## Year2020
## Hour.Platform.scaled .
## ID.Temperature.scaled
## CL.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.400. U.200. V..201 Ordr.. SexM Yr2020 Hr.Pl. ID.Tm.
## Hs.400.scld 0.189
## Urbn.200.sc 0.888 -0.084
## Vssl..2019. 0.095 0.111 0.058
## Ordr.trl.sc -0.095 -0.188 0.030 -0.091
## SexM -0.294 -0.018 -0.110 0.096 -0.029
## Year2020 -0.287 0.094 -0.149 -0.550 0.130 -0.069
## Hr.Pltfrm.s 0.076 0.527 0.080 0.247 0.028 -0.079 -0.057
## ID.Tmprtr.s 0.088 -0.137 0.054 0.035 0.020 0.058 -0.440 0.052
## CL.scaled -0.082 0.011 -0.048 -0.007 0.029 0.249 0.006 0.068 0.103
mod.escape.final <- lmer(log.escape ~ Urban.200.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Hour.Platform.scaled + CL.scaled + (1|ID), data = df.tank, na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log.escape ~ Urban.200.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Hour.Platform.scaled + CL.scaled + (1 | ID)
## Data: df.tank
##
## REML criterion at convergence: 708.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.13022 -0.54397 -0.01645 0.51404 2.48086
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4210 0.6488
## Residual 0.7858 0.8865
## Number of obs: 238, groups: ID, 117
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.56921 0.53791 117.44608 10.353 < 2e-16
## Urban.200.scaled 3.30664 1.50242 117.65409 2.201 0.02970
## Vessels.activities.2019.scaled -0.18531 0.08859 124.33279 -2.092 0.03849
## Order.trial.scaled -0.39888 0.07563 173.92799 -5.274 3.92e-07
## SexM -0.43131 0.19030 114.12791 -2.266 0.02531
## Hour.Platform.scaled -0.27239 0.09880 199.22598 -2.757 0.00637
## CL.scaled 0.29634 0.10133 128.53753 2.924 0.00408
##
## (Intercept) ***
## Urban.200.scaled *
## Vessels.activities.2019.scaled *
## Order.trial.scaled ***
## SexM *
## Hour.Platform.scaled **
## CL.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) U.200. V..201 Ordr.. SexM Hr.Pl.
## Urbn.200.sc 0.942
## Vssl..2019. -0.136 -0.029
## Ordr.trl.sc 0.001 0.009 -0.016
## SexM -0.330 -0.121 0.086 -0.019
## Hr.Pltfrm.s -0.061 0.117 0.193 0.021 -0.099
## CL.scaled -0.081 -0.045 0.022 0.017 0.237 0.059
plot(allEffects(mod.escape.final))
confint(mod.escape.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 4.51491392 6.62350186
## Urban.200.scaled 0.36194199 6.25133389
## Vessels.activities.2019.scaled -0.35893569 -0.01168287
## Order.trial.scaled -0.54710737 -0.25066060
## SexM -0.80429309 -0.05832559
## Hour.Platform.scaled -0.46602875 -0.07875158
## CL.scaled 0.09772721 0.49495065
r.squaredGLMM(mod.escape.final)
## R2m R2c
## [1,] 0.2240638 0.4947299
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.site, na.action=na.exclude)
gvif(mod.escape.vif)
## GVIF df GVIF^(1/(2*df))
## House.400.scaled 2.0022 1 1.4150
## Urban.200.scaled 1.6555 1 1.2867
## Distance.channel.scaled 2.3954 1 1.5477
## Vessels.activities.2019.scaled 2.6112 1 1.6159
## Order.trial.scaled 1.3866 1 1.1775
## JulianDay.scaled 2.3034 1 1.5177
## Sex 1.1794 1 1.0860
## Year 1.4838 1 1.2181
## Hour.Platform.scaled 1.2113 1 1.1006
## ID.Temperature.scaled 1.4818 1 1.2173
## CL.scaled 1.2999 1 1.1401
## Wind.scaled 1.1720 1 1.0826
All the GVIF are under 2. Tank.Site was deleted given that all the tests were made at the sampling sites.
# Visualization of the correlations
cor.pearson.escape <- df.site[, c(20,21,16,17,6,10,12,8,3,11)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)
No correlation over 0.8
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.site, REML = TRUE, na.action=na.exclude)
summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled +
## Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
## Data: df.site
##
## REML criterion at convergence: 1934.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.03424 -0.52458 -0.05845 0.45745 3.07636
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.42063 0.6486
## Site (Intercept) 0.03604 0.1898
## Residual 0.70938 0.8422
## Number of obs: 644, groups: ID, 543; Site, 18
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 3.400933 0.119522 66.006726 28.454
## House.400.scaled 0.002386 0.091342 11.498662 0.026
## Urban.200.scaled 0.087840 0.084148 16.247817 1.044
## Distance.channel.scaled 0.007919 0.076304 16.342015 0.104
## Vessels.activities.2019.scaled -0.037176 0.088738 13.699813 -0.419
## Order.trial.scaled -0.158465 0.049737 504.473734 -3.186
## JulianDay.scaled 0.176294 0.107227 22.658179 1.644
## SexM 0.108173 0.098128 535.461380 1.102
## Year2020 -0.158271 0.115948 147.736816 -1.365
## Hour.Platform.scaled -0.002209 0.056370 378.302089 -0.039
## ID.Temperature.scaled -0.187366 0.067248 147.978684 -2.786
## CL.scaled 0.140614 0.053337 502.692658 2.636
## Wind.scaled -0.109935 0.051837 395.937705 -2.121
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## House.400.scaled 0.97961
## Urban.200.scaled 0.31183
## Distance.channel.scaled 0.91860
## Vessels.activities.2019.scaled 0.68175
## Order.trial.scaled 0.00153 **
## JulianDay.scaled 0.11396
## SexM 0.27080
## Year2020 0.17432
## Hour.Platform.scaled 0.96876
## ID.Temperature.scaled 0.00603 **
## CL.scaled 0.00864 **
## Wind.scaled 0.03456 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
plot(mod.escape.full)
hist(residuals(mod.escape.full))
qqnorm(resid(mod.escape.full))
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy), data = df.site, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|Site), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|ID), data = df.site, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
The less complex model is always first.
anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.site
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 15 1937.2 2004.2 -953.61 1907.2
## mod.escape.dummy.site 16 1938.9 2010.4 -953.44 1906.9 0.3241 1 0.5691
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.site
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 15 1937.2 2004.2 -953.61 1907.2
## mod.escape.dummy.ID 16 1918.8 1990.3 -943.39 1886.8 20.439 1 6.155e-06
##
## mod.escape.null
## mod.escape.dummy.ID ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.site
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.ID 15 1916.8 1983.8 -943.39 1886.8
## mod.escape.ID.site 16 1918.3 1989.8 -943.16 1886.3 0.4443 1 0.505
Sampling sites identity is not significant. We do not need to keep it.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site, na.action=na.exclude, REML = FALSE)
summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + JulianDay.scaled +
## Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + Wind.scaled + (1 | ID)
## Data: df.site
##
## AIC BIC logLik deviance df.resid
## 1916.8 1983.8 -943.4 1886.8 629
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.07632 -0.54111 -0.07514 0.47475 3.10382
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4110 0.6411
## Residual 0.7143 0.8451
## Number of obs: 644, groups: ID, 543
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 3.417888 0.103880 609.245182 32.902
## House.400.scaled -0.002958 0.063208 513.669111 -0.047
## Urban.200.scaled 0.070422 0.062404 353.897227 1.128
## Distance.channel.scaled 0.004135 0.056676 575.817746 0.073
## Vessels.activities.2019.scaled -0.035589 0.063228 592.476520 -0.563
## Order.trial.scaled -0.164068 0.048554 553.003321 -3.379
## JulianDay.scaled 0.150289 0.081617 611.265644 1.841
## SexM 0.116433 0.096102 550.984282 1.212
## Year2020 -0.181213 0.103722 641.340659 -1.747
## Hour.Platform.scaled 0.006046 0.053214 615.188829 0.114
## ID.Temperature.scaled -0.215403 0.059974 641.873134 -3.592
## CL.scaled 0.149622 0.051533 571.436808 2.903
## Wind.scaled -0.139766 0.049118 580.221881 -2.845
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## House.400.scaled 0.962696
## Urban.200.scaled 0.259882
## Distance.channel.scaled 0.941866
## Vessels.activities.2019.scaled 0.573735
## Order.trial.scaled 0.000778 ***
## JulianDay.scaled 0.066046 .
## SexM 0.226205
## Year2020 0.081099 .
## Hour.Platform.scaled 0.909576
## ID.Temperature.scaled 0.000354 ***
## CL.scaled 0.003834 **
## Wind.scaled 0.004591 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
mod.escape.final <- lmer(log.escape ~ Order.trial.scaled + Year + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.site, na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log.escape ~ Order.trial.scaled + Year + ID.Temperature.scaled +
## CL.scaled + Wind.scaled + (1 | ID)
## Data: df.site
##
## REML criterion at convergence: 2058.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.13146 -0.52103 -0.06498 0.48825 2.97272
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4024 0.6344
## Residual 0.7467 0.8641
## Number of obs: 691, groups: ID, 580
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.53021 0.06775 639.52682 52.109 < 2e-16 ***
## Order.trial.scaled -0.13070 0.03957 656.39902 -3.303 0.00101 **
## Year2020 -0.23873 0.08632 678.62445 -2.766 0.00583 **
## ID.Temperature.scaled -0.19515 0.04956 683.11625 -3.938 9.06e-05 ***
## CL.scaled 0.10017 0.04249 594.66475 2.358 0.01872 *
## Wind.scaled -0.13891 0.04573 660.63698 -3.037 0.00248 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Ordr.. Yr2020 ID.Tm. CL.scl
## Ordr.trl.sc 0.208
## Year2020 -0.662 -0.217
## ID.Tmprtr.s -0.265 0.022 -0.044
## CL.scaled -0.104 -0.012 0.063 0.096
## Wind.scaled -0.191 -0.067 -0.157 0.116 0.024
As the final model with the complete dataset. No variables quantifying human disturbance are significant.
confint(mod.escape.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 3.39743466 3.66299504
## Order.trial.scaled -0.20824636 -0.05314408
## Year2020 -0.40790409 -0.06955282
## ID.Temperature.scaled -0.29227447 -0.09801996
## CL.scaled 0.01689495 0.18344015
## Wind.scaled -0.22855108 -0.04927326
r.squaredGLMM(mod.escape.final)
## R2m R2c
## [1,] 0.08020707 0.402319
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 18.2975 1 4.2776
## Urban.600.scaled 25.9356 1 5.0927
## Distance.channel.scaled 11.3962 1 3.3758
## Vessels.activities.2019.scaled 11.1929 1 3.3456
## Order.trial.scaled 1.1148 1 1.0558
## Sex 1.1244 1 1.0604
## Year 10.8123 1 3.2882
## JulianDay.scaled 10.2275 1 3.1980
## Hour.Platform.scaled 2.6746 1 1.6354
## ID.Temperature.scaled 6.0944 1 2.4687
## CL.scaled 1.1335 1 1.0647
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.tank, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 1.5662 1 1.2515
## Distance.channel.scaled 1.4032 1 1.1846
## Order.trial.scaled 1.1285 1 1.0623
## Sex 1.0813 1 1.0399
## Year 1.8545 1 1.3618
## Hour.Platform.scaled 1.6671 1 1.2912
## ID.Temperature.scaled 2.1969 1 1.4822
## CL.scaled 1.0780 1 1.0383
Urban areas within 600m, Vessels crossings and Julian Day were deleted given that the GVIF^(1/(2*df)) was over two. Tank.Site was deleted given all the tests were performed in the tanks.
# Visualization of the correlations
cor.pearson.emergence <- df.tank[, c(19,6,10,12,8,16)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)
No correlation over 0.8
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 1.7172 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled +
## Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + (1 | ID)
## Data: df.tank
##
## AIC BIC logLik deviance df.resid
## 202.3 235.7 -91.2 182.3 197
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.37486 -0.29479 -0.17205 -0.08341 2.56695
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 4.727 2.174
## Number of obs: 207, groups: ID, 114
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.4970 1.6286 -2.761 0.00576 **
## House.300.scaled 1.2822 1.6497 0.777 0.43700
## Distance.channel.scaled -0.3232 0.5369 -0.602 0.54719
## Order.trial.scaled -0.4320 0.3516 -1.229 0.21920
## SexM 0.7879 0.7763 1.015 0.31017
## Year2020 2.1911 0.9759 2.245 0.02475 *
## Hour.Platform.scaled 0.5739 0.4885 1.175 0.24013
## ID.Temperature.scaled -1.0509 0.6774 -1.551 0.12081
## CL.scaled 0.0717 0.4031 0.178 0.85882
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. Ordr.. SexM Yr2020 Hr.Pl. ID.Tm.
## Hs.300.scld 0.208
## Dstnc.chnn. -0.122 -0.101
## Ordr.trl.sc 0.126 -0.202 -0.030
## SexM -0.396 -0.065 -0.012 -0.054
## Year2020 -0.689 0.169 -0.014 -0.097 0.062
## Hr.Pltfrm.s -0.290 0.436 0.016 0.063 -0.094 0.298
## ID.Tmprtr.s 0.446 -0.231 -0.330 0.215 -0.020 -0.581 -0.133
## CL.scaled 0.026 -0.046 -0.037 0.078 0.230 -0.071 0.083 0.168
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 1.7172 (tol = 0.002, component 1)
The model do not converge. We will determine each optimizer we need to use to make the model converge.
gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 1.71524 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0771633 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00705438 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef))
## (Intercept) House.300.scaled
## bobyqa -10.131803 2.607682
## Nelder_Mead -4.497529 1.279237
## nlminbwrap -10.131797 2.607676
## nmkbw -10.312362 2.569589
## optimx.L-BFGS-B -10.131834 2.607835
## nloptwrap.NLOPT_LN_NELDERMEAD -10.130717 2.608733
## nloptwrap.NLOPT_LN_BOBYQA -10.127401 2.614272
## Distance.channel.scaled Order.trial.scaled
## bobyqa 0.04567428 -0.6127357
## Nelder_Mead -0.32304131 -0.4323083
## nlminbwrap 0.04567145 -0.6127291
## nmkbw 0.08144834 -0.6215233
## optimx.L-BFGS-B 0.04568636 -0.6126979
## nloptwrap.NLOPT_LN_NELDERMEAD 0.04556557 -0.6127748
## nloptwrap.NLOPT_LN_BOBYQA 0.04319892 -0.6120725
## SexM Year2020 Hour.Platform.scaled
## bobyqa 0.4571711 3.118528 1.2261513
## Nelder_Mead 0.7868883 2.190561 0.5743365
## nlminbwrap 0.4571552 3.118536 1.2261509
## nmkbw 0.4048173 3.116476 1.2503968
## optimx.L-BFGS-B 0.4569680 3.118884 1.2262064
## nloptwrap.NLOPT_LN_NELDERMEAD 0.4565541 3.118425 1.2262440
## nloptwrap.NLOPT_LN_BOBYQA 0.4583994 3.118709 1.2266148
## ID.Temperature.scaled CL.scaled
## bobyqa -1.985905 -0.008885313
## Nelder_Mead -1.049771 0.072620773
## nlminbwrap -1.985914 -0.008883853
## nmkbw -2.026999 -0.036137320
## optimx.L-BFGS-B -1.985988 -0.008909886
## nloptwrap.NLOPT_LN_NELDERMEAD -1.985703 -0.009157880
## nloptwrap.NLOPT_LN_BOBYQA -1.984661 -0.004728427
sapply(gm_all,logLik)
## bobyqa Nelder_Mead
## -88.02733 -91.16247
## nlminbwrap nmkbw
## -88.02733 -88.02514
## optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD
## -88.02733 -88.02733
## nloptwrap.NLOPT_LN_BOBYQA
## -88.02735
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
plot(mod.emergence.full)
hist(residuals(mod.emergence.full))
qqnorm(resid(mod.emergence.full))
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.tank, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
The less complex model is always first.
anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.tank
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df
## mod.emergence.null 10 215.92 249.25 -97.961 195.92
## mod.emergence.dummy.site 11 217.92 254.58 -97.961 195.92 0 1
## Pr(>Chisq)
## mod.emergence.null
## mod.emergence.dummy.site 1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.tank
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.null 10 215.92 249.25 -97.961 195.92
## mod.emergence.dummy.ID 11 203.59 240.25 -90.795 181.59 14.331 1 0.0001533
##
## mod.emergence.null
## mod.emergence.dummy.ID ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.tank
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.ID 10 196.06 229.38 -88.027 176.06
## mod.emergence.ID.site 11 198.06 234.72 -88.027 176.06 0 1 1
Sampling sites identity is not significant. We do not need to keep it. Also, we have convergence problems with site in addition to turtle identity even with the use of an optimizer.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled + Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Order.trial.scaled +
## Sex + Year + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + (1 | ID)
## Data: df.tank
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 196.1 229.4 -88.0 176.1 197
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.25990 -0.03546 -0.01491 -0.00501 2.75620
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 93.65 9.678
## Number of obs: 207, groups: ID, 114
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.13183 3.13622 -3.231 0.00124 **
## House.300.scaled 2.60784 3.32274 0.785 0.43254
## Distance.channel.scaled 0.04569 0.92617 0.049 0.96066
## Order.trial.scaled -0.61270 0.48392 -1.266 0.20547
## SexM 0.45697 1.58836 0.288 0.77358
## Year2020 3.11888 1.89991 1.642 0.10067
## Hour.Platform.scaled 1.22621 0.80198 1.529 0.12627
## ID.Temperature.scaled -1.98599 1.02362 -1.940 0.05236 .
## CL.scaled -0.00891 0.84521 -0.011 0.99159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. Ordr.. SexM Yr2020 Hr.Pl. ID.Tm.
## Hs.300.scld 0.428
## Dstnc.chnn. -0.177 -0.066
## Ordr.trl.sc -0.018 -0.100 -0.054
## SexM -0.276 0.038 0.007 0.008
## Year2020 -0.624 -0.069 -0.058 0.049 0.026
## Hr.Pltfrm.s -0.434 0.230 0.038 0.122 -0.064 0.343
## ID.Tmprtr.s 0.272 -0.152 -0.260 0.174 -0.006 -0.433 -0.060
## CL.scaled -0.052 -0.031 -0.037 0.028 0.223 0.048 0.032 0.114
mod.emergence.final <- glmer(emergence ~ + (1|ID), data = df.tank, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: emergence ~ +(1 | ID)
## Data: df.tank
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 197.9 204.7 -97.0 193.9 217
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.33880 -0.02252 -0.02196 -0.02147 1.48242
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 109.1 10.45
## Number of obs: 219, groups: ID, 119
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.532 1.154 -6.525 6.79e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
No variable is significant. The final model is a null model.
confint(mod.emergence.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## (Intercept) -9.794052 -5.269458
r.squaredGLMM(mod.emergence.final)
## R2m R2c
## theoretical 0 9.707410e-01
## delta 0 2.423606e-14
marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 3.4151 1 1.8480
## Urban.600.scaled 4.4203 1 2.1025
## Distance.channel.scaled 1.5106 1 1.2291
## Vessels.activities.2019.scaled 1.6971 1 1.3027
## Order.trial.scaled 1.3182 1 1.1481
## Sex 1.2127 1 1.1012
## Year 1.4330 1 1.1971
## JulianDay.scaled 2.2220 1 1.4906
## Hour.Platform.scaled 1.0899 1 1.0440
## ID.Temperature.scaled 1.8619 1 1.3645
## CL.scaled 1.2265 1 1.1075
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.site, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 1.3813 1 1.1753
## Distance.channel.scaled 1.5128 1 1.2300
## Vessels.activities.2019.scaled 1.7017 1 1.3045
## Order.trial.scaled 1.2552 1 1.1204
## Sex 1.1767 1 1.0848
## Year 1.4012 1 1.1837
## JulianDay.scaled 2.0201 1 1.4213
## Hour.Platform.scaled 1.0880 1 1.0431
## ID.Temperature.scaled 1.8255 1 1.3511
## CL.scaled 1.2126 1 1.1012
Urban areas within 600m was deleted given that the GVIF^(1/(2*df)) was over two. Tank.Site was deleted given that all the observations were performed at the sampling sites.
# Visualization of the correlations
cor.pearson.emergence <- df.site[, c(19,6,3,10,12,8,16,17)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)
No correlation over 0.8
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.site, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0346845 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | ID) + (1 | Site)
## Data: df.site
##
## AIC BIC logLik deviance df.resid
## 918.0 978.8 -446.0 892.0 784
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6742 -0.5640 -0.3769 0.7739 3.8883
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.70974 0.8425
## Site (Intercept) 0.08752 0.2958
## Number of obs: 797, groups: ID, 664; Site, 21
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.658972 0.232527 -2.834 0.004598 **
## House.300.scaled -0.201521 0.135152 -1.491 0.135943
## Distance.channel.scaled -0.169129 0.145818 -1.160 0.246103
## Vessels.activities.2019.scaled 0.183161 0.141140 1.298 0.194380
## Order.trial.scaled 0.374200 0.107775 3.472 0.000517 ***
## SexM -0.210585 0.215950 -0.975 0.329483
## Year2020 -0.051072 0.236524 -0.216 0.829043
## JulianDay.scaled 0.471121 0.173768 2.711 0.006704 **
## Hour.Platform.scaled -0.003869 0.120376 -0.032 0.974358
## ID.Temperature.scaled 0.102556 0.134758 0.761 0.446631
## CL.scaled -0.325814 0.119957 -2.716 0.006606 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. V..201 Ordr.. SexM Yr2020 JlnDy. Hr.Pl.
## Hs.300.scld 0.054
## Dstnc.chnn. 0.272 0.199
## Vssl..2019. -0.223 -0.161 -0.500
## Ordr.trl.sc 0.155 -0.154 -0.025 0.038
## SexM -0.603 0.070 -0.020 -0.001 -0.086
## Year2020 -0.533 -0.114 -0.269 0.265 -0.215 0.052
## JulnDy.scld 0.096 0.296 0.281 -0.342 -0.066 0.054 -0.076
## Hr.Pltfrm.s 0.200 -0.052 0.045 -0.069 0.222 0.011 -0.039 0.096
## ID.Tmprtr.s -0.033 0.136 -0.113 -0.050 0.041 0.012 -0.231 -0.436 -0.101
## CL.scaled -0.163 -0.016 -0.022 -0.017 -0.164 0.363 0.031 0.040 0.039
## ID.Tm.
## Hs.300.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Year2020
## JulnDy.scld
## Hr.Pltfrm.s
## ID.Tmprtr.s
## CL.scaled -0.060
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0346845 (tol = 0.002, component 1)
gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0308026 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw : [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00263954 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef))
## (Intercept) House.300.scaled
## bobyqa -0.6565887 -0.2018033
## Nelder_Mead -0.6582006 -0.2015575
## nlminbwrap -0.6565884 -0.2018023
## nmkbw -0.6566706 -0.2018181
## optimx.L-BFGS-B -0.6565343 -0.2017890
## nloptwrap.NLOPT_LN_NELDERMEAD -0.6565220 -0.2017892
## nloptwrap.NLOPT_LN_BOBYQA -0.6567184 -0.2017706
## Distance.channel.scaled
## bobyqa -0.1681996
## Nelder_Mead -0.1691471
## nlminbwrap -0.1681997
## nmkbw -0.1681419
## optimx.L-BFGS-B -0.1681665
## nloptwrap.NLOPT_LN_NELDERMEAD -0.1681539
## nloptwrap.NLOPT_LN_BOBYQA -0.1681984
## Vessels.activities.2019.scaled Order.trial.scaled
## bobyqa 0.1816207 0.3738560
## Nelder_Mead 0.1830299 0.3738660
## nlminbwrap 0.1816225 0.3738563
## nmkbw 0.1815766 0.3738302
## optimx.L-BFGS-B 0.1815875 0.3738489
## nloptwrap.NLOPT_LN_NELDERMEAD 0.1815746 0.3738593
## nloptwrap.NLOPT_LN_BOBYQA 0.1816178 0.3738183
## SexM Year2020 JulianDay.scaled
## bobyqa -0.2104532 -0.05595727 0.4749174
## Nelder_Mead -0.2110695 -0.05193873 0.4711612
## nlminbwrap -0.2104545 -0.05595727 0.4749210
## nmkbw -0.2103401 -0.05595822 0.4748682
## optimx.L-BFGS-B -0.2104473 -0.05594978 0.4749722
## nloptwrap.NLOPT_LN_NELDERMEAD -0.2104210 -0.05603676 0.4749007
## nloptwrap.NLOPT_LN_BOBYQA -0.2104047 -0.05598458 0.4748766
## Hour.Platform.scaled ID.Temperature.scaled
## bobyqa -0.004316513 0.1005477
## Nelder_Mead -0.004215164 0.1027812
## nlminbwrap -0.004315633 0.1005480
## nmkbw -0.004260918 0.1006100
## optimx.L-BFGS-B -0.004297641 0.1005027
## nloptwrap.NLOPT_LN_NELDERMEAD -0.004206986 0.1005645
## nloptwrap.NLOPT_LN_BOBYQA -0.004546246 0.1006092
## CL.scaled
## bobyqa -0.3250463
## Nelder_Mead -0.3261493
## nlminbwrap -0.3250481
## nmkbw -0.3250179
## optimx.L-BFGS-B -0.3250286
## nloptwrap.NLOPT_LN_NELDERMEAD -0.3250195
## nloptwrap.NLOPT_LN_BOBYQA -0.3250503
sapply(gm_all,logLik)
## bobyqa Nelder_Mead
## -445.9923 -445.9930
## nlminbwrap nmkbw
## -445.9923 -445.9923
## optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD
## -445.9923 -445.9923
## nloptwrap.NLOPT_LN_BOBYQA
## -445.9923
We decided to use optimx.L-BFGS-B optimizer to include in our initial model.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
plot(mod.emergence.full)
hist(residuals(mod.emergence.full))
qqnorm(resid(mod.emergence.full))
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.site, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
The less complex model is always first.
anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.site
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df
## mod.emergence.null 12 925.52 981.69 -450.76 901.52
## mod.emergence.dummy.site 13 923.55 984.41 -448.78 897.55 3.9684 1
## Pr(>Chisq)
## mod.emergence.null
## mod.emergence.dummy.site 0.04636 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.site
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.null 12 925.52 981.69 -450.76 901.52
## mod.emergence.dummy.ID 13 919.43 980.28 -446.71 893.43 8.0941 1 0.004441
##
## mod.emergence.null
## mod.emergence.dummy.ID **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.site
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.ID 12 917.18 973.35 -446.59 893.18
## mod.emergence.ID.site 13 917.98 978.84 -445.99 891.98 1.1975 1 0.2738
Sampling sites identity is not significant and it is added to turtle identity. We do not need to keep it.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | ID)
## Data: df.site
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 917.2 973.4 -446.6 893.2 785
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8174 -0.5437 -0.3671 0.7573 4.1442
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.9313 0.965
## Number of obs: 797, groups: ID, 664
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.68198 0.22651 -3.011 0.002605 **
## House.300.scaled -0.20668 0.11946 -1.730 0.083613 .
## Distance.channel.scaled -0.15655 0.12775 -1.225 0.220398
## Vessels.activities.2019.scaled 0.18662 0.11960 1.560 0.118686
## Order.trial.scaled 0.39987 0.10724 3.729 0.000193 ***
## SexM -0.21632 0.21945 -0.986 0.324255
## Year2020 -0.01116 0.22710 -0.049 0.960805
## JulianDay.scaled 0.48296 0.15748 3.067 0.002163 **
## Hour.Platform.scaled 0.01518 0.11627 0.131 0.896131
## ID.Temperature.scaled 0.10227 0.12733 0.803 0.421894
## CL.scaled -0.36329 0.11896 -3.054 0.002260 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. V..201 Ordr.. SexM Yr2020 JlnDy. Hr.Pl.
## Hs.300.scld 0.050
## Dstnc.chnn. 0.299 0.197
## Vssl..2019. -0.276 -0.183 -0.511
## Ordr.trl.sc 0.152 -0.180 -0.065 0.054
## SexM -0.627 0.095 -0.013 0.000 -0.080
## Year2020 -0.553 -0.116 -0.320 0.336 -0.248 0.063
## JulnDy.scld 0.044 0.267 0.272 -0.299 -0.088 0.071 -0.081
## Hr.Pltfrm.s 0.196 -0.081 0.035 -0.062 0.215 0.014 -0.048 0.053
## ID.Tmprtr.s -0.043 0.138 -0.123 -0.084 0.055 0.005 -0.217 -0.458 -0.106
## CL.scaled -0.149 -0.017 0.019 -0.039 -0.142 0.362 0.060 0.063 0.074
## ID.Tm.
## Hs.300.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Year2020
## JulnDy.scld
## Hr.Pltfrm.s
## ID.Tmprtr.s
## CL.scaled -0.087
mod.emergence.final <- glmer(emergence ~ Order.trial.scaled + JulianDay.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.site, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ Order.trial.scaled + JulianDay.scaled + ID.Temperature.scaled +
## CL.scaled + (1 | ID)
## Data: df.site
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 980.4 1008.8 -484.2 968.4 838
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5253 -0.5548 -0.3726 0.7717 3.4169
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 1.061 1.03
## Number of obs: 844, groups: ID, 701
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86585 0.12402 -6.982 2.92e-12 ***
## Order.trial.scaled 0.22926 0.09111 2.516 0.01187 *
## JulianDay.scaled 0.54278 0.13612 3.988 6.68e-05 ***
## ID.Temperature.scaled 0.25691 0.11325 2.268 0.02330 *
## CL.scaled -0.27323 0.10183 -2.683 0.00729 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Ordr.. JlnDy. ID.Tm.
## Ordr.trl.sc -0.100
## JulnDy.scld -0.056 -0.170
## ID.Tmprtr.s -0.281 0.112 -0.471
## CL.scaled 0.194 -0.104 0.054 -0.091
confint(mod.emergence.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## (Intercept) -1.10892321 -0.62278386
## Order.trial.scaled 0.05067511 0.40783862
## JulianDay.scaled 0.27599121 0.80957608
## ID.Temperature.scaled 0.03494129 0.47887987
## CL.scaled -0.47280808 -0.07364491
r.squaredGLMM(mod.emergence.final)
## R2m R2c
## theoretical 0.1479719 0.3557077
## delta 0.1179110 0.2834449
marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.
Deletion of turtles with only one observation.
df.double <- df[df$ID %in% df$ID[duplicated(df$ID)],]
n_distinct(df.double$ID)
## [1] 221
We have 221 turtles tested more than once. By only keeping the turtles tested more than once, we are passing from 1135 observations to 626 observations. Not all entries have an observations for the three behaviors at the same time. Thus, we need to create a dataset for each behavior by deleting the missing observations for each of them.
#Sum of active defensive behaviors
df.double.ad <- df.double[complete.cases(df.double[ , 9]),]
df.double.ad <- df.double.ad[df.double.ad$ID %in% df.double.ad$ID[duplicated(df.double.ad$ID)],]
n_distinct(df.double.ad$ID)
## [1] 221
#Escape latency
df.double.escape <- df.double[complete.cases(df.double[ , 13]),]
df.double.escape <- df.double.escape[df.double.escape$ID %in% df.double.escape$ID[duplicated(df.double.escape$ID)],]
n_distinct(df.double.escape$ID)
## [1] 218
#Emergence of the turtle after escaping
df.double.emergence <- df.double[complete.cases(df.double[ , 14]),]
df.double.emergence <- df.double.emergence[df.double.emergence$ID %in% df.double.emergence$ID[duplicated(df.double.emergence$ID)],]
n_distinct(df.double.emergence$ID)
## [1] 215
table(df.double.escape$Site)
##
## BR1 BR2-2020 C1 CB1 CL2 LR1 RR1
## 95 13 23 35 50 8 50
## RR10 RR2-2-2019 RR2-2020 RR3-1 RR3-2 RR4 RR5
## 4 11 34 14 26 0 6
## RR6 RR7 RR8 RR9 RS1 SA1 UP6
## 27 15 5 12 67 77 4
## WF1
## 37
table(df.double.emergence$Site)
##
## BR1 BR2-2020 C1 CB1 CL2 LR1 RR1
## 93 13 22 33 50 8 48
## RR10 RR2-2-2019 RR2-2020 RR3-1 RR3-2 RR4 RR5
## 4 11 34 14 25 0 6
## RR6 RR7 RR8 RR9 RS1 SA1 UP6
## 29 15 5 12 64 56 4
## WF1
## 36
table(df.double.ad$Site)
##
## BR1 BR2-2020 C1 CB1 CL2 LR1 RR1
## 95 13 23 40 50 8 53
## RR10 RR2-2-2019 RR2-2020 RR3-1 RR3-2 RR4 RR5
## 4 11 34 14 26 0 6
## RR6 RR7 RR8 RR9 RS1 SA1 UP6
## 29 17 5 12 68 59 4
## WF1
## 37
All the sampling sites from the complete datset have turtles tested more than once, except RR4.
graph.vessel <- ggplot() +
geom_histogram(aes(x=df$Vessels.activities.2019),fill="black", alpha=0.9) +
geom_histogram(aes(x=df.double$Vessels.activities.2019), fill="chartreuse4") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Mean daily number of vessel crossings")
graph.channel <- ggplot() +
geom_histogram(aes(x=df$Distance.channel),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.double$Distance.channel), fill="chartreuse4") +
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Shortest distance to the navigation channel")
graph.house <- ggplot() +
geom_histogram(aes(x=df$House.200),fill="black", alpha = 0.9) + geom_histogram(aes(x=df.double$House.200), fill="chartreuse4") +
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text=element_text(size=12, colour="black")) + ylab("Frequency") + xlab("Number of houses within 200 m")
library(cowplot)
graph.total <- plot_grid(graph.channel, graph.vessel, graph.house, ncol = 3, labels = "AUTO")
graph.total
Here, we compare the frequency of the variables quantifying human disturbance: A) the shortest aquatic distance to the navigation channel in meters; B) the mean daily number of vessel crossings, and C) the number of houses with access to the canal within 200 m of the sampling site, according to the dataset used (black: all the observations from the entire dataset, green: only the behavioral measurements from turtles testes more than once).
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Urban.900.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.ad, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## House.200.scaled 5.1772 1 2.2753
## Urban.900.scaled 6.2641 1 2.5028
## Distance.channel.scaled 1.7054 1 1.3059
## Vessels.activities.2019.scaled 1.9751 1 1.4054
## Order.trial.scaled 1.2305 1 1.1093
## Sex 1.5154 1 1.2310
## Tank.Site 3.8994 1 1.9747
## Year 1.9873 1 1.4097
## JulianDay.scaled 3.5956 1 1.8962
## Hour.active.defenses.scaled 2.4747 1 1.5731
## ID.Temperature.scaled 1.8566 1 1.3626
## CL.scaled 1.3744 1 1.1723
mod.active.defenses.vif <- lm(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.ad, na.action=na.exclude)
gvif(mod.active.defenses.vif)
## GVIF df GVIF^(1/(2*df))
## House.200.scaled 2.5259 1 1.5893
## Distance.channel.scaled 1.6998 1 1.3038
## Vessels.activities.2019.scaled 1.7866 1 1.3366
## Order.trial.scaled 1.2180 1 1.1036
## Sex 1.3514 1 1.1625
## Tank.Site 3.8994 1 1.9747
## Year 1.9604 1 1.4001
## JulianDay.scaled 3.3888 1 1.8409
## Hour.active.defenses.scaled 2.4522 1 1.5660
## ID.Temperature.scaled 1.8380 1 1.3557
## CL.scaled 1.3742 1 1.1723
Urban areas within 900 m was deleted given that the GVIF^(1/(2*df)) was over two.
# Visualization of the correlations
cor.pearson.active.defenses <- df.double.ad[, c(18,23,6,3,4,12,8,16,17)]
chart.Correlation(cor.pearson.active.defenses, histogram=TRUE, pch=19)
The Pearson correlation coefficient of 0.8 between House.200 and Urban.900 confirm the deletion of Urban.900 with the calculation of the GVIF^(1/(2*df)).
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.ad, REML = TRUE, na.action=na.exclude)
summary(mod.ad.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## Data: df.double.ad
##
## REML criterion at convergence: 959.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7344 -0.5517 -0.1151 0.5299 2.7636
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4082 0.6389
## Site (Intercept) 0.0496 0.2227
## Residual 0.3155 0.5617
## Number of obs: 399, groups: ID, 211; Site, 21
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.96570 0.13431 53.72823 7.190 2.08e-09
## House.200.scaled -0.15600 0.08419 13.68989 -1.853 0.08555
## Distance.channel.scaled -0.20993 0.08464 14.82270 -2.480 0.02563
## Vessels.activities.2019.scaled 0.25597 0.10268 10.41442 2.493 0.03097
## Order.trial.scaled 0.03759 0.03747 222.80396 1.003 0.31674
## SexM 0.35314 0.13152 188.95551 2.685 0.00789
## Tank.SiteT -0.13295 0.17491 172.03545 -0.760 0.44822
## Year2020 -0.20388 0.12137 167.87540 -1.680 0.09484
## JulianDay.scaled -0.07544 0.09756 18.33891 -0.773 0.44923
## Hour.active.defenses.scaled 0.02671 0.05271 269.08360 0.507 0.61275
## ID.Temperature.scaled 0.04869 0.06461 99.88162 0.754 0.45288
## CL.scaled 0.17529 0.06738 183.56842 2.602 0.01004
##
## (Intercept) ***
## House.200.scaled .
## Distance.channel.scaled *
## Vessels.activities.2019.scaled *
## Order.trial.scaled
## SexM **
## Tank.SiteT
## Year2020 .
## JulianDay.scaled
## Hour.active.defenses.scaled
## ID.Temperature.scaled
## CL.scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 Ordr.. SexM Tnk.ST Yr2020 JlnDy.
## Hs.200.scld -0.085
## Dstnc.chnn. 0.057 0.142
## Vssl..2019. 0.093 -0.022 -0.505
## Ordr.trl.sc -0.005 0.032 -0.033 0.122
## SexM -0.653 0.182 -0.078 0.059 0.001
## Tank.SiteT -0.266 0.021 -0.148 -0.026 0.208 0.045
## Year2020 -0.239 -0.260 -0.026 -0.105 -0.271 -0.078 -0.387
## JulnDy.scld -0.022 0.343 0.231 -0.207 -0.143 0.097 -0.467 0.197
## Hr.ctv.dfn. 0.147 0.068 0.062 0.007 0.107 -0.043 -0.609 0.238 0.155
## ID.Tmprtr.s 0.014 0.080 -0.150 -0.027 0.011 0.027 0.394 -0.302 -0.412
## CL.scaled -0.261 -0.047 -0.046 0.101 0.016 0.343 0.056 -0.078 0.050
## Hr.c.. ID.Tm.
## Hs.200.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Tank.SiteT
## Year2020
## JulnDy.scld
## Hr.ctv.dfn.
## ID.Tmprtr.s -0.015
## CL.scaled -0.036 0.002
plot(mod.ad.full)
hist(residuals(mod.ad.full))
qqnorm(resid(mod.ad.full))
## Null mixed model
mod.ad.null <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.double.ad, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.ad.dummy.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.double.ad, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.ad.dummy.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.double.ad, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.ad.ID <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.ad.ID.site <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.ad, na.action=na.exclude, REML = FALSE)
The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.
The less complex model is always first.
anova(mod.ad.null, mod.ad.dummy.site)
## Data: df.double.ad
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 14 1008.7 1064.5 -490.35 980.7
## mod.ad.dummy.site 15 1010.7 1070.5 -490.35 980.7 0 1 1
anova(mod.ad.null, mod.ad.dummy.ID)
## Data: df.double.ad
## Models:
## mod.ad.null: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.ad.dummy.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.null 14 1008.70 1064.5 -490.35 980.70
## mod.ad.dummy.ID 15 947.72 1007.6 -458.86 917.72 62.98 1 2.088e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.ad.ID, mod.ad.ID.site)
## Data: df.double.ad
## Models:
## mod.ad.ID: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.ad.ID.site: active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.ad.ID 14 945.72 1001.6 -458.86 917.72
## mod.ad.ID.site 15 947.72 1007.6 -458.86 917.72 0 1 1
The test is considered significant when chisq is over 3.84
Only turtle identity will be kept as a random variable.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.ad.full <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad, na.action=na.exclude, REML = FALSE)
summary(mod.ad.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Tank.Site + Year + JulianDay.scaled + Hour.active.defenses.scaled +
## ID.Temperature.scaled + CL.scaled + (1 | ID)
## Data: df.double.ad
##
## AIC BIC logLik deviance df.resid
## 945.7 1001.6 -458.9 917.7 385
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.79108 -0.53650 -0.09793 0.53562 2.75285
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4063 0.6374
## Residual 0.3136 0.5600
## Number of obs: 399, groups: ID, 211
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.99390 0.11984 265.28283 8.293 5.59e-15
## House.200.scaled -0.16773 0.06670 208.95640 -2.515 0.012672
## Distance.channel.scaled -0.22794 0.06804 204.39522 -3.350 0.000963
## Vessels.activities.2019.scaled 0.29346 0.07673 232.15000 3.825 0.000168
## Order.trial.scaled 0.04266 0.03569 312.46630 1.195 0.232800
## SexM 0.31457 0.12583 199.14779 2.500 0.013228
## Tank.SiteT -0.02854 0.16170 382.88556 -0.176 0.860013
## Year2020 -0.18281 0.11228 396.77606 -1.628 0.104301
## JulianDay.scaled -0.11814 0.07653 299.65326 -1.544 0.123692
## Hour.active.defenses.scaled 0.02421 0.05086 330.13083 0.476 0.634344
## ID.Temperature.scaled 0.07757 0.05812 379.58873 1.335 0.182732
## CL.scaled 0.16711 0.06562 188.19014 2.547 0.011678
##
## (Intercept) ***
## House.200.scaled *
## Distance.channel.scaled ***
## Vessels.activities.2019.scaled ***
## Order.trial.scaled
## SexM *
## Tank.SiteT
## Year2020
## JulianDay.scaled
## Hour.active.defenses.scaled
## ID.Temperature.scaled
## CL.scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 Ordr.. SexM Tnk.ST Yr2020 JlnDy.
## Hs.200.scld -0.117
## Dstnc.chnn. 0.050 0.149
## Vssl..2019. 0.162 -0.008 -0.491
## Ordr.trl.sc -0.031 0.033 -0.080 0.186
## SexM -0.689 0.255 -0.085 0.071 0.006
## Tank.SiteT -0.327 0.019 -0.188 -0.114 0.165 0.074
## Year2020 -0.256 -0.329 0.022 -0.171 -0.198 -0.106 -0.365
## JulnDy.scld -0.039 0.370 0.253 -0.119 -0.174 0.090 -0.503 0.214
## Hr.ctv.dfn. 0.173 0.081 0.059 0.080 0.142 -0.063 -0.620 0.223 0.140
## ID.Tmprtr.s 0.026 0.101 -0.202 -0.049 0.020 0.057 0.368 -0.339 -0.405
## CL.scaled -0.279 -0.073 -0.052 0.131 0.009 0.345 0.078 -0.087 0.032
## Hr.c.. ID.Tm.
## Hs.200.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Tank.SiteT
## Year2020
## JulnDy.scld
## Hr.ctv.dfn.
## ID.Tmprtr.s -0.031
## CL.scaled -0.040 0.022
mod.ad.final <- lmer(active.defenses ~ House.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.ad, na.action=na.exclude, REML = TRUE)
summary(mod.ad.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: active.defenses ~ House.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## ID.Temperature.scaled + CL.scaled + (1 | ID)
## Data: df.double.ad
##
## REML criterion at convergence: 1361.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.38654 -0.62432 -0.09817 0.56221 2.80205
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.3194 0.5651
## Residual 0.4221 0.6497
## Number of obs: 561, groups: ID, 215
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.88864 0.08714 206.58701 10.198 < 2e-16
## House.200.scaled -0.15392 0.04871 192.66811 -3.160 0.00183
## Distance.channel.scaled -0.14479 0.06045 220.21573 -2.395 0.01745
## Vessels.activities.2019.scaled 0.18377 0.06160 193.27807 2.983 0.00322
## Order.trial.scaled 0.05845 0.02852 477.18753 2.049 0.04098
## SexM 0.33800 0.10978 196.40374 3.079 0.00238
## ID.Temperature.scaled 0.11145 0.04409 537.90670 2.528 0.01177
## CL.scaled 0.13982 0.05858 203.37653 2.387 0.01791
##
## (Intercept) ***
## House.200.scaled **
## Distance.channel.scaled *
## Vessels.activities.2019.scaled **
## Order.trial.scaled *
## SexM **
## ID.Temperature.scaled *
## CL.scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.200. Dstn.. V..201 Ordr.. SexM ID.Tm.
## Hs.200.scld -0.185
## Dstnc.chnn. 0.014 0.084
## Vssl..2019. -0.037 0.015 -0.542
## Ordr.trl.sc -0.138 -0.026 -0.019 0.030
## SexM -0.816 0.182 -0.107 0.089 -0.016
## ID.Tmprtr.s -0.032 0.100 -0.146 -0.087 -0.197 0.084
## CL.scaled -0.290 -0.201 -0.062 0.148 -0.008 0.318 0.034
confint(mod.ad.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 0.717840684 1.05943287
## House.200.scaled -0.249395513 -0.05844638
## Distance.channel.scaled -0.263267334 -0.02630938
## Vessels.activities.2019.scaled 0.063044184 0.30449727
## Order.trial.scaled 0.002546783 0.11435758
## SexM 0.122832090 0.55316600
## ID.Temperature.scaled 0.025025958 0.19786843
## CL.scaled 0.025004563 0.25464168
r.squaredGLMM(mod.ad.final)
## R2m R2c
## [1,] 0.109186 0.4928679
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Tank.Site + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.double.escape, na.action=na.exclude)
gvif(mod.escape.vif)
## GVIF df GVIF^(1/(2*df))
## House.400.scaled 2.4177 1 1.5549
## Urban.200.scaled 2.0373 1 1.4273
## Distance.channel.scaled 2.0279 1 1.4240
## Vessels.activities.2019.scaled 2.1394 1 1.4627
## Order.trial.scaled 1.2156 1 1.1025
## Sex 1.2303 1 1.1092
## Year 1.4837 1 1.2181
## Tank.Site 4.9140 1 2.2168
## JulianDay.scaled 3.3188 1 1.8218
## Hour.Platform.scaled 2.3715 1 1.5400
## ID.Temperature.scaled 1.5853 1 1.2591
## CL.scaled 1.3132 1 1.1459
## Wind.scaled 2.5983 1 1.6119
mod.escape.vif <- lm(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled, data = df.double.escape, na.action=na.exclude)
gvif(mod.escape.vif)
## GVIF df GVIF^(1/(2*df))
## House.400.scaled 2.4121 1 1.5531
## Urban.200.scaled 2.0233 1 1.4224
## Distance.channel.scaled 1.9596 1 1.3999
## Vessels.activities.2019.scaled 2.1290 1 1.4591
## Order.trial.scaled 1.2150 1 1.1023
## Sex 1.2275 1 1.1079
## Year 1.4698 1 1.2124
## JulianDay.scaled 3.1470 1 1.7740
## Hour.Platform.scaled 1.6506 1 1.2848
## ID.Temperature.scaled 1.4847 1 1.2185
## CL.scaled 1.3042 1 1.1420
## Wind.scaled 1.6589 1 1.2880
We deleted Tank.Site given that the GVIF^(1/(2*df)) was over two.
# Visualization of the correlations
cor.pearson.escape <- df.double.escape[, c(20,21,6,3,10,12,8,16,17,11)]
chart.Correlation(cor.pearson.escape, histogram=TRUE, pch=19)
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.double.escape, REML = TRUE, na.action=na.exclude)
summary(mod.escape.full)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
## Data: df.double.escape
##
## REML criterion at convergence: 1444.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1969 -0.6070 -0.0417 0.5045 3.2380
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.52919 0.7275
## Site (Intercept) 0.02688 0.1640
## Residual 0.73644 0.8582
## Number of obs: 481, groups: ID, 196; Site, 19
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.19093 0.16937 46.96034 18.840 < 2e-16
## House.400.scaled 0.09007 0.12955 13.59285 0.695 0.4986
## Urban.200.scaled 0.20958 0.09627 9.67590 2.177 0.0554
## Distance.channel.scaled 0.21748 0.09849 16.12511 2.208 0.0421
## Vessels.activities.2019.scaled -0.15503 0.10953 7.47936 -1.415 0.1972
## Order.trial.scaled -0.29096 0.04956 388.36311 -5.870 9.33e-09
## SexM -0.10902 0.15319 150.86753 -0.712 0.4778
## Year2020 0.26273 0.15773 62.56523 1.666 0.1008
## JulianDay.scaled 0.42383 0.13360 15.25573 3.172 0.0062
## Hour.Platform.scaled -0.01431 0.05757 253.59142 -0.249 0.8039
## ID.Temperature.scaled -0.11100 0.08604 73.74765 -1.290 0.2011
## CL.scaled 0.08780 0.08086 173.52075 1.086 0.2790
## Wind.scaled -0.16415 0.06627 291.08805 -2.477 0.0138
##
## (Intercept) ***
## House.400.scaled
## Urban.200.scaled .
## Distance.channel.scaled *
## Vessels.activities.2019.scaled
## Order.trial.scaled ***
## SexM
## Year2020
## JulianDay.scaled **
## Hour.Platform.scaled
## ID.Temperature.scaled
## CL.scaled
## Wind.scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
plot(mod.escape.full)
hist(residuals(mod.escape.full))
qqnorm(resid(mod.escape.full))
## Null mixed model
mod.escape.null <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy), data = df.double.escape, na.action=na.exclude, control=lmerControl(check.nlev.gtr.1="ignore"), REML = FALSE)
## only site identity as random variable
mod.escape.dummy.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|Site), data = df.double.escape, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## only turtle identity as random variable
mod.escape.dummy.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|dummy) + (1|ID), data = df.double.escape, control=lmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude, REML = FALSE)
## Turtle identity without the dummy variable
mod.escape.ID <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape, na.action=na.exclude, REML = FALSE)
## Turtle and site identity without the dummy variable
mod.escape.ID.site <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|Site) + (1|ID), data = df.double.escape, na.action=na.exclude, REML = FALSE)
The model is singular meaning that the parameters are on the boundary of the feasible parameter space: variances of one or more linear combinations of effects are (close to) zero. Possible causes behind this: overly complex models.
The less complex model is always first.
anova(mod.escape.null, mod.escape.dummy.site)
## Data: df.double.escape
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 15 1481.4 1544.1 -725.72 1451.4
## mod.escape.dummy.site 16 1483.3 1550.1 -725.66 1451.3 0.1206 1 0.7284
anova(mod.escape.null, mod.escape.dummy.ID)
## Data: df.double.escape
## Models:
## mod.escape.null: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy)
## mod.escape.dummy.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.null 15 1481.4 1544.1 -725.72 1451.4
## mod.escape.dummy.ID 16 1434.5 1501.3 -701.25 1402.5 48.944 1 2.634e-12
##
## mod.escape.null
## mod.escape.dummy.ID ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.escape.ID, mod.escape.ID.site)
## Data: df.double.escape
## Models:
## mod.escape.ID: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | ID)
## mod.escape.ID.site: log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.escape.ID 15 1432.5 1495.1 -701.25 1402.5
## mod.escape.ID.site 16 1434.5 1501.3 -701.25 1402.5 0 1 1
The test is considered significant when chisq is over 3.84
Only turtle identity will be kept.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.escape.full <- lmer(log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape, na.action=na.exclude, REML = FALSE)
summary(mod.escape.full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula:
## log.escape ~ House.400.scaled + Urban.200.scaled + Distance.channel.scaled +
## Vessels.activities.2019.scaled + Order.trial.scaled + Sex +
## Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled +
## CL.scaled + Wind.scaled + (1 | ID)
## Data: df.double.escape
##
## AIC BIC logLik deviance df.resid
## 1432.5 1495.1 -701.2 1402.5 466
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2232 -0.6156 -0.0512 0.5064 3.2580
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4960 0.7043
## Residual 0.7321 0.8556
## Number of obs: 481, groups: ID, 196
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.17772 0.15558 239.66937 20.425 < 2e-16
## House.400.scaled 0.10897 0.11261 198.87413 0.968 0.33439
## Urban.200.scaled 0.20547 0.08165 211.94975 2.516 0.01260
## Distance.channel.scaled 0.22969 0.08675 195.02223 2.648 0.00877
## Vessels.activities.2019.scaled -0.16066 0.09050 188.33151 -1.775 0.07747
## Order.trial.scaled -0.28677 0.04867 436.83513 -5.892 7.63e-09
## SexM -0.08576 0.14709 178.14486 -0.583 0.56058
## Year2020 0.22789 0.14649 325.02237 1.556 0.12075
## JulianDay.scaled 0.43899 0.11651 226.72118 3.768 0.00021
## Hour.Platform.scaled -0.02489 0.05563 460.77839 -0.447 0.65477
## ID.Temperature.scaled -0.11494 0.08046 465.86473 -1.429 0.15381
## CL.scaled 0.08680 0.07838 186.28421 1.107 0.26956
## Wind.scaled -0.17356 0.06446 448.01732 -2.692 0.00736
##
## (Intercept) ***
## House.400.scaled
## Urban.200.scaled *
## Distance.channel.scaled **
## Vessels.activities.2019.scaled .
## Order.trial.scaled ***
## SexM
## Year2020
## JulianDay.scaled ***
## Hour.Platform.scaled
## ID.Temperature.scaled
## CL.scaled
## Wind.scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
mod.escape.final <- lmer(log.escape ~ Urban.200.scaled + Order.trial.scaled + JulianDay.scaled + CL.scaled + Wind.scaled + (1|ID), data = df.double.escape, na.action=na.exclude, REML = TRUE)
summary(mod.escape.final)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log.escape ~ Urban.200.scaled + Order.trial.scaled + JulianDay.scaled +
## CL.scaled + Wind.scaled + (1 | ID)
## Data: df.double.escape
##
## REML criterion at convergence: 1618.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.39559 -0.61219 -0.06053 0.54123 3.11180
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4678 0.6840
## Residual 0.7913 0.8896
## Number of obs: 543, groups: ID, 202
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.30917 0.07195 235.61868 45.995 < 2e-16 ***
## Urban.200.scaled 0.24196 0.07259 209.06938 3.333 0.00102 **
## Order.trial.scaled -0.26885 0.04347 477.11367 -6.184 1.34e-09 ***
## JulianDay.scaled 0.33947 0.08571 243.62059 3.961 9.82e-05 ***
## CL.scaled 0.14548 0.06330 209.04497 2.298 0.02253 *
## Wind.scaled -0.15321 0.05068 483.67081 -3.023 0.00263 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) U.200. Ordr.. JlnDy. CL.scl
## Urbn.200.sc -0.226
## Ordr.trl.sc -0.222 -0.182
## JulnDy.scld -0.376 0.563 -0.187
## CL.scaled -0.045 -0.115 0.008 0.103
## Wind.scaled 0.133 0.160 -0.440 0.266 -0.055
confint(mod.escape.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 3.16816101 3.45018404
## Urban.200.scaled 0.09968258 0.38423802
## Order.trial.scaled -0.35405874 -0.18364043
## JulianDay.scaled 0.17147967 0.50747005
## CL.scaled 0.02141824 0.26953839
## Wind.scaled -0.25253317 -0.05388696
r.squaredGLMM(mod.escape.final)
## R2m R2c
## [1,] 0.1460005 0.46328
marginal: only for the fixed effect. conditional: both fixed and random effect.
GVIF^(1/(2*df)) over two indicate the presence of multicollinearity
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Urban.600.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Year + Tank.Site + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.emergence, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 7.8026 1 2.7933
## Urban.600.scaled 10.2844 1 3.2069
## Distance.channel.scaled 1.7805 1 1.3344
## Vessels.activities.2019.scaled 2.0218 1 1.4219
## Order.trial.scaled 1.2107 1 1.1003
## Sex 1.3053 1 1.1425
## Year 1.4926 1 1.2217
## Tank.Site 3.1891 1 1.7858
## JulianDay.scaled 2.3481 1 1.5324
## Hour.Platform.scaled 2.2332 1 1.4944
## ID.Temperature.scaled 1.7556 1 1.3250
## CL.scaled 1.2567 1 1.1210
mod.emergence.vif <- glm(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled, data = df.double.emergence, family = binomial, na.action=na.exclude)
gvif(mod.emergence.vif)
## GVIF df GVIF^(1/(2*df))
## House.300.scaled 1.9292 1 1.3890
## Distance.channel.scaled 1.7571 1 1.3256
## Vessels.activities.2019.scaled 1.9590 1 1.3996
## Order.trial.scaled 1.2102 1 1.1001
## Sex 1.2209 1 1.1049
## Tank.Site 3.1018 1 1.7612
## Year 1.4882 1 1.2199
## JulianDay.scaled 2.1570 1 1.4687
## Hour.Platform.scaled 2.2185 1 1.4895
## ID.Temperature.scaled 1.6455 1 1.2828
## CL.scaled 1.2470 1 1.1167
Urban areas within 600m was deleted given that the GVIF^(1/(2*df)) was over two.
# Visualization of the correlations
cor.pearson.emergence <- df.double.emergence[, c(19,22,6,3,10,12,8,16,17)]
chart.Correlation(cor.pearson.emergence, histogram=TRUE, pch=19)
The Pearson correlation coefficient over 0.8 between Urban areas within 600m and number of houses within 300m confirmed the deletion.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.double.emergence, family = binomial, na.action=na.exclude)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0597096 (tol = 0.002, component 1)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +
## Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled +
## (1 | ID) + (1 | Site)
## Data: df.double.emergence
##
## AIC BIC logLik deviance df.resid
## 574.1 633.9 -273.0 546.1 515
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9251 -0.3956 -0.2373 0.4351 2.9867
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 2.6800 1.6371
## Site (Intercept) 0.5229 0.7231
## Number of obs: 529, groups: ID, 209; Site, 21
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.57697 0.46645 -1.237 0.2161
## House.300.scaled -0.27675 0.35107 -0.788 0.4305
## Distance.channel.scaled -0.27959 0.29474 -0.949 0.3428
## Vessels.activities.2019.scaled 0.51196 0.33349 1.535 0.1247
## Order.trial.scaled 0.07448 0.15409 0.483 0.6288
## SexM 0.22499 0.42826 0.525 0.5993
## Tank.SiteT -2.90817 0.64776 -4.490 7.14e-06 ***
## Year2020 -0.24392 0.46219 -0.528 0.5977
## JulianDay.scaled 0.95437 0.34523 2.764 0.0057 **
## Hour.Platform.scaled 0.09197 0.22069 0.417 0.6769
## ID.Temperature.scaled -0.05166 0.25219 -0.205 0.8377
## CL.scaled 0.02028 0.22268 0.091 0.9274
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. V..201 Ordr.. SexM Tnk.ST Yr2020 JlnDy.
## Hs.300.scld 0.054
## Dstnc.chnn. 0.101 0.151
## Vssl..2019. 0.058 0.021 -0.497
## Ordr.trl.sc -0.044 -0.082 -0.046 0.056
## SexM -0.589 0.169 -0.077 0.082 -0.025
## Tank.SiteT -0.207 -0.041 -0.084 -0.096 0.172 -0.021
## Year2020 -0.408 -0.212 -0.084 -0.105 -0.207 -0.051 -0.094
## JulnDy.scld -0.029 0.351 0.203 -0.182 -0.188 0.098 -0.468 0.013
## Hr.Pltfrm.s 0.236 0.088 0.030 0.056 0.197 -0.021 -0.614 -0.007 0.151
## ID.Tmprtr.s 0.072 0.085 -0.172 0.020 0.036 0.015 0.348 -0.228 -0.404
## CL.scaled -0.216 -0.036 -0.011 0.049 0.003 0.324 0.051 -0.119 0.097
## Hr.Pl. ID.Tm.
## Hs.300.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Tank.SiteT
## Year2020
## JulnDy.scld
## Hr.Pltfrm.s
## ID.Tmprtr.s -0.036
## CL.scaled -0.042 -0.039
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0597096 (tol = 0.002, component 1)
gm_all <- allFit(mod.emergence.full)
## bobyqa : [OK]
## Nelder_Mead :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0366235 (tol = 0.002, component 1)
## [OK]
## nlminbwrap : [OK]
## nmkbw :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00203983 (tol = 0.002, component 1)
## [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00236751 (tol = 0.002, component 1)
## [OK]
t(sapply(gm_all,fixef))
## (Intercept) House.300.scaled
## bobyqa -0.5703545 -0.2731547
## Nelder_Mead -0.5776692 -0.2809434
## nlminbwrap -0.5703548 -0.2731633
## nmkbw -0.5705922 -0.2733797
## optimx.L-BFGS-B -0.5703964 -0.2732323
## nloptwrap.NLOPT_LN_NELDERMEAD -0.5705662 -0.2732201
## nloptwrap.NLOPT_LN_BOBYQA -0.5704294 -0.2729278
## Distance.channel.scaled
## bobyqa -0.2793841
## Nelder_Mead -0.2806871
## nlminbwrap -0.2793867
## nmkbw -0.2794297
## optimx.L-BFGS-B -0.2793669
## nloptwrap.NLOPT_LN_NELDERMEAD -0.2795616
## nloptwrap.NLOPT_LN_BOBYQA -0.2791320
## Vessels.activities.2019.scaled Order.trial.scaled
## bobyqa 0.5165569 0.07778553
## Nelder_Mead 0.5154441 0.07581981
## nlminbwrap 0.5165607 0.07778218
## nmkbw 0.5166040 0.07775609
## optimx.L-BFGS-B 0.5165086 0.07779155
## nloptwrap.NLOPT_LN_NELDERMEAD 0.5167676 0.07783627
## nloptwrap.NLOPT_LN_BOBYQA 0.5165501 0.07783208
## SexM Tank.SiteT Year2020 JulianDay.scaled
## bobyqa 0.2235015 -2.916581 -0.2552020 0.9607171
## Nelder_Mead 0.2291159 -2.903567 -0.2546789 0.9564602
## nlminbwrap 0.2234918 -2.916589 -0.2551919 0.9607245
## nmkbw 0.2235574 -2.916467 -0.2551090 0.9606228
## optimx.L-BFGS-B 0.2234028 -2.916398 -0.2551102 0.9606051
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2235494 -2.916669 -0.2550607 0.9607448
## nloptwrap.NLOPT_LN_BOBYQA 0.2235864 -2.916952 -0.2552062 0.9610121
## Hour.Platform.scaled ID.Temperature.scaled
## bobyqa 0.09735240 -0.04527925
## Nelder_Mead 0.09061758 -0.04467571
## nlminbwrap 0.09735059 -0.04528315
## nmkbw 0.09716383 -0.04536702
## optimx.L-BFGS-B 0.09730574 -0.04525554
## nloptwrap.NLOPT_LN_NELDERMEAD 0.09739978 -0.04529126
## nloptwrap.NLOPT_LN_BOBYQA 0.09742754 -0.04567319
## CL.scaled
## bobyqa 0.01697201
## Nelder_Mead 0.02125493
## nlminbwrap 0.01696731
## nmkbw 0.01698761
## optimx.L-BFGS-B 0.01694443
## nloptwrap.NLOPT_LN_NELDERMEAD 0.01699645
## nloptwrap.NLOPT_LN_BOBYQA 0.01706011
sapply(gm_all,logLik)
## bobyqa Nelder_Mead
## -273.0322 -273.0332
## nlminbwrap nmkbw
## -273.0322 -273.0322
## optimx.L-BFGS-B nloptwrap.NLOPT_LN_NELDERMEAD
## -273.0322 -273.0322
## nloptwrap.NLOPT_LN_BOBYQA
## -273.0322
We decided to use optimx.L-BFGS-B optimizer to include in our initial model.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID) + (1|Site), data = df.double.emergence, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), family = binomial, na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +
## Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled +
## (1 | ID) + (1 | Site)
## Data: df.double.emergence
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 574.1 633.9 -273.0 546.1 515
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9279 -0.3948 -0.2357 0.4347 2.9939
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 2.7080 1.6456
## Site (Intercept) 0.5328 0.7299
## Number of obs: 529, groups: ID, 209; Site, 21
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.57040 0.46818 -1.218 0.22310
## House.300.scaled -0.27323 0.35298 -0.774 0.43889
## Distance.channel.scaled -0.27937 0.29628 -0.943 0.34572
## Vessels.activities.2019.scaled 0.51651 0.33560 1.539 0.12379
## Order.trial.scaled 0.07779 0.15439 0.504 0.61436
## SexM 0.22340 0.42973 0.520 0.60316
## Tank.SiteT -2.91640 0.64979 -4.488 7.18e-06 ***
## Year2020 -0.25511 0.46414 -0.550 0.58256
## JulianDay.scaled 0.96061 0.34753 2.764 0.00571 **
## Hour.Platform.scaled 0.09731 0.22129 0.440 0.66014
## ID.Temperature.scaled -0.04526 0.25277 -0.179 0.85791
## CL.scaled 0.01694 0.22343 0.076 0.93955
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. V..201 Ordr.. SexM Tnk.ST Yr2020 JlnDy.
## Hs.300.scld 0.054
## Dstnc.chnn. 0.101 0.151
## Vssl..2019. 0.057 0.021 -0.497
## Ordr.trl.sc -0.044 -0.082 -0.046 0.056
## SexM -0.589 0.169 -0.076 0.082 -0.025
## Tank.SiteT -0.208 -0.041 -0.084 -0.097 0.171 -0.020
## Year2020 -0.408 -0.211 -0.085 -0.105 -0.208 -0.051 -0.093
## JulnDy.scld -0.028 0.352 0.203 -0.181 -0.186 0.098 -0.468 0.011
## Hr.Pltfrm.s 0.236 0.088 0.030 0.056 0.197 -0.020 -0.616 -0.008 0.152
## ID.Tmprtr.s 0.071 0.085 -0.171 0.020 0.036 0.015 0.347 -0.229 -0.402
## CL.scaled -0.215 -0.035 -0.010 0.049 0.003 0.324 0.053 -0.118 0.097
## Hr.Pl. ID.Tm.
## Hs.300.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Tank.SiteT
## Year2020
## JulnDy.scld
## Hr.Pltfrm.s
## ID.Tmprtr.s -0.036
## CL.scaled -0.042 -0.038
plot(mod.emergence.full)
hist(residuals(mod.emergence.full))
qqnorm(resid(mod.emergence.full))
## Null mixed model
mod.emergence.null <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only site identity as random variable
mod.emergence.dummy.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|Site), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## only turtle identity as random variable
mod.emergence.dummy.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|dummy) + (1|ID), data = df.double.emergence, family = binomial, control=glmerControl(check.nlev.gtr.1="ignore"), na.action=na.exclude)
## Turtle identity without the dummy variable
mod.emergence.ID <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
## Turtle and site identity without the dummy variable
mod.emergence.ID.site <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|Site) + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
The less complex model is always first.
anova(mod.emergence.null, mod.emergence.dummy.site)
## Data: df.double.emergence
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | Site)
## npar AIC BIC logLik deviance Chisq Df
## mod.emergence.null 13 614.21 669.73 -294.11 588.21
## mod.emergence.dummy.site 14 602.88 662.67 -287.44 574.88 13.329 1
## Pr(>Chisq)
## mod.emergence.null
## mod.emergence.dummy.site 0.0002614 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.null, mod.emergence.dummy.ID)
## Data: df.double.emergence
## Models:
## mod.emergence.null: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy)
## mod.emergence.dummy.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | dummy) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.null 13 614.21 669.73 -294.11 588.21
## mod.emergence.dummy.ID 14 576.50 636.29 -274.25 548.50 39.709 1 2.948e-10
##
## mod.emergence.null
## mod.emergence.dummy.ID ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod.emergence.ID, mod.emergence.ID.site)
## Data: df.double.emergence
## Models:
## mod.emergence.ID: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## mod.emergence.ID.site: emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | Site) + (1 | ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod.emergence.ID 13 574.49 630.01 -274.24 548.49
## mod.emergence.ID.site 14 574.06 633.86 -273.03 546.06 2.4239 1 0.1195
The test is considered significant when chisq is over 3.84
We do not need to keep sampling site identity as a random variable.
We are selecting the final model with the backward selection procedure. At each step, we deleted the variable with the higher p value. The deletion of each predictor variable is confirmed with a likelihood ratio test. We need to create a new dataset at each step to have only the rows with complete observations for all the preditors. It allow us to perform the likelihood ratio test that does not run between two models with not the same number of observations. We are not showing all these steps here, we can find them in the MixedModels_Behaviors_CodeR file.
mod.emergence.full <- glmer(emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled + Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled + Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ House.300.scaled + Distance.channel.scaled + Vessels.activities.2019.scaled +
## Order.trial.scaled + Sex + Tank.Site + Year + JulianDay.scaled +
## Hour.Platform.scaled + ID.Temperature.scaled + CL.scaled + (1 | ID)
## Data: df.double.emergence
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 574.5 630.0 -274.2 548.5 516
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8941 -0.3865 -0.2438 0.4490 3.1677
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.263 1.806
## Number of obs: 529, groups: ID, 209
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.59974 0.43860 -1.367 0.17151
## House.300.scaled -0.36068 0.29042 -1.242 0.21427
## Distance.channel.scaled -0.34887 0.24819 -1.406 0.15983
## Vessels.activities.2019.scaled 0.58629 0.25755 2.276 0.02282 *
## Order.trial.scaled 0.08994 0.15152 0.594 0.55280
## SexM 0.20905 0.43345 0.482 0.62960
## Tank.SiteT -2.90224 0.62366 -4.654 3.26e-06 ***
## Year2020 -0.02535 0.41231 -0.061 0.95098
## JulianDay.scaled 0.80274 0.26871 2.987 0.00281 **
## Hour.Platform.scaled 0.11054 0.20980 0.527 0.59827
## ID.Temperature.scaled 0.06824 0.21781 0.313 0.75405
## CL.scaled -0.09072 0.22025 -0.412 0.68040
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) H.300. Dstn.. V..201 Ordr.. SexM Tnk.ST Yr2020 JlnDy.
## Hs.300.scld 0.043
## Dstnc.chnn. 0.081 0.122
## Vssl..2019. 0.117 0.087 -0.501
## Ordr.trl.sc -0.071 -0.086 -0.074 0.060
## SexM -0.625 0.225 -0.094 0.122 -0.029
## Tank.SiteT -0.243 -0.045 -0.092 -0.220 0.168 -0.002
## Year2020 -0.466 -0.213 0.029 -0.282 -0.201 -0.051 -0.077
## JulnDy.scld -0.054 0.335 0.199 -0.065 -0.223 0.080 -0.524 0.125
## Hr.Pltfrm.s 0.259 0.119 0.013 0.160 0.194 -0.040 -0.599 -0.044 0.150
## ID.Tmprtr.s 0.073 0.146 -0.243 0.031 -0.001 0.057 0.297 -0.328 -0.323
## CL.scaled -0.226 -0.113 -0.036 0.074 0.024 0.315 0.095 -0.062 -0.002
## Hr.Pl. ID.Tm.
## Hs.300.scld
## Dstnc.chnn.
## Vssl..2019.
## Ordr.trl.sc
## SexM
## Tank.SiteT
## Year2020
## JulnDy.scld
## Hr.Pltfrm.s
## ID.Tmprtr.s -0.051
## CL.scaled -0.038 0.046
mod.emergence.final <- glmer(emergence ~ Vessels.activities.2019.scaled + Tank.Site + JulianDay.scaled + (1|ID), data = df.double.emergence, family = binomial, control = glmerControl(optimizer ='optimx', optCtrl=list(method='L-BFGS-B')), na.action=na.exclude)
summary(mod.emergence.final)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## emergence ~ Vessels.activities.2019.scaled + Tank.Site + JulianDay.scaled +
## (1 | ID)
## Data: df.double.emergence
## Control:
## glmerControl(optimizer = "optimx", optCtrl = list(method = "L-BFGS-B"))
##
## AIC BIC logLik deviance df.resid
## 619.1 641.0 -304.6 609.1 577
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7942 -0.3722 -0.2399 0.4728 3.1463
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 4.089 2.022
## Number of obs: 582, groups: ID, 215
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5568 0.2251 -2.474 0.0134 *
## Vessels.activities.2019.scaled 0.4822 0.2102 2.294 0.0218 *
## Tank.SiteT -2.8945 0.4209 -6.877 6.10e-12 ***
## JulianDay.scaled 1.0130 0.2244 4.515 6.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) V..201 Tnk.ST
## Vssl..2019. 0.012
## Tank.SiteT -0.143 -0.257
## JulnDy.scld -0.198 -0.101 -0.456
confint(mod.emergence.final, level = 0.95, method = "Wald")
## 2.5 % 97.5 %
## .sig01 NA NA
## (Intercept) -0.99798497 -0.1156176
## Vessels.activities.2019.scaled 0.07017862 0.8941339
## Tank.SiteT -3.71937426 -2.0695618
## JulianDay.scaled 0.57321285 1.4527686
r.squaredGLMM(mod.emergence.final)
## R2m R2c
## theoretical 0.1869336 0.6375022
## delta 0.1633519 0.5570811
marginal: only for the fixed effect. conditional: both fixed and random effect. The delta method can be used with all distributions and link functions.