Packages
library(tidyverse)
library(corrplot)
library(lmerTest)
library(lme4)
library(vegan)
library(DescTools)
Datasets
fig2_data.csv
fig2_glmm.csv
fig3_data.csv
fig4_data.csv
fig5_data.csv
GLMM Setup
Check for collinearity
# Collinearity check: build a numeric matrix of the predictor variables from
# the Figure 2 monthly-means data and inspect their pairwise correlations.
fig2_data <- read_csv("fig2_data.csv")

# Keep only the "plastic_debris" rows so predictor values are not duplicated
# across debris types, retain just the predictor columns, express river
# length in km instead of m, and convert the result to a matrix for cor().
fig2_cor <- fig2_data %>%
  filter(debris_type == "plastic_debris") %>%
  select(
    urban_population,
    tech_rank,
    waste_picker_rank,
    river_width_m,
    river_length
  ) %>%
  mutate(river_length = river_length / 1000) %>%
  as.matrix()

# Print the correlation matrix, then draw it as a correlation plot.
fig2_cor %>% cor()
fig2_cor %>% cor() %>% corrplot()
River length and river width appear to be highly collinear (correlation > 0.88). Therefore, we run two GLMMs for each debris scenario: one that includes river width and one that includes river length, but never both together.
Run GLMM scenarios
# Fit the mixed models on the Figure 2 monthly-totals data, which includes the
# predictor variables.
fig2_glmm <- read_csv("fig2_glmm.csv")

# Inspect the column classes as read from the CSV.
sapply(fig2_glmm, class)

# Technology ranking and waste picker ranking are categorical, so store them
# as factors. The continuous predictors are z-scored; without rescaling, lmer
# warns: "some predictor variables are on very different scales: consider
# rescaling". scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes, so as.numeric() is applied to keep plain numeric
# columns in the data frame (the values are unchanged).
fig2_glmm <- fig2_glmm %>%
  mutate(
    across(c(tech_rank, waste_picker_rank), as.factor),
    across(
      c(urban_population, river_width_m, river_length_km),
      ~ as.numeric(scale(.x))
    )
  )

# Every scenario is fitted twice, once with river width and once with river
# length, and never with both in the same model. Date and river are random
# intercepts. The trailing "- 1" removes the fixed intercept from the model.

# Plastic debris scenario
plastic_debris_model_width <- lmer(
  plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +
    tech_rank + waste_picker_rank + river_width_m - 1,
  data = fig2_glmm
)
plastic_debris_model_length <- lmer(
  plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +
    tech_rank + waste_picker_rank + river_length_km - 1,
  data = fig2_glmm
)
summary(plastic_debris_model_width)
summary(plastic_debris_model_length)

# All debris scenario
all_debris_model_width <- lmer(
  all_debris ~ (1 | date) + (1 | river_id) + urban_population +
    tech_rank + waste_picker_rank + river_width_m - 1,
  data = fig2_glmm
)
all_debris_model_length <- lmer(
  all_debris ~ (1 | date) + (1 | river_id) + urban_population +
    tech_rank + waste_picker_rank + river_length_km - 1,
  data = fig2_glmm
)
summary(all_debris_model_width)
summary(all_debris_model_length)

# Proportion of plastic:all debris scenario
prop_debris_model_width <- lmer(
  plastic_debris / all_debris ~ (1 | date) + (1 | river_id) +
    urban_population + tech_rank + waste_picker_rank + river_width_m - 1,
  data = fig2_glmm
)
prop_debris_model_length <- lmer(
  plastic_debris / all_debris ~ (1 | date) + (1 | river_id) +
    urban_population + tech_rank + waste_picker_rank + river_length_km - 1,
  data = fig2_glmm
)
summary(prop_debris_model_width)
summary(prop_debris_model_length)

# Exploratory comparison: river as a fixed effect with date as a random
# intercept, followed by a one-way ANOVA of plastic debris by river.
# NOTE(review): the object names `plastic_debris` and `anova` coincide with a
# data column and with the stats::anova() function; they are kept unchanged
# here in case other code refers to them.
plastic_debris <- lmer(plastic_debris ~ (1 | date) + river_id, data = fig2_glmm)
summary(plastic_debris)
anova <- aov(plastic_debris ~ river_id, data = fig2_glmm)
summary(anova)
GLMM results
Across all six models, none of the predictor variables has a statistically significant effect (all p ≥ 0.159).
Plastic Debris LMER (river width)
# Print the summary of the plastic-debris model that uses river width.
# NOTE(review): in the recorded output every fixed effect has a Satterthwaite
# df of about 1 (8 rivers, 7 fixed-effect coefficients), so the t-tests are
# presumably very weak -- confirm before interpreting the p-values.
summary(plastic_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_width_m - 1
## Data: fig2_glmm
##
## REML criterion at convergence: 5273.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8233 -0.2720 0.0179 0.1364 4.7218
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 8269231 2876
## river_id (Intercept) 142757718 11948
## Residual 97654069 9882
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population 1.152e+04 6.753e+03 9.856e-01 1.705 0.340
## tech_rank1 2.702e+04 1.141e+04 9.804e-01 2.369 0.258
## tech_rank2 1.016e+04 9.233e+03 1.006e+00 1.100 0.469
## tech_rank3 8.080e+02 1.342e+04 1.024e+00 0.060 0.962
## waste_picker_rank2 -2.511e+04 1.356e+04 9.793e-01 -1.851 0.319
## waste_picker_rank3 1.184e+04 1.383e+04 1.004e+00 0.856 0.549
## river_width_m -3.798e+03 7.450e+03 9.937e-01 -0.510 0.700
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.352
## tech_rank2 -0.053 0.256
## tech_rank3 0.310 0.325 0.273
## wst_pckr_r2 -0.492 -0.567 -0.422 -0.183
## wst_pckr_r3 0.134 -0.248 -0.550 -0.628 0.072
## rivr_wdth_m 0.234 0.318 0.005 0.633 0.045 -0.576
Plastic Debris LMER (river length)
# Print the summary of the plastic-debris model that uses river length (km)
# in place of river width.
summary(plastic_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_length_km - 1
## Data: fig2_glmm
##
## REML criterion at convergence: 5273.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8227 -0.2743 0.0197 0.1373 4.7217
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 8324501 2885
## river_id (Intercept) 85177225 9229
## Residual 97616643 9880
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population 1.305e+04 5.155e+03 9.676e-01 2.532 0.246
## tech_rank1 2.828e+04 8.420e+03 9.596e-01 3.358 0.193
## tech_rank2 8.855e+03 7.316e+03 1.002e+00 1.210 0.439
## tech_rank3 -1.329e+03 1.022e+04 1.022e+00 -0.130 0.917
## waste_picker_rank2 -2.554e+04 1.056e+04 9.607e-01 -2.420 0.258
## waste_picker_rank3 1.344e+04 1.037e+04 9.980e-01 1.297 0.419
## river_length_km -4.704e+03 4.534e+03 9.780e-01 -1.038 0.491
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.287
## tech_rank2 -0.081 0.276
## tech_rank3 0.083 0.175 0.380
## wst_pckr_r2 -0.520 -0.606 -0.403 -0.175
## wst_pckr_r3 0.359 -0.106 -0.653 -0.602 0.066
## rvr_lngth_k -0.138 0.066 0.172 0.607 0.069 -0.526
All Debris LMER (river width)
# Print the summary of the all-debris model that uses river width.
summary(all_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: all_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_width_m - 1
## Data: fig2_glmm
##
## REML criterion at convergence: 5430.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9587 -0.2669 -0.0121 0.1670 3.7092
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 19401676 4405
## river_id (Intercept) 181651928 13478
## Residual 182862666 13523
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population 1.787e+04 7.656e+03 9.776e-01 2.334 0.262
## tech_rank1 3.838e+04 1.293e+04 9.711e-01 2.969 0.213
## tech_rank2 1.649e+04 1.050e+04 1.010e+00 1.571 0.359
## tech_rank3 5.069e+03 1.529e+04 1.034e+00 0.332 0.795
## waste_picker_rank2 -3.527e+04 1.537e+04 9.684e-01 -2.295 0.268
## waste_picker_rank3 1.331e+04 1.571e+04 1.004e+00 0.847 0.552
## river_width_m -4.649e+03 8.454e+03 9.892e-01 -0.550 0.681
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.351
## tech_rank2 -0.056 0.256
## tech_rank3 0.308 0.326 0.275
## wst_pckr_r2 -0.491 -0.566 -0.422 -0.183
## wst_pckr_r3 0.135 -0.249 -0.550 -0.629 0.073
## rivr_wdth_m 0.235 0.319 0.004 0.633 0.045 -0.577
All Debris LMER (river length)
# Print the summary of the all-debris model that uses river length (km) in
# place of river width.
summary(all_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: all_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_length_km - 1
## Data: fig2_glmm
##
## REML criterion at convergence: 5430.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9573 -0.2788 -0.0114 0.1717 3.7077
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 19597343 4427
## river_id (Intercept) 103531761 10175
## Residual 182740127 13518
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population 1.972e+04 5.735e+03 9.493e-01 3.439 0.191
## tech_rank1 3.994e+04 9.362e+03 9.395e-01 4.266 0.159
## tech_rank2 1.493e+04 8.182e+03 1.004e+00 1.825 0.318
## tech_rank3 2.652e+03 1.146e+04 1.032e+00 0.231 0.854
## waste_picker_rank2 -3.577e+04 1.173e+04 9.390e-01 -3.048 0.215
## waste_picker_rank3 1.508e+04 1.158e+04 9.946e-01 1.302 0.418
## river_length_km -5.588e+03 5.051e+03 9.647e-01 -1.106 0.473
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.285
## tech_rank2 -0.085 0.278
## tech_rank3 0.080 0.176 0.383
## wst_pckr_r2 -0.518 -0.604 -0.403 -0.175
## wst_pckr_r3 0.361 -0.106 -0.652 -0.604 0.067
## rvr_lngth_k -0.137 0.066 0.170 0.606 0.070 -0.526
Proportion Debris LMER (river width)
# Print the summary of the plastic:all debris proportion model that uses
# river width. The recorded output reports a boundary (singular) fit, with
# the date random-intercept variance estimated as 0.
summary(prop_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## plastic_debris/all_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_width_m - 1
## Data: fig2_glmm
##
## REML criterion at convergence: -4.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4734 -0.5398 0.0206 0.3629 9.0973
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 0.00000 0.0000
## river_id (Intercept) 0.08493 0.2914
## Residual 0.05016 0.2240
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population -0.01167 0.16445 0.98908 -0.071 0.955
## tech_rank1 0.82551 0.27767 0.98241 2.973 0.210
## tech_rank2 0.49308 0.22439 1.00157 2.197 0.272
## tech_rank3 0.40551 0.32628 1.01959 1.243 0.428
## waste_picker_rank2 0.08242 0.33044 0.98385 0.249 0.845
## waste_picker_rank3 0.05642 0.33649 1.00463 0.168 0.894
## river_width_m 0.03632 0.18139 0.99633 0.200 0.874
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.352
## tech_rank2 -0.052 0.254
## tech_rank3 0.311 0.324 0.272
## wst_pckr_r2 -0.492 -0.567 -0.423 -0.184
## wst_pckr_r3 0.134 -0.248 -0.551 -0.627 0.072
## rivr_wdth_m 0.234 0.318 0.005 0.634 0.045 -0.577
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Proportion Debris LMER (river length)
# Print the summary of the plastic:all debris proportion model that uses
# river length (km). The recorded output reports a boundary (singular) fit,
# with the date random-intercept variance estimated as 0.
summary(prop_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## plastic_debris/all_debris ~ (1 | date) + (1 | river_id) + urban_population +
## tech_rank + waste_picker_rank + river_length_km - 1
## Data: fig2_glmm
##
## REML criterion at convergence: -4.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4778 -0.5402 0.0254 0.3590 9.0929
##
## Random effects:
## Groups Name Variance Std.Dev.
## date (Intercept) 0.00000 0.0000
## river_id (Intercept) 0.08682 0.2947
## Residual 0.05016 0.2240
## Number of obs: 253, groups: date, 43; river_id, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## urban_population -0.01634 0.16320 0.98482 -0.100 0.937
## tech_rank1 0.80542 0.26668 0.97817 3.020 0.208
## tech_rank2 0.48748 0.23034 0.99734 2.116 0.282
## tech_rank3 0.33799 0.32132 1.01066 1.052 0.482
## waste_picker_rank2 0.07634 0.33449 0.98136 0.228 0.858
## waste_picker_rank3 0.11833 0.32694 1.00045 0.362 0.779
## river_length_km -0.01921 0.14334 0.99036 -0.134 0.915
##
## Correlation of Fixed Effects:
## urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1 0.288
## tech_rank2 -0.078 0.273
## tech_rank3 0.086 0.173 0.377
## wst_pckr_r2 -0.521 -0.607 -0.404 -0.176
## wst_pckr_r3 0.358 -0.106 -0.653 -0.600 0.066
## rvr_lngth_k -0.138 0.067 0.174 0.608 0.069 -0.527
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
ANOVA Setup
We want to test whether plastic debris and total debris differ significantly across sites.
Option 1: ANOVA by location
Option 2: LMER with date as random effect, pairwise test and compare across all sites where the reference level is set for each river
Of the two options, the ANOVA is the easiest to interpret and the most natural fit, so we use Option 1.
Run ANOVA scenarios
# One-way ANOVA: does plastic debris differ between rivers?
anova_plastic <- aov(formula = plastic_debris ~ river_id, data = fig2_glmm)
summary(anova_plastic)

# One-way ANOVA: does total (all) debris differ between rivers?
anova_debris <- aov(formula = all_debris ~ river_id, data = fig2_glmm)
summary(anova_debris)
ANOVA results
Plastic Debris ANOVA
# ANOVA table for plastic debris by river.
summary(anova_plastic)
## Df Sum Sq Mean Sq F value Pr(>F)
## river_id 7 3.803e+10 5.433e+09 51.48 <2e-16 ***
## Residuals 245 2.586e+10 1.055e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size (eta squared and partial eta squared) for the river term;
# EtaSq() is provided by the DescTools package.
EtaSq(anova_plastic)
## eta.sq eta.sq.part
## river_id 0.5952577 0.5952577
# Extract the unrounded p-value: the first element of the aov summary is the
# ANOVA table, and row 1 of its "Pr(>F)" column is the river_id term.
p_value_plastic <- summary(anova_plastic)[[1]][["Pr(>F)"]][1]
print(p_value_plastic)
## [1] 1.08712e-44
Plastic debris collected across sites differs significantly (p = 1.08712e-44).
All Debris ANOVA
# ANOVA table for all debris by river.
summary(anova_debris)
## Df Sum Sq Mean Sq F value Pr(>F)
## river_id 7 7.430e+10 1.061e+10 52.98 <2e-16 ***
## Residuals 245 4.908e+10 2.003e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size (eta squared and partial eta squared) for the river term;
# EtaSq() is provided by the DescTools package.
EtaSq(anova_debris)
## eta.sq eta.sq.part
## river_id 0.6021763 0.6021763
# Extract the unrounded p-value: the first element of the aov summary is the
# ANOVA table, and row 1 of its "Pr(>F)" column is the river_id term.
p_value_debris <- summary(anova_debris)[[1]][["Pr(>F)"]][1]
print(p_value_debris)
## [1] 1.353249e-45
Total debris collected across sites differs significantly (p = 1.353249e-45).
Chi Square Setup
Prepare Chi Square contingency tables
# Chi-square inputs: the proportion data behind Figures 3, 4, and 5.
fig3_data <- read_csv("fig3_data.csv")
fig4_data <- read_csv("fig4_data.csv")
fig5_data <- read_csv("fig5_data.csv")

# Reshape long proportion data into a wide table of percentages.
#
# data:           data frame with a `country` column plus the two columns
#                 named below.
# category_col:   name (string) of the column whose values become the output
#                 column names.
# proportion_col: name (string) of the column holding the proportions.
#
# Returns a table with one row per country (the country column itself is
# dropped) and one column per category, with each proportion multiplied by
# 100 and rounded to one decimal place.
make_percent_table <- function(data, category_col, proportion_col) {
  data %>%
    select(country, all_of(c(category_col, proportion_col))) %>%
    pivot_wider(
      names_from = all_of(category_col),
      values_from = all_of(proportion_col)
    ) %>%
    select(!country) %>%
    mutate(across(where(is.numeric), ~ round(.x * 100, 1)))
}

# Contingency table of polymer proportions (%)
fig3_data_contingency <- make_percent_table(
  fig3_data, "polymer", "polymer_proportions"
)

# Contingency table of single-use item proportions (%)
fig4_data_contingency <- make_percent_table(
  fig4_data, "item", "item_proportions"
)

# Contingency table of end-of-life fate proportions (%)
fig5_data_contingency <- make_percent_table(
  fig5_data, "fate", "fate_proportions"
)
Run Chi Square tests
# Pearson chi-square test on each percentage table (rows are sites, columns
# are categories).
# NOTE(review): the tables hold percentages, not counts. The Pearson statistic
# scales with the multiplier applied to the proportions (here 100), so the
# resulting p-values depend on that choice -- confirm whether raw counts
# should be used instead.
fig3_chi <- chisq.test(x = fig3_data_contingency)
fig4_chi <- chisq.test(x = fig4_data_contingency)
fig5_chi <- chisq.test(x = fig5_data_contingency)
Chi Square results
Polymer Proportion Chi Square
# Print the polymer-proportion test result. The printed p-value is truncated
# at 2.2e-16; the unrounded value is stored in fig3_chi$p.value.
fig3_chi
##
## Pearson's Chi-squared test
##
## data: fig3_data_contingency
## X-squared = 751.39, df = 42, p-value < 2.2e-16
The polymer proportions across sites are significantly different (p = 9.388124e-131).
Single-Use Item Proportion Chi Square
# Print the single-use item proportion test result. The printed p-value is
# truncated at 2.2e-16; the unrounded value is stored in fig4_chi$p.value.
fig4_chi
##
## Pearson's Chi-squared test
##
## data: fig4_data_contingency
## X-squared = 124.48, df = 12, p-value < 2.2e-16
The single-use item proportions across sites are significantly different (p = 7.86554e-21).
End-of-Life Fate Proportion Chi Square
# Print the end-of-life fate proportion test result. The printed p-value is
# truncated at 2.2e-16; the unrounded value is stored in fig5_chi$p.value.
fig5_chi
##
## Pearson's Chi-squared test
##
## data: fig5_data_contingency
## X-squared = 1619, df = 28, p-value < 2.2e-16
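The end-of-life fate proportions across sites are significantly different (p < 2.2e-16; the computed value, 4.940656e-324, is at the lower limit of double-precision representation and should not be read as an exact p-value).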