Packages

library(tidyverse)
library(corrplot)
library(lmerTest)
library(lme4)
library(vegan)
library(DescTools)

Datasets

fig2_data.csv

fig2_glmm.csv

fig3_data.csv

fig4_data.csv

fig5_data.csv

Figure 2. Total Debris & Plastic Debris Collected Statistics

GLMM Setup

  1. Plastic Debris
  2. Total Debris
  3. Plastic Debris:Total Debris

Check for collinearity

# Collinearity check: correlation matrix of the predictor variables, built
# from the monthly-means Figure 2 data.

# Read the Figure 2 data
fig2_data <- read_csv("fig2_data.csv")

# Keep only the plastic-debris rows (this removes the duplicated predictor
# rows), retain the predictor columns, convert river length from m to km,
# and coerce the result to a matrix.
fig2_cor <- fig2_data %>%
  filter(debris_type == "plastic_debris") %>%
  select(
    urban_population,
    tech_rank,
    waste_picker_rank,
    river_width_m,
    river_length
  ) %>%
  mutate(river_length = river_length / 1000) %>%
  as.matrix()

# Print the pairwise correlations, then draw the correlation plot
predictor_cor <- cor(fig2_cor)
print(predictor_cor)
corrplot(predictor_cor)

River length and river width appear to be highly collinear (correlation coefficient > 0.88). Therefore, we will run two GLMMs for each debris scenario: one that includes river width and one that includes river length, but never both in the same model.

Run GLMM scenarios

# Using the monthly totals Figure 2 data with the predictor variables included

# Load data
fig2_glmm <- read_csv("fig2_glmm.csv")

# Check the variable classes in the data frame. Technology ranking and waste
# picker ranking are categorical variables and therefore should be factors.
sapply(fig2_glmm, class)
fig2_glmm$tech_rank <- as.factor(fig2_glmm$tech_rank)
fig2_glmm$waste_picker_rank <- as.factor(fig2_glmm$waste_picker_rank)

# Z-score the continuous variables; without this lmer() warns that the
# predictors are on very different scales ("consider rescaling").
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes, so as.numeric() is applied to keep every column
# a plain numeric vector. The scaled values themselves are unchanged.
fig2_glmm$urban_population <- as.numeric(scale(fig2_glmm$urban_population))
fig2_glmm$river_width_m <- as.numeric(scale(fig2_glmm$river_width_m))
fig2_glmm$river_length_km <- as.numeric(scale(fig2_glmm$river_length_km))

# All models below share the same structure:
#   - random intercepts for date and river_id
#   - fixed effects for urban population, technology rank, waste picker rank,
#     and ONE of river width / river length (the two are collinear)
#   - "- 1" removes the intercept, so the first factor in the formula
#     (tech_rank) gets a coefficient for each of its levels rather than a
#     reference level

# Plastic debris scenario
plastic_debris_model_width <- lmer(
  plastic_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_width_m-1,
  data = fig2_glmm
) # river width
plastic_debris_model_length <- lmer(
  plastic_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_length_km-1,
  data = fig2_glmm
) # river length
summary(plastic_debris_model_width)
summary(plastic_debris_model_length)

# All debris scenario
all_debris_model_width <- lmer(
  all_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_width_m-1,
  data = fig2_glmm
) # river width
all_debris_model_length <- lmer(
  all_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_length_km-1,
  data = fig2_glmm
) # river length
summary(all_debris_model_width)
summary(all_debris_model_length)

# Proportion of plastic:all debris scenario
prop_debris_model_width <- lmer(
  plastic_debris/all_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_width_m-1,
  data = fig2_glmm
) # river width
prop_debris_model_length <- lmer(
  plastic_debris/all_debris ~ (1|date) + (1|river_id) + urban_population + tech_rank + waste_picker_rank + river_length_km-1,
  data = fig2_glmm
) # river length
summary(prop_debris_model_width)
summary(prop_debris_model_length)

# Plastic debris with river as a fixed effect and date as a random intercept.
# NOTE(review): this model object has the same name as the plastic_debris
# column. The formula still picks up the column because variables are looked
# up in `data` first, but a distinct object name would be less confusing.
plastic_debris <- lmer(plastic_debris ~ (1|date) + river_id, data = fig2_glmm)
summary(plastic_debris)

# One-way ANOVA of plastic debris by river.
# NOTE(review): the object name `anova` is also the name of the anova()
# function; consider renaming. The same model is fitted again later in this
# file as `anova_plastic`.
anova <- aov(plastic_debris ~ river_id, data = fig2_glmm)
summary(anova)

GLMM results

Across all models, there are no significant effects from any of our predictor variables.

Plastic Debris LMER (river width)

summary(plastic_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_width_m - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: 5273.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8233 -0.2720  0.0179  0.1364  4.7218 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  date     (Intercept)   8269231  2876   
##  river_id (Intercept) 142757718 11948   
##  Residual              97654069  9882   
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)
## urban_population    1.152e+04  6.753e+03  9.856e-01   1.705    0.340
## tech_rank1          2.702e+04  1.141e+04  9.804e-01   2.369    0.258
## tech_rank2          1.016e+04  9.233e+03  1.006e+00   1.100    0.469
## tech_rank3          8.080e+02  1.342e+04  1.024e+00   0.060    0.962
## waste_picker_rank2 -2.511e+04  1.356e+04  9.793e-01  -1.851    0.319
## waste_picker_rank3  1.184e+04  1.383e+04  1.004e+00   0.856    0.549
## river_width_m      -3.798e+03  7.450e+03  9.937e-01  -0.510    0.700
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.352                                   
## tech_rank2  -0.053  0.256                            
## tech_rank3   0.310  0.325  0.273                     
## wst_pckr_r2 -0.492 -0.567 -0.422 -0.183              
## wst_pckr_r3  0.134 -0.248 -0.550 -0.628  0.072       
## rivr_wdth_m  0.234  0.318  0.005  0.633  0.045 -0.576

Plastic Debris LMER (river length)

summary(plastic_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: plastic_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_length_km - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: 5273.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8227 -0.2743  0.0197  0.1373  4.7217 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  date     (Intercept)  8324501 2885    
##  river_id (Intercept) 85177225 9229    
##  Residual             97616643 9880    
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)
## urban_population    1.305e+04  5.155e+03  9.676e-01   2.532    0.246
## tech_rank1          2.828e+04  8.420e+03  9.596e-01   3.358    0.193
## tech_rank2          8.855e+03  7.316e+03  1.002e+00   1.210    0.439
## tech_rank3         -1.329e+03  1.022e+04  1.022e+00  -0.130    0.917
## waste_picker_rank2 -2.554e+04  1.056e+04  9.607e-01  -2.420    0.258
## waste_picker_rank3  1.344e+04  1.037e+04  9.980e-01   1.297    0.419
## river_length_km    -4.704e+03  4.534e+03  9.780e-01  -1.038    0.491
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.287                                   
## tech_rank2  -0.081  0.276                            
## tech_rank3   0.083  0.175  0.380                     
## wst_pckr_r2 -0.520 -0.606 -0.403 -0.175              
## wst_pckr_r3  0.359 -0.106 -0.653 -0.602  0.066       
## rvr_lngth_k -0.138  0.066  0.172  0.607  0.069 -0.526

All Debris LMER (river width)

summary(all_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: all_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_width_m - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: 5430.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9587 -0.2669 -0.0121  0.1670  3.7092 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  date     (Intercept)  19401676  4405   
##  river_id (Intercept) 181651928 13478   
##  Residual             182862666 13523   
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)
## urban_population    1.787e+04  7.656e+03  9.776e-01   2.334    0.262
## tech_rank1          3.838e+04  1.293e+04  9.711e-01   2.969    0.213
## tech_rank2          1.649e+04  1.050e+04  1.010e+00   1.571    0.359
## tech_rank3          5.069e+03  1.529e+04  1.034e+00   0.332    0.795
## waste_picker_rank2 -3.527e+04  1.537e+04  9.684e-01  -2.295    0.268
## waste_picker_rank3  1.331e+04  1.571e+04  1.004e+00   0.847    0.552
## river_width_m      -4.649e+03  8.454e+03  9.892e-01  -0.550    0.681
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.351                                   
## tech_rank2  -0.056  0.256                            
## tech_rank3   0.308  0.326  0.275                     
## wst_pckr_r2 -0.491 -0.566 -0.422 -0.183              
## wst_pckr_r3  0.135 -0.249 -0.550 -0.629  0.073       
## rivr_wdth_m  0.235  0.319  0.004  0.633  0.045 -0.577

All Debris LMER (river length)

summary(all_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: all_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_length_km - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: 5430.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9573 -0.2788 -0.0114  0.1717  3.7077 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  date     (Intercept)  19597343  4427   
##  river_id (Intercept) 103531761 10175   
##  Residual             182740127 13518   
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)
## urban_population    1.972e+04  5.735e+03  9.493e-01   3.439    0.191
## tech_rank1          3.994e+04  9.362e+03  9.395e-01   4.266    0.159
## tech_rank2          1.493e+04  8.182e+03  1.004e+00   1.825    0.318
## tech_rank3          2.652e+03  1.146e+04  1.032e+00   0.231    0.854
## waste_picker_rank2 -3.577e+04  1.173e+04  9.390e-01  -3.048    0.215
## waste_picker_rank3  1.508e+04  1.158e+04  9.946e-01   1.302    0.418
## river_length_km    -5.588e+03  5.051e+03  9.647e-01  -1.106    0.473
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.285                                   
## tech_rank2  -0.085  0.278                            
## tech_rank3   0.080  0.176  0.383                     
## wst_pckr_r2 -0.518 -0.604 -0.403 -0.175              
## wst_pckr_r3  0.361 -0.106 -0.652 -0.604  0.067       
## rvr_lngth_k -0.137  0.066  0.170  0.606  0.070 -0.526

Proportion Debris LMER (river width)

summary(prop_debris_model_width)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## plastic_debris/all_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_width_m - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: -4.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4734 -0.5398  0.0206  0.3629  9.0973 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  date     (Intercept) 0.00000  0.0000  
##  river_id (Intercept) 0.08493  0.2914  
##  Residual             0.05016  0.2240  
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                    Estimate Std. Error       df t value Pr(>|t|)
## urban_population   -0.01167    0.16445  0.98908  -0.071    0.955
## tech_rank1          0.82551    0.27767  0.98241   2.973    0.210
## tech_rank2          0.49308    0.22439  1.00157   2.197    0.272
## tech_rank3          0.40551    0.32628  1.01959   1.243    0.428
## waste_picker_rank2  0.08242    0.33044  0.98385   0.249    0.845
## waste_picker_rank3  0.05642    0.33649  1.00463   0.168    0.894
## river_width_m       0.03632    0.18139  0.99633   0.200    0.874
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.352                                   
## tech_rank2  -0.052  0.254                            
## tech_rank3   0.311  0.324  0.272                     
## wst_pckr_r2 -0.492 -0.567 -0.423 -0.184              
## wst_pckr_r3  0.134 -0.248 -0.551 -0.627  0.072       
## rivr_wdth_m  0.234  0.318  0.005  0.634  0.045 -0.577
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Proportion Debris LMER (river length)

summary(prop_debris_model_length)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## plastic_debris/all_debris ~ (1 | date) + (1 | river_id) + urban_population +  
##     tech_rank + waste_picker_rank + river_length_km - 1
##    Data: fig2_glmm
## 
## REML criterion at convergence: -4.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4778 -0.5402  0.0254  0.3590  9.0929 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  date     (Intercept) 0.00000  0.0000  
##  river_id (Intercept) 0.08682  0.2947  
##  Residual             0.05016  0.2240  
## Number of obs: 253, groups:  date, 43; river_id, 8
## 
## Fixed effects:
##                    Estimate Std. Error       df t value Pr(>|t|)
## urban_population   -0.01634    0.16320  0.98482  -0.100    0.937
## tech_rank1          0.80542    0.26668  0.97817   3.020    0.208
## tech_rank2          0.48748    0.23034  0.99734   2.116    0.282
## tech_rank3          0.33799    0.32132  1.01066   1.052    0.482
## waste_picker_rank2  0.07634    0.33449  0.98136   0.228    0.858
## waste_picker_rank3  0.11833    0.32694  1.00045   0.362    0.779
## river_length_km    -0.01921    0.14334  0.99036  -0.134    0.915
## 
## Correlation of Fixed Effects:
##             urbn_p tch_r1 tch_r2 tch_r3 wst__2 wst__3
## tech_rank1   0.288                                   
## tech_rank2  -0.078  0.273                            
## tech_rank3   0.086  0.173  0.377                     
## wst_pckr_r2 -0.521 -0.607 -0.404 -0.176              
## wst_pckr_r3  0.358 -0.106 -0.653 -0.600  0.066       
## rvr_lngth_k -0.138  0.067  0.174  0.608  0.069 -0.527
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

ANOVA Setup

  1. Plastic Debris
  2. Total Debris

Run ANOVA scenarios

# One-way ANOVA: plastic debris collected, grouped by river
anova_plastic <- aov(formula = plastic_debris ~ river_id, data = fig2_glmm)
summary(object = anova_plastic)

# One-way ANOVA: all debris collected, grouped by river
anova_debris <- aov(formula = all_debris ~ river_id, data = fig2_glmm)
summary(object = anova_debris)

ANOVA results

Plastic Debris ANOVA

# ANOVA table for plastic debris by river; the lines starting with "##" are
# the recorded console output of the call that precedes them.
summary(anova_plastic)
##              Df    Sum Sq   Mean Sq F value Pr(>F)    
## river_id      7 3.803e+10 5.433e+09   51.48 <2e-16 ***
## Residuals   245 2.586e+10 1.055e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size (eta squared) for the river_id term
EtaSq(anova_plastic)
##             eta.sq eta.sq.part
## river_id 0.5952577   0.5952577
# Extract the p-value
# summary() prints "<2e-16" above; the first element of the summary is the
# ANOVA table, and the first entry of its "Pr(>F)" column is the river_id row.
p_value_plastic <- summary(anova_plastic)[[1]][["Pr(>F)"]][1]
print(p_value_plastic)
## [1] 1.08712e-44

Plastic debris collected across sites differs significantly (p = 1.08712e-44).

All Debris ANOVA

# ANOVA table for all debris by river; the lines starting with "##" are the
# recorded console output of the call that precedes them.
summary(anova_debris)
##              Df    Sum Sq   Mean Sq F value Pr(>F)    
## river_id      7 7.430e+10 1.061e+10   52.98 <2e-16 ***
## Residuals   245 4.908e+10 2.003e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size (eta squared) for the river_id term
EtaSq(anova_debris)
##             eta.sq eta.sq.part
## river_id 0.6021763   0.6021763
# Extract the p-value
# summary() prints "<2e-16" above; the first element of the summary is the
# ANOVA table, and the first entry of its "Pr(>F)" column is the river_id row.
p_value_debris <- summary(anova_debris)[[1]][["Pr(>F)"]][1]
print(p_value_debris)
## [1] 1.353249e-45

Total debris collected across sites differs significantly (p = 1.353249e-45).

Figure 3. Polymer Composition of Plastic Debris Collected

Figure 4. Proportion of Single-Use Plastic Item Categories in Plastic Debris Collected

Figure 5. End-of-Life Fate of Plastic Debris Collected

Chi Square Setup

  1. Polymer Proportions across sites
  2. Single-Use Item Proportions across subset of sites
  3. End-of-Life Fate Proportions across sites

Prepare Chi Square contingency tables

# Using proportion data for Figures 3, 4, and 5

# Load data
fig3_data <- read_csv("fig3_data.csv")
fig4_data <- read_csv("fig4_data.csv")
fig5_data <- read_csv("fig5_data.csv")

# Build a country-by-category table of percentages from long-format
# proportion data.
#
# data:       long-format data frame containing a `country` column
# names_col:  name (string) of the column whose values become table columns
# values_col: name (string) of the column holding the proportions
#
# Returns the widened table with the `country` column dropped and every
# numeric column multiplied by 100 (%) and rounded to 1 decimal place.
make_percent_table <- function(data, names_col, values_col) {
  data %>%
    select(all_of(c("country", names_col, values_col))) %>%
    pivot_wider(
      names_from = all_of(names_col),
      values_from = all_of(values_col)
    ) %>%
    select(!country) %>%
    mutate(across(where(is.numeric), ~ .x * 100)) %>%
    round(1)
}

# Contingency table of polymer proportions
fig3_data_contingency <- make_percent_table(
  fig3_data, "polymer", "polymer_proportions"
)

# Contingency table of single-use item proportions
fig4_data_contingency <- make_percent_table(
  fig4_data, "item", "item_proportions"
)

# Contingency table of end-of-life fate proportions
fig5_data_contingency <- make_percent_table(
  fig5_data, "fate", "fate_proportions"
)

Run Chi Square tests

# Pearson chi-squared test on each table (polymers, single-use items,
# end-of-life fates).
# NOTE(review): the tables passed here hold percentages rounded to one
# decimal place, not counts; chisq.test() documents its matrix input as a
# contingency table of non-negative integers, so confirm this usage is
# intended.
fig3_chi <- chisq.test(x = fig3_data_contingency)
fig4_chi <- chisq.test(x = fig4_data_contingency)
fig5_chi <- chisq.test(x = fig5_data_contingency)

Chi Square results

Polymer Proportion Chi Square

fig3_chi
## 
##  Pearson's Chi-squared test
## 
## data:  fig3_data_contingency
## X-squared = 751.39, df = 42, p-value < 2.2e-16

The polymer proportions across sites are significantly different (p = 9.388124e-131).

Single-Use Item Proportion Chi Square

fig4_chi
## 
##  Pearson's Chi-squared test
## 
## data:  fig4_data_contingency
## X-squared = 124.48, df = 12, p-value < 2.2e-16

The single-use item proportions across sites are significantly different (p = 7.86554e-21).

End-of-Life Fate Proportion Chi Square

fig5_chi
## 
##  Pearson's Chi-squared test
## 
## data:  fig5_data_contingency
## X-squared = 1619, df = 28, p-value < 2.2e-16

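The end-of-life fate proportions across sites are significantly different (p = 4.940656e-324; this is the smallest positive number a double-precision value can hold, i.e., the p-value is effectively zero).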