Packages

library(tidyverse)
library(dplyr)
library(zoo)
library(devtools)
library(fastR2)
library(ggplot2)

Datasets

icc_data.csv

fig4_data.csv

Manage and Wrangle Coastal Cleanup Data (icc_data.csv) to Analysis-Ready Format (final_icc_dataset.csv)

Wrangle the coastal cleanup data into the same format as fig4_data.csv

# Read the river reference table and the raw coastal cleanup records
fig4_data <- read_csv("fig4_data.csv")

# Read the cleanup records and coerce column types in a single step:
# cleanup_date becomes a zoo yearmon, item and weight columns become numeric
icc_data <- read_csv("icc_data.csv") %>%
  mutate(
    cleanup_date = as.yearmon(cleanup_date),
    across(
      c(bottles_sum, bag_sum, wrapper_sum, foam_sum, other_sum, total_weight),
      as.numeric
    )
  )

# Keep only records that report a positive total weight.
# The comparison is vectorised, so rowwise() is not needed (it only forces
# slow per-row evaluation and leaves the data frame rowwise). Rows whose
# total_weight is NA are dropped too, because filter() discards NA results.
icc_data <- icc_data %>%
  filter(total_weight > 0)

# Combine records so there is exactly one row per cleanup month and country.
# Each item column is summed with NAs ignored; total_weight is summed into
# plastic_debris_items_total, which later serves as the per-country total.
# .groups = "drop" returns an ungrouped tibble, so no residual grouping on
# cleanup_date leaks into later steps and no regrouping message is printed.
icc_data <- icc_data %>%
  group_by(cleanup_date, country) %>%
  summarise(
    plastic_item_bottles = sum(bottles_sum, na.rm = TRUE),
    plastic_item_bags = sum(bag_sum, na.rm = TRUE),
    plastic_item_wrappers = sum(wrapper_sum, na.rm = TRUE),
    plastic_item_foam = sum(foam_sum, na.rm = TRUE),
    plastic_item_other = sum(other_sum, na.rm = TRUE),
    plastic_debris_items_total = sum(total_weight, na.rm = TRUE),
    .groups = "drop"
  )

# Reshape to long format: one row per cleanup month, country and item column.
# The column name goes to `item` and its value to `mass_kg`.
icc_data <- pivot_longer(
  icc_data,
  cols = all_of(c(
    "plastic_item_bottles",
    "plastic_item_bags",
    "plastic_item_wrappers",
    "plastic_item_foam",
    "plastic_item_other",
    "plastic_debris_items_total"
  )),
  names_to = "item",
  values_to = "mass_kg"
)

# Enforce column classes on the long table: numeric mass, factor item,
# yearmon cleanup date
icc_data[["mass_kg"]] <- as.numeric(icc_data[["mass_kg"]])
icc_data[["item"]] <- as.factor(icc_data[["item"]])
icc_data[["cleanup_date"]] <- as.yearmon(icc_data[["cleanup_date"]])

# Collapse across months: total mass of each item for each country.
# .groups = "drop" returns an ungrouped tibble and silences the regrouping
# message; later steps group explicitly where they need to.
icc_data <- icc_data %>%
  group_by(country, item) %>%
  summarise(
    item_total_mass = sum(mass_kg, na.rm = TRUE),
    .groups = "drop"
  )

# Proportion of each item within its country: the item's total divided by the
# country's "plastic_debris_items_total" row.
# Calculating this using the monthly mean/monthly total yields exact same results.
icc_data <- icc_data %>%
  group_by(country) %>%
  mutate(
    item_proportions = {
      country_total <- item_total_mass[item == "plastic_debris_items_total"]
      item_total_mass / country_total
    }
  )

# Prepare inputs for the 95% Wilson score intervals

# "trials" holds each country's total plastic weight (the value of its
# "plastic_debris_items_total" row), repeated on every row of that country.
# Both interval inputs are then coerced to numeric.
icc_data <- icc_data %>%
  group_by(country) %>%
  mutate(trials = item_total_mass[item == "plastic_debris_items_total"]) %>%
  mutate(across(c(item_total_mass, trials), as.numeric))

# Wilson interval per row: the item's total weight in a country is treated as
# the "successes" and that country's total weight (trials) as the number of
# trials.
# wilson.ci() (fastR2) is called once per row via map2() and both bounds are
# read from that single result, rather than calling it twice per row.
# The temporary list-column is dropped again, so the column layout is the same
# as before, and the result is a plain ungrouped tibble instead of a rowwise one.
# NOTE(review): the inputs are masses rather than counts -- confirm that
# treating them as binomial successes/trials is the intended interpretation.
icc_data <- icc_data %>%
  ungroup() %>%
  mutate(
    wilson_ci = map2(
      item_total_mass,
      trials,
      function(successes, n_trials) {
        wilson.ci(successes, n = n_trials, conf.level = 0.95)
      }
    ),
    wilson_lower = map_dbl(wilson_ci, function(ci) ci[[1]]),
    wilson_upper = map_dbl(wilson_ci, function(ci) ci[[2]])
  ) %>%
  select(-wilson_ci)

# Keep the analysis columns by name (this drops the helper column `trials`).
# Selecting by name instead of by position keeps this step correct even if an
# upstream step changes the column order.
icc_data <- icc_data %>%
  select(
    country,
    item,
    item_total_mass,
    item_proportions,
    wilson_lower,
    wilson_upper
  )

# Drop the per-country total rows; they were only needed as denominators
icc_data <- filter(icc_data, item != "plastic_debris_items_total")

Coastal Cleanup Data Visualizations

Coastal Cleanup Data Only

# Item as a factor whose level order sets the stacking order of the bars
icc_data[["item"]] <- factor(
  icc_data[["item"]],
  levels = c(
    "plastic_item_other",
    "plastic_item_foam",
    "plastic_item_bottles",
    "plastic_item_wrappers",
    "plastic_item_bags"
  )
)

# Country as a factor whose level order sets the left-to-right x-axis order
icc_data[["country"]] <- factor(
  icc_data[["country"]],
  levels = c("Vietnam", "Ecuador", "Indonesia", "Kenya")
)

# Stacked bar chart: one bar per country (x-axis), item proportions stacked on
# the y-axis, fill colour and grouping by item type
figure_coastalcleanup <- ggplot(
  icc_data,
  aes(x = country, y = item_proportions, fill = item, group = item)
) +
  geom_col(color = "#636363", linewidth = 0.4, alpha = 0.7) +
  # color-blind-friendly palette emphasizing plastic bags with a standout color
  scale_fill_manual(
    values = c("#DEDEDE", "#00a37d", "#18a94e", "#5eb200", "#d6ca00")
  ) +
  scale_y_continuous(
    labels = scales::label_percent(),
    breaks = seq(0, 1, .1),
    limits = c(0, 1.0000000001),
    expand = c(0.005, 0.005)
  ) +
  theme_classic() +
  # x-axis text, axis titles and legend are left blank because these details
  # are designed in Canva
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    axis.text.y = element_text(face = "bold", colour = "#d6ca00")
  )

figure_coastalcleanup

Coastal Cleanup and River Side-by-Side

# Tag every coastal cleanup row with its data source
icc_data <- mutate(icc_data, source = "coast")

# Load the river data and tag every row with its data source
fig4_data <- read_csv("fig4_data.csv") %>%
  mutate(source = "river")

# Stack river rows (without river_id) on top of the coastal rows, then keep
# only the columns the figure needs
data_combined <- bind_rows(select(fig4_data, -river_id), icc_data) %>%
  select(country, item, item_total_mass, item_proportions, source)

# Reclassify source, item and country as factors.
# The item level order sets the stacking order; the country level order sets
# the facet order.
data_combined <- data_combined %>%
  mutate(
    source = as.factor(source),
    item = factor(
      item,
      levels = c(
        "plastic_item_other",
        "plastic_item_foam",
        "plastic_item_bottles",
        "plastic_item_wrappers",
        "plastic_item_bags"
      )
    ),
    country = factor(
      country,
      levels = c("Vietnam", "Ecuador", "Indonesia", "Kenya")
    )
  )

# Side-by-side stacked bars: within each country facet, one bar per data
# source (coast vs river) on the x-axis, item proportions stacked on the
# y-axis, fill colour by item type.
figure_coastalcleanup_river <- ggplot(data_combined) +
  geom_bar(
    aes(fill = item, y = item_proportions, x = source),
    color = "#636363",
    stat = "identity",
    position = "stack",
    linewidth = 0.4,
    alpha = 0.7
  ) +
  facet_grid(~country, switch = "x") + # one panel per country, strips at the bottom
  # theme_classic() is a complete theme and replaces every theme setting added
  # before it, so it has to come BEFORE the theme() tweaks below. Otherwise
  # the strip and panel-spacing settings are silently discarded.
  theme_classic() +
  theme(
    strip.placement = "outside",
    strip.background = element_rect(fill = NA, color = "white"),
    panel.spacing = unit(-.01, "cm"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(face = "bold", colour = "#d6ca00")
  ) +
  scale_fill_manual(
    values = c("#DEDEDE", "#00a37d", "#18a94e", "#5eb200", "#d6ca00"),
    name = "Item Category",
    labels = c(
      "Other",
      "Food & Packaging Foam",
      "Beverage Bottles",
      "Food Wrappers",
      "Grocery & Trash Bags"
    )
  ) +
  scale_y_continuous(
    labels = scales::label_percent(),
    breaks = seq(0, 1, .1),
    limits = c(0, 1.0000000001),
    expand = c(0.005, 0.005)
  ) +
  # linewidth replaces the deprecated `size` for lines, matching geom_bar above.
  # NOTE(review): x now has only two discrete positions per facet (the values
  # of `source`), so a line at x = 3.5 falls outside the bars and looks like a
  # leftover from a layout with the four countries on the x-axis -- confirm it
  # is still wanted.
  geom_vline(xintercept = c(3.5), linewidth = 2, colour = "black", lty = 5)

figure_coastalcleanup_river