# Packages
library(tidyverse)
library(dplyr)
library(zoo)
library(devtools)
library(fastR2)
library(ggplot2)
# Datasets
# - icc_data.csv
# - fig4_data.csv
# Wrangle coastal cleanup data into the same format as fig4_data.csv
# Read the coastal cleanup records and the river data from the working
# directory.
icc_data <- read_csv("icc_data.csv")
fig4_data <- read_csv("fig4_data.csv")

# Coerce column types in a single pass: the cleanup date becomes a zoo
# year-month value, and every item/weight column becomes numeric.
icc_data <- icc_data %>%
  mutate(
    cleanup_date = as.yearmon(cleanup_date),
    across(
      c(bottles_sum, bag_sum, wrapper_sum, foam_sum, other_sum, total_weight),
      as.numeric
    )
  )
# Keep only the records that report a positive total weight.
# The comparison is vectorised, so no rowwise() step is needed; dropping it
# avoids per-row evaluation and leaves a plain (non-rowwise) data frame.
# Rows whose total_weight is NA are removed too, because filter() discards
# rows where the condition evaluates to NA.
icc_data <- icc_data %>%
  filter(total_weight > 0)
# Collapse to one row per month and country by summing every record that
# shares the same cleanup_date and country. NA values are ignored in the sums.
# plastic_debris_items_total holds the summed total_weight and serves as the
# denominator for the proportions calculated further down.
# .groups = "drop" returns an ungrouped result, so no leftover grouping by
# cleanup_date leaks into later steps (and the regrouping message goes away).
icc_data <- icc_data %>%
  group_by(cleanup_date, country) %>%
  summarise(
    plastic_item_bottles = sum(bottles_sum, na.rm = TRUE),
    plastic_item_bags = sum(bag_sum, na.rm = TRUE),
    plastic_item_wrappers = sum(wrapper_sum, na.rm = TRUE),
    plastic_item_foam = sum(foam_sum, na.rm = TRUE),
    plastic_item_other = sum(other_sum, na.rm = TRUE),
    plastic_debris_items_total = sum(total_weight, na.rm = TRUE),
    .groups = "drop"
  )
# Reshape from wide to long: every month/country combination gets one row per
# item column, with the column name stored in `item` and its value in
# `mass_kg`.
icc_data <- pivot_longer(
  icc_data,
  cols = plastic_item_bottles:plastic_debris_items_total,
  names_to = "item",
  values_to = "mass_kg"
)

# Make the column types explicit after the reshape: item as a factor, the
# value column as numeric, and the date as a zoo year-month.
icc_data[["item"]] <- as.factor(icc_data[["item"]])
icc_data[["mass_kg"]] <- as.numeric(icc_data[["mass_kg"]])
icc_data[["cleanup_date"]] <- as.yearmon(icc_data[["cleanup_date"]])
# Add up the months: one row per country and item, holding the total of that
# item across all cleanup dates for that country (NA values ignored).
icc_data <- icc_data %>%
  group_by(country, item) %>%
  summarise(item_total_mass = sum(mass_kg, na.rm = TRUE))
# Per country, express each item total as a share of that country's
# "plastic_debris_items_total" row, and copy that same total into `trials`
# so it can serve as the number of trials for the confidence intervals
# computed next (item_total_mass is the number of successes).
# Both helper columns are kept numeric. item_proportions is created before
# trials so the column order stays the same for later steps.
icc_data <- icc_data %>%
  group_by(country) %>%
  mutate(
    item_total_mass = as.numeric(item_total_mass),
    item_proportions =
      item_total_mass / item_total_mass[item == "plastic_debris_items_total"],
    trials = as.numeric(item_total_mass[item == "plastic_debris_items_total"])
  )
# Compute a 95% interval for every country/item row with fastR2::wilson.ci(),
# using the item total as the number of successes and the country's total
# (trials) as the number of trials.
# NOTE(review): successes and trials here are summed weights rather than
# counts - confirm that a binomial-style interval is appropriate for them.
#
# The interval is computed once per row and stored in a temporary list
# column; the lower and upper bounds are then pulled from it in a second
# mutate(), where rowwise() exposes the list element directly. Computing it
# once avoids calling wilson.ci() twice for every row.
icc_data <- icc_data %>%
  ungroup() %>%
  rowwise() %>%
  mutate(
    wilson_ci = list(
      wilson.ci(item_total_mass, n = trials, conf.level = 0.95)
    )
  ) %>%
  mutate(
    wilson_lower = wilson_ci[1],
    wilson_upper = wilson_ci[2]
  ) %>%
  # Drop the rowwise grouping so later steps operate on a plain data frame.
  ungroup() %>%
  # Keep the output columns by name (not by position) so the selection does
  # not silently break if columns are added or reordered upstream. The helper
  # columns trials and wilson_ci are dropped here.
  select(
    country, item, item_total_mass, item_proportions,
    wilson_lower, wilson_upper
  )
# The per-country total rows were only needed as denominators; remove them so
# only the individual item categories remain.
icc_data <- filter(icc_data, item != "plastic_debris_items_total")
# Coastal cleanup data only
# Fix the factor level order of `item`; this sets the stacking order of the
# bar segments and the order in which fill colours are assigned.
icc_data$item <- factor(
  icc_data$item,
  levels = c(
    "plastic_item_other", "plastic_item_foam", "plastic_item_bottles",
    "plastic_item_wrappers", "plastic_item_bags"
  )
)

# Fix the factor level order of `country`; this sets the left-to-right order
# on the x-axis. Any country not listed here becomes NA.
icc_data$country <- factor(
  icc_data$country,
  levels = c("Vietnam", "Ecuador", "Indonesia", "Kenya")
)
# Stacked bar chart of item proportions: one bar per country (x-axis), with
# segments filled and grouped by item category.
figure_coastalcleanup <- ggplot(
  icc_data,
  aes(x = country, y = item_proportions, fill = item, group = item)
) +
  # Bar heights are taken straight from item_proportions and stacked within
  # each country.
  geom_bar(
    stat = "identity",
    color = "#636363",
    linewidth = 0.4,
    alpha = 0.7
  ) +
  # Colour-blind friendly palette; the last colour (plastic bags) is the
  # standout one.
  scale_fill_manual(
    values = c("#DEDEDE", "#00a37d", "#18a94e", "#5eb200", "#d6ca00")
  ) +
  # Percent labels every 10%; the upper limit sits marginally above 1.
  # NOTE(review): presumably so stacks that total 1 are not dropped by
  # floating-point error - confirm.
  scale_y_continuous(
    labels = scales::label_percent(),
    breaks = seq(0, 1, .1),
    limits = c(0, 1.0000000001),
    expand = c(0.005, 0.005)
  ) +
  theme_classic() +
  # Axis titles, x-axis labels and the legend are left blank because those
  # details are designed in Canva.
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    axis.text.y = element_text(face = "bold", colour = "#d6ca00")
  )
figure_coastalcleanup
# Coastal cleanup and river data side by side
# Tag the coastal cleanup rows so they can be told apart after binding.
icc_data <- mutate(icc_data, source = "coast")

# Read the river data and tag its rows in the same way.
fig4_data <- read_csv("fig4_data.csv") %>%
  mutate(source = "river")

# Stack the two datasets, river rows first; river_id is dropped from the
# river data before binding.
data_combined <- bind_rows(select(fig4_data, -river_id), icc_data)
# Keep only the columns the figure needs, then reclassify the plotting
# variables as factors:
# - source: default (alphabetical) level order
# - item: explicit order, which sets the stacking and fill-colour order
# - country: explicit order, which sets the facet order
data_combined <- data_combined %>%
  select(country, item, item_total_mass, item_proportions, source) %>%
  mutate(
    source = as.factor(source),
    item = factor(
      item,
      levels = c(
        "plastic_item_other", "plastic_item_foam", "plastic_item_bottles",
        "plastic_item_wrappers", "plastic_item_bags"
      )
    ),
    country = factor(
      country,
      levels = c("Vietnam", "Ecuador", "Indonesia", "Kenya")
    )
  )
# Stacked bar chart comparing coast and river item proportions: one facet per
# country, with the two sources side by side on the x-axis inside each facet
# and segments filled by item category.
figure_coastalcleanup_river <- ggplot(data_combined) +
  geom_bar(
    aes(fill = item, y = item_proportions, x = source),
    color = "#636363",
    stat = "identity",
    position = "stack",
    linewidth = 0.4,
    alpha = 0.7
  ) +
  # One panel per country, with the strip labels switched to the bottom.
  facet_grid(~country, switch = "x") +
  scale_fill_manual(
    values = c("#DEDEDE", "#00a37d", "#18a94e", "#5eb200", "#d6ca00"),
    name = "Item Category",
    labels = c(
      "Other", "Food & Packaging Foam", "Beverage Bottles",
      "Food Wrappers", "Grocery & Trash Bags"
    )
  ) +
  # Percent labels every 10%; the upper limit sits marginally above 1.
  # NOTE(review): presumably so stacks that total 1 are not dropped by
  # floating-point error - confirm.
  scale_y_continuous(
    labels = scales::label_percent(),
    breaks = seq(0, 1, .1),
    limits = c(0, 1.0000000001),
    expand = c(0.005, 0.005)
  ) +
  # The complete theme must come BEFORE the element tweaks: theme_classic()
  # replaces every theme setting added earlier, so the strip and panel-spacing
  # settings below were silently discarded when it was added after them.
  theme_classic() +
  theme(
    strip.placement = "outside",
    strip.background = element_rect(fill = NA, color = "white"),
    panel.spacing = unit(-.01, "cm"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(face = "bold", colour = "#d6ca00")
  ) +
  # Dashed vertical rule. `linewidth` replaces the deprecated `size` argument
  # for lines, matching the geom_bar() call above.
  # NOTE(review): x carries only the `source` categories within each facet, so
  # an intercept of 3.5 may fall outside the bars - confirm this divider is
  # still intended.
  geom_vline(xintercept = c(3.5), linewidth = 2, colour = "black", linetype = 5)
figure_coastalcleanup_river