Load data —————————————————————
# Base names (no extension) of the aggregated composition tables.
file_names <- c(
  "composition_subregions", "composition_continent", "composition_income",
  "composition_continents_ctry_time", "composition_continents_ctry"
)

# Read each CSV. Naming the input vector first makes map() return a list that
# is already named after the tables.
df_list <- file_names %>%
  set_names() %>%
  map(function(nm) read_csv(paste0("../aggregated_data/", nm, ".csv")))

# Expose every table as a global variable carrying the table's name.
list2env(df_list, envir = globalenv())
<environment: R_GlobalEnv>
# The aggregates table is read separately from the composition tables.
aggregates <- "../aggregated_data/aggregates.csv" %>%
  read_csv()
Functions: tabulate and plot author composition by variables of
interest.
# Draw a two-axis alluvial diagram from a table of counts.
#
# Arguments:
#   df   : data frame with a column `n` (mapped to y) and the two columns
#          named by `var1` and `var2`.
#   var1 : column shown on the left axis (axis1), given as a string or as an
#          already-built symbol.
#   var2 : column shown on the right axis (axis2); also sets the flow fill.
#
# Returns the ggplot object, so callers can add or replace layers and scales.
plot_composition <- function(df, var1, var2) {
  # rlang::sym() builds the symbol from the argument's *value*, so a column
  # name stored in a variable works. ensym() captures the caller's expression
  # instead, which picks up the variable's own name in that case.
  if (is.character(var1)) {
    var1 <- rlang::sym(var1)
  }
  if (is.character(var2)) {
    var2 <- rlang::sym(var2)
  }

  df %>%
    ggplot(aes(y = n, axis1 = !!var1, axis2 = !!var2)) +
    geom_alluvium(aes(fill = !!var2)) +
    geom_stratum() +
    # Label each stratum with its category name; label.size = NA drops the
    # label border.
    geom_label(
      stat = "stratum",
      aes(label = after_stat(stratum)),
      size = 3,
      label.size = NA
    ) +
    theme(legend.position = "none") +
    labs(y = "Country salience") +
    scale_fill_viridis_d() +
    scale_x_continuous(
      breaks = 1:2,
      labels = c("Country salience \n by region", "Composition of authors \n by region")
    ) +
    coord_cartesian(xlim = c(0.8, 2.13))
}
Alluvial, subregions.
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Keep the plot in a variable (the outer parentheses also print it) so that
# ggsave() receives it explicitly instead of relying on last_plot().
(subregions_alluvial <- composition_subregions %>%
  plot_composition("subregion_gr", "team_subregion"))

ggsave(
  "../output_figures/figure_S3B_subregions_alluvial.png",
  plot = subregions_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

Continents
# One colour per author-team continent category. "Set2" is a qualitative
# ColorBrewer palette with at most 8 colours, so the 7 categories fit.
team_continent_cats <- c("Northern America", "Europe", "Asia", "Collaboration", "Africa", "Oceania", "Lat.America & Car.")

n_colors <- length(team_continent_cats)
custom_colors <- brewer.pal(n = n_colors, name = "Set2")
# NOTE(review): custom_colors is unnamed, so scale_fill_manual() assigns
# colours by the order of the fill values present in each plot, not by the
# order of team_continent_cats. Confirm colours stay consistent across figures.

# The outer parentheses wrap the whole expression, so the stored object is the
# same plot that is printed and saved (including the manual fill scale).
(continents_alluvial <- composition_continent %>%
  plot_composition("continents_gr", "team_continent") +
  geom_text(
    stat = "flow",
    aes(label = paste0(as.character(round(percent)), "%")),
    nudge_x = .25,
    check_overlap = TRUE,
    size = 3
  ) +
  scale_fill_manual(values = custom_colors))

# Pass the plot explicitly rather than relying on last_plot().
ggsave(
  "../output_figures/figure_S3A_continents_alluvial.png",
  plot = continents_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

# Cumulative percentage of three main authors affiliation mentions.
composition_continent %>%
  group_by(continents_gr) %>%
  summarise(cumpercent = sum(percent))
Alluvial: Income
# Alluvial plot by income group, with rounded flow percentages as labels.
# The outer parentheses print the plot as well as storing it.
(income_alluvial <- composition_income %>%
  plot_composition("income_ISO3", "team_income") +
  geom_text(
    stat = "flow",
    aes(label = paste0(as.character(round(percent)), "%")),
    nudge_x = .25,
    check_overlap = TRUE,
    size = 3
  ) +
  # A fixed-position label is drawn once with annotate(); geom_label() with
  # constant aesthetics would draw one overlapping copy per data row.
  annotate(
    "label",
    x = 2, y = 10000, label = "Low income",
    size = 3, fill = "white", label.size = NA
  ) +
  # Replaces the default axis labels set inside plot_composition().
  scale_x_continuous(
    breaks = 1:2,
    labels = c("Country salience \n by income groups", "Composition of authors \n by income groups")
  ))

# Pass the plot explicitly rather than relying on last_plot().
ggsave(
  "../output_figures/figure_4_income_alluvial.png",
  plot = income_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

Reviewer’s request. Country analysis.
Bars. Selected countries by continent composition.
# Based on migration relevance.
# CIV is excluded: its research salience is very low, so its plot is not useful.
# For each continent, keep the single country with the highest emig_immig.
ei_ctry <- aggregates %>%
  filter(ISO3 != "CIV") %>%
  group_by(continent) %>%
  slice_max(emig_immig, n = 1, with_ties = FALSE) %>%
  ungroup()

# Country selections: top country per continent, plus three crisis countries.
sel_alluvial_mig <- ei_ctry$ISO3
sel_alluvial_crisis <- c("SYR", "VEN", "AFG")
sel_alluvial <- c(sel_alluvial_mig, sel_alluvial_crisis)

# NOTE(review): custom_colors is unnamed, so each plot below assigns colours by
# the order of the team_continent values present in its own subset. The second
# panel hides its legend; confirm both panels contain the same categories,
# otherwise colours may not match the legend of the first panel.

# Stacked bars per publication year for the migration-relevance countries.
bar_countries_time_mig <- composition_continents_ctry_time %>%
  filter(ISO3 %in% sel_alluvial_mig) %>%
  ggplot() +
  geom_col(mapping = aes(x = pubyear, y = n, group = team_continent, fill = team_continent)) +
  facet_wrap(~ country_nm_fct, scales = "free", ncol = 3) +
  theme_minimal(base_size = 16) +
  scale_fill_manual(values = custom_colors) + # custom_colors defined for previous plot with continents.
  theme(legend.position = "bottom") +
  labs(
    title = "Migration relevance",
    fill = "Composition of authors",
    y = "Research salience",
    x = "Publication year"
  )

# Same layout for the crisis countries, without a legend of its own.
bar_countries_time_cri <- composition_continents_ctry_time %>%
  filter(ISO3 %in% sel_alluvial_crisis) %>%
  ggplot() +
  geom_col(mapping = aes(x = pubyear, y = n, group = team_continent, fill = team_continent)) +
  facet_wrap(~ country_nm, scales = "free") +
  theme_minimal(base_size = 16) +
  scale_fill_manual(values = custom_colors) +
  theme(legend.position = "none") +
  labs(
    title = "Migration crisis",
    fill = "Composition of authors",
    y = "Research salience",
    x = "Publication year"
  )

# patchwork: stack the two panels vertically.
combined_bar_countries <- bar_countries_time_mig / bar_countries_time_cri

# The combined plot is never printed, so hand it to ggsave() explicitly
# instead of depending on whatever last_plot() currently holds.
ggsave(
  "../output_figures/figure_5_countries_time.png",
  plot = combined_bar_countries,
  width = 10, height = 12, dpi = 600
)
Alluvial. Selected countries, by continents.
# Keep the plot in a variable (the outer parentheses also print it) so that
# ggsave() receives it explicitly instead of relying on last_plot().
(countries_continents_alluvial <- composition_continents_ctry %>%
  plot_composition("country_nm", "team_continent") +
  scale_fill_manual(values = custom_colors) +
  # Replaces the default axis labels set inside plot_composition().
  scale_x_continuous(
    breaks = 1:2,
    labels = c("Country salience \n in selected countries", "Composition of authors \n by region")
  ))

ggsave(
  "../output_figures/figure_S4_countries_continents_alluvial.png",
  plot = countries_continents_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

# Main continent researching about the country: for each country, the single
# row with the largest `percent` (ties broken by keeping one row only).
composition_max_labels <- composition_continents_ctry %>%
  group_by(country_nm) %>%
  slice_max(percent, n = 1, with_ties = FALSE) %>%
  ungroup()

composition_max_labels
---
title: "R Notebook"
output: html_notebook
---

```{r}
rm(list = ls())

Sys.setenv(LANG = "en")
library(tidyverse)      # Includes dplyr, ggplot2, readr, purrr, stringr
library(countrycode)
library(ggalluvial)
library(RColorBrewer)
library(viridis)
library(patchwork)
```

# Load data ---------------------------------------------------------------

```{r}
# Base names (no extension) of the aggregated composition tables.
file_names <- c(
  "composition_subregions", "composition_continent", "composition_income",
  "composition_continents_ctry_time", "composition_continents_ctry"
)

# Read each CSV. Naming the input vector first makes map() return a list that
# is already named after the tables.
df_list <- file_names %>%
  set_names() %>%
  map(function(nm) read_csv(paste0("../aggregated_data/", nm, ".csv")))

# Expose every table as a global variable carrying the table's name.
list2env(df_list, envir = globalenv())

# The aggregates table is read separately from the composition tables.
aggregates <- "../aggregated_data/aggregates.csv" %>%
  read_csv()
```
### Functions: tabulate and plot author composition by variables of interest.
```{r}
# Draw a two-axis alluvial diagram from a table of counts.
#
# Arguments:
#   df   : data frame with a column `n` (mapped to y) and the two columns
#          named by `var1` and `var2`.
#   var1 : column shown on the left axis (axis1), given as a string or as an
#          already-built symbol.
#   var2 : column shown on the right axis (axis2); also sets the flow fill.
#
# Returns the ggplot object, so callers can add or replace layers and scales.
plot_composition <- function(df, var1, var2) {
  # rlang::sym() builds the symbol from the argument's *value*, so a column
  # name stored in a variable works. ensym() captures the caller's expression
  # instead, which picks up the variable's own name in that case.
  if (is.character(var1)) {
    var1 <- rlang::sym(var1)
  }
  if (is.character(var2)) {
    var2 <- rlang::sym(var2)
  }

  df %>%
    ggplot(aes(y = n, axis1 = !!var1, axis2 = !!var2)) +
    geom_alluvium(aes(fill = !!var2)) +
    geom_stratum() +
    # Label each stratum with its category name; label.size = NA drops the
    # label border.
    geom_label(
      stat = "stratum",
      aes(label = after_stat(stratum)),
      size = 3,
      label.size = NA
    ) +
    theme(legend.position = "none") +
    labs(y = "Country salience") +
    scale_fill_viridis_d() +
    scale_x_continuous(
      breaks = 1:2,
      labels = c("Country salience \n by region", "Composition of authors \n by region")
    ) +
    coord_cartesian(xlim = c(0.8, 2.13))
}

```

### Alluvial, subregions.

```{r}
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Keep the plot in a variable (the outer parentheses also print it) so that
# ggsave() receives it explicitly instead of relying on last_plot().
(subregions_alluvial <- composition_subregions %>%
  plot_composition("subregion_gr", "team_subregion"))

ggsave(
  "../output_figures/figure_S3B_subregions_alluvial.png",
  plot = subregions_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)
```
### Continents
```{r}
# One colour per author-team continent category. "Set2" is a qualitative
# ColorBrewer palette with at most 8 colours, so the 7 categories fit.
team_continent_cats <- c("Northern America", "Europe", "Asia", "Collaboration", "Africa", "Oceania", "Lat.America & Car.")

n_colors <- length(team_continent_cats)
custom_colors <- brewer.pal(n = n_colors, name = "Set2")
# NOTE(review): custom_colors is unnamed, so scale_fill_manual() assigns
# colours by the order of the fill values present in each plot, not by the
# order of team_continent_cats. Confirm colours stay consistent across figures.

# The outer parentheses wrap the whole expression, so the stored object is the
# same plot that is printed and saved (including the manual fill scale).
(continents_alluvial <- composition_continent %>%
  plot_composition("continents_gr", "team_continent") +
  geom_text(
    stat = "flow",
    aes(label = paste0(as.character(round(percent)), "%")),
    nudge_x = .25,
    check_overlap = TRUE,
    size = 3
  ) +
  scale_fill_manual(values = custom_colors))

# Pass the plot explicitly rather than relying on last_plot().
ggsave(
  "../output_figures/figure_S3A_continents_alluvial.png",
  plot = continents_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

# Cumulative percentage of three main authors affiliation mentions.
composition_continent %>%
  group_by(continents_gr) %>%
  summarise(cumpercent = sum(percent))
```
### Alluvial: Income
```{r}
# Alluvial plot by income group, with rounded flow percentages as labels.
# The outer parentheses print the plot as well as storing it.
(income_alluvial <- composition_income %>%
  plot_composition("income_ISO3", "team_income") +
  geom_text(
    stat = "flow",
    aes(label = paste0(as.character(round(percent)), "%")),
    nudge_x = .25,
    check_overlap = TRUE,
    size = 3
  ) +
  # A fixed-position label is drawn once with annotate(); geom_label() with
  # constant aesthetics would draw one overlapping copy per data row.
  annotate(
    "label",
    x = 2, y = 10000, label = "Low income",
    size = 3, fill = "white", label.size = NA
  ) +
  # Replaces the default axis labels set inside plot_composition().
  scale_x_continuous(
    breaks = 1:2,
    labels = c("Country salience \n by income groups", "Composition of authors \n  by income groups")
  ))

# Pass the plot explicitly rather than relying on last_plot().
ggsave(
  "../output_figures/figure_4_income_alluvial.png",
  plot = income_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

```
# Reviewer's request. Country analysis.

## Bars. Selected countries by continent composition.
```{r}
# Based on migration relevance.
# CIV is excluded: its research salience is very low, so its plot is not useful.
# For each continent, keep the single country with the highest emig_immig.
ei_ctry <- aggregates %>%
  filter(ISO3 != "CIV") %>%
  group_by(continent) %>%
  slice_max(emig_immig, n = 1, with_ties = FALSE) %>%
  ungroup()

# Country selections: top country per continent, plus three crisis countries.
sel_alluvial_mig <- ei_ctry$ISO3
sel_alluvial_crisis <- c("SYR", "VEN", "AFG")
sel_alluvial <- c(sel_alluvial_mig, sel_alluvial_crisis)

# NOTE(review): custom_colors is unnamed, so each plot below assigns colours by
# the order of the team_continent values present in its own subset. The second
# panel hides its legend; confirm both panels contain the same categories,
# otherwise colours may not match the legend of the first panel.

# Stacked bars per publication year for the migration-relevance countries.
bar_countries_time_mig <- composition_continents_ctry_time %>%
  filter(ISO3 %in% sel_alluvial_mig) %>%
  ggplot() +
  geom_col(mapping = aes(x = pubyear, y = n, group = team_continent, fill = team_continent)) +
  facet_wrap(~ country_nm_fct, scales = "free", ncol = 3) +
  theme_minimal(base_size = 16) +
  scale_fill_manual(values = custom_colors) + # custom_colors defined for previous plot with continents.
  theme(legend.position = "bottom") +
  labs(
    title = "Migration relevance",
    fill = "Composition of authors",
    y = "Research salience",
    x = "Publication year"
  )

# Same layout for the crisis countries, without a legend of its own.
bar_countries_time_cri <- composition_continents_ctry_time %>%
  filter(ISO3 %in% sel_alluvial_crisis) %>%
  ggplot() +
  geom_col(mapping = aes(x = pubyear, y = n, group = team_continent, fill = team_continent)) +
  facet_wrap(~ country_nm, scales = "free") +
  theme_minimal(base_size = 16) +
  scale_fill_manual(values = custom_colors) +
  theme(legend.position = "none") +
  labs(
    title = "Migration crisis",
    fill = "Composition of authors",
    y = "Research salience",
    x = "Publication year"
  )

# patchwork: stack the two panels vertically.
combined_bar_countries <- bar_countries_time_mig / bar_countries_time_cri

# The combined plot is never printed, so hand it to ggsave() explicitly
# instead of depending on whatever last_plot() currently holds.
ggsave(
  "../output_figures/figure_5_countries_time.png",
  plot = combined_bar_countries,
  width = 10, height = 12, dpi = 600
)
```


## Alluvial. Selected countries, by continents.
```{r}
# Keep the plot in a variable (the outer parentheses also print it) so that
# ggsave() receives it explicitly instead of relying on last_plot().
(countries_continents_alluvial <- composition_continents_ctry %>%
  plot_composition("country_nm", "team_continent") +
  scale_fill_manual(values = custom_colors) +
  # Replaces the default axis labels set inside plot_composition().
  scale_x_continuous(
    breaks = 1:2,
    labels = c("Country salience \n in selected countries", "Composition of authors \n  by region")
  ))

ggsave(
  "../output_figures/figure_S4_countries_continents_alluvial.png",
  plot = countries_continents_alluvial,
  width = 7, height = 7, dpi = 500, limitsize = FALSE
)

# Main continent researching about the country: for each country, the single
# row with the largest `percent` (ties broken by keeping one row only).
composition_max_labels <- composition_continents_ctry %>%
  group_by(country_nm) %>%
  slice_max(percent, n = 1, with_ties = FALSE) %>%
  ungroup()

composition_max_labels
```




