Expand code
cite_packages(output = "table", pkgs = "Session", out.dir = getwd()) Package Version Citation
1 base 4.5.3 @base
2 patchwork 1.3.2 @patchwork
3 tidyverse 2.0.0 @tidyverse
cite_packages(output = "table", pkgs = "Session", out.dir = getwd()) Package Version Citation
1 base 4.5.3 @base
2 patchwork 1.3.2 @patchwork
3 tidyverse 2.0.0 @tidyverse
cols.layer <- c(
"10m" = "#78B7C5",
"chlMax" = "#046C9A"
)The raw data was procesesd using nf-core/metatdenovo (v1.0.1): https://nf-co.re/metatdenovo/1.0.1/ using Prokka as orf caller, GTDB release 220 for taxonomic annotation, and EggNOG mapper for functional annotation.
tax <- read_tsv('data/metatdenovo/gtdb_ncbi_merged.tsv.gz', col_types = 'cccccccc')tpm <- read_tsv("data/metatdenovo/user_assembly.prokka.counts.tsv.gz", show_col_types = F) %>%
mutate(sample = gsub("R", ".", sample)) %>%
mutate(sample = factor(sample, levels = c(
"S1.1", "S1.2", "S2.1", "S2.2", "S3.1", "S3.2", "S4.1", "S4.2", "S5.1", "S6.1",
"S5.2", "S6.2", "S7.1", "S7.2", "S8.1", "S8.2", "S9.1", "S9.2", "S10.1", "S10.2"
))) %>%
select(-start, -end,- strand, -length, -chr)
meta <- read_tsv("data/sample_key.tsv", show_col_types = FALSE) %>%
mutate(station = as.character(station)) %>%
mutate(sample = gsub("R", ".", sample)) %>%
mutate(sample = factor(sample, levels = c(
"S1.1", "S1.2", "S2.1", "S2.2", "S3.1", "S3.2", "S4.1", "S4.2", "S5.1", "S6.1",
"S5.2", "S6.2", "S7.1", "S7.2", "S8.1", "S8.2", "S9.1", "S9.2", "S10.1", "S10.2"
))) %>%
mutate(layer = factor(layer, levels = c("10m", "chlMax")))In total, there are 20 metatranscriptomes from five stations. At every station, samples were taken in duplicates from 10 m depth and from chl. max. On average, each sample has 81.2 million reads (sd 8.7, n = 20). Off all the transcripts, 53.0 % was functionally annotated (Eggnog mapper), 36.5 % was functionally annotated including a KO reference, 51.7 % of the transcripts was annotated on phylum level, 48.0 % on order level, 43.5 % on genus level, and 32.7 % on species level.
63.6 % of the transcript were functionally annotated with Kofam scan. A summary of the bioinformatic pipeline will soon be added to this introduction.
kofam <- read_tsv("data/metatdenovo/user_assembly.prokka.kofamscan-uniq.tsv.gz", show_col_types = F) %>%
select(-thrshld, -score)emapper <- read_tsv("data/metatdenovo/user_assembly.prokka.emapper.tsv.gz", show_col_types = F) %>%
select(orf, cog = cog_category)i <- c("domain","phylum","class","order","family","genus")
for (level in i) {
## The (!!as.name(level)) converts the variable level to e.g. 'genus'
d <- tpm %>% left_join(tax, by = "orf") %>% filter(!is.na((!!as.name(level))))
format(nrow(d) / nrow(tpm) * 100, digits = 3) %>% paste(stringr::str_to_title(level), ": ", ., " %", sep = "") %>% print()
}[1] "Domain: 55.4 %"
[1] "Phylum: 51.7 %"
[1] "Class: 50.1 %"
[1] "Order: 48 %"
[1] "Family: 47 %"
[1] "Genus: 43.5 %"
i <- c("domain","phylum","class","order","family","genus")
for (level in i) {
## The (!!as.name(level)) converts the variable level to e.g. 'genus'
d <- tpm %>% left_join(tax, by = "orf") %>% inner_join(kofam, by = "orf") %>% filter(!is.na((!!as.name(level))))
format(nrow(d) / nrow(tpm) * 100, digits = 3) %>% paste(stringr::str_to_title(level), ": ", ., " %", sep = "") %>% print()
}[1] "Domain: 46.7 %"
[1] "Phylum: 43.2 %"
[1] "Class: 41.8 %"
[1] "Order: 40.2 %"
[1] "Family: 39.2 %"
[1] "Genus: 36 %"
tpm %>%
left_join(tax, by = "orf") %>%
replace_na(list("domain" = "Unknown")) %>%
group_by(domain) %>% summarise(tpm = sum(tpm) / 20, .groups = "drop") %>%
arrange(desc(tpm))# A tibble: 5 × 2
domain tpm
<chr> <dbl>
1 Unknown 407152.
2 Bacteria 377129.
3 Eukaryota 101564.
4 Archaea 94688.
5 Viruses 19467.
Figure 1 depicts the ratio between bacteria and archaea for both depth layers. Note how the abundance of archaea is slightly higher in most of the deeper samples (chlorophyll max depth).
tpm %>%
inner_join(tax, by = "orf") %>%
inner_join(meta, by = "sample") %>%
group_by(sample, domain) %>% summarise(tpm = sum(tpm), .groups = "drop") %>%
mutate(domain = factor(domain, levels = c("Viruses", "Eukaryota", "Archaea", "Bacteria"))) %>%
ggplot(aes(
x = sample,
y = tpm,
fill = domain
)) +
geom_col() + theme_tidy() +
scale_y_continuous(breaks = c(0, 2e5, 4e5, 6e5), labels = c(0, 0.2, 0.4, 0.6)) +
labs(y = expression('Relative abundance (tpm '%*% '10'^'6'*')')) +
scale_fill_manual(
values = c("Bacteria" = "#C7B19C", "Archaea" = "#A2A475", "Viruses" = "#D8B70A", "Eukaryota" = "#FAEFD1")
) +
theme(
aspect.ratio = 0.5,
legend.title = element_blank(),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()
) +
annotate("segment", x = 0.55, xend = 2.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 2.55, xend = 4.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 4.55, xend = 6.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 6.55, xend = 8.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 8.55, xend = 10.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 10.55, xend = 12.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 12.55, xend = 14.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 14.55, xend = 16.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 16.55, xend = 18.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 18.55, xend = 20.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("label", x = 2.5, y = -30000, label = "5", label.size = 0, size = 6/.pt) +
annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0, size = 6/.pt) +
annotate("label", x =10.5, y = -30000, label = "42", label.size = 0, size = 6/.pt) +
annotate("label", x =14.5, y = -30000, label = "53", label.size = 0, size = 6/.pt) +
annotate("label", x =18.5, y = -30000, label = "56", label.size = 0, size = 6/.pt)Warning in annotate("label", x = 2.5, y = -30000, label = "5", label.size = 0,
: Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0,
: Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 10.5, y = -30000, label = "42", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 14.5, y = -30000, label = "53", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 18.5, y = -30000, label = "56", label.size =
0, : Ignoring unknown parameters: `label.size`
t <- tpm %>%
inner_join(tax, by = "orf") %>% filter(domain %in% c("Bacteria", "Archaea")) %>%
group_by(order, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(tpm = sum(tpm), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean.tpm = sum(tpm), .groups = 'drop') %>%
filter(!is.na(order)) %>%
top_n(9, mean.tpm)
t %>% arrange(desc(mean.tpm))# A tibble: 9 × 2
order mean.tpm
<chr> <dbl>
1 Pseudomonadales 1270276.
2 Poseidoniales 1188108.
3 Pelagibacterales 775835.
4 PS1 651304.
5 Nitrososphaerales 632899.
6 Flavobacteriales 626936.
7 Enterobacterales 333125.
8 SAR86 248401.
9 Rhodobacterales 204835.
Figure 2 explores the data by plotting the most abundant orders over all samples, regardless if the RNA transcript was functionally annotated or not. Note that transcripts without a taxonomic annotation (the unidentified fraction in Figure 3) are not included in this figure. It clearly shows how the Nitrosphaerales are more abundant in the deeper layer.
p.order <- tpm %>%
left_join(tax, by = "orf") %>%
mutate(order = case_when(
order %in% t$order ~ order,
domain == "Eukaryota" ~ "Eukaryotes",
is.na(order) ~ "Unidentified",
!order %in% t$order ~ "Other"
)) %>%
filter(order != "Unidentified") %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, order) %>%
summarise(tpm = sum(tpm), .groups = 'drop') %>%
# Call the plot
ggplot(aes(x = sample,
y = tpm,
fill = fct_relevel(order, rev(names(cols.order)))
)
) +
labs(x = '', y = expression('Relative abundance (TPM '%*% '10'^'6'*')'), fill = "Order") +
geom_col() +
scale_y_continuous(breaks = c(0, 2e5, 4e5, 6e5, 8e5, 1e6),
labels = c("0.0", "0.2", "0.4", "0.6", "0.8", "1.0")
) +
scale_fill_manual(values = cols.order) +
theme_tidy(legend = "right", fontsize = 5) +
#guides(fill = guide_legend(nrow = 3, reverse = TRUE)) +
theme(
axis.ticks.x = element_blank(),
legend.key.width = unit(3.5, "mm"),
legend.key.height = unit(4, "mm"),
legend.margin = margin(t = -10),
legend.text = element_text(margin = margin(l = 2)),
axis.text.x = element_blank()
) +
annotate("segment", x = 0.55, xend = 2.45, y = -25000, yend = -25000, color = "#78B7C5") +
annotate("segment", x = 2.55, xend = 4.45, y = -25000, yend = -25000, color = "#046C9A") +
annotate("segment", x = 4.55, xend = 6.45, y = -25000, yend = -25000, color = "#78B7C5") +
annotate("segment", x = 6.55, xend = 8.45, y = -25000, yend = -25000, color = "#046C9A") +
annotate("segment", x = 8.55, xend = 10.45, y = -25000, yend = -25000, color = "#78B7C5") +
annotate("segment", x = 10.55, xend = 12.45, y = -25000, yend = -25000, color = "#046C9A") +
annotate("segment", x = 12.55, xend = 14.45, y = -25000, yend = -25000, color = "#78B7C5") +
annotate("segment", x = 14.55, xend = 16.45, y = -25000, yend = -25000, color = "#046C9A") +
annotate("segment", x = 16.55, xend = 18.45, y = -25000, yend = -25000, color = "#78B7C5") +
annotate("segment", x = 18.55, xend = 20.45, y = -25000, yend = -25000, color = "#046C9A") +
annotate("label", x = 2.5, y = -25000, label = "Station 5", label.size = 0, size = 5/.pt) +
annotate("label", x = 6.5, y = -25000, label = "26", label.size = 0, size = 5/.pt) +
annotate("label", x =10.5, y = -25000, label = "42", label.size = 0, size = 5/.pt) +
annotate("label", x =14.5, y = -25000, label = "53", label.size = 0, size = 5/.pt) +
annotate("label", x =18.5, y = -25000, label = "56", label.size = 0, size = 5/.pt)Warning in annotate("label", x = 2.5, y = -25000, label = "Station 5",
label.size = 0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 6.5, y = -25000, label = "26", label.size = 0,
: Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 10.5, y = -25000, label = "42", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 14.5, y = -25000, label = "53", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 18.5, y = -25000, label = "56", label.size =
0, : Ignoring unknown parameters: `label.size`
p.ordert <- tpm %>%
inner_join(tax, by = "orf") %>% filter(domain %in% c("Bacteria", "Archaea")) %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(tpm = sum(tpm), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean.tpm = sum(tpm), .groups = 'drop') %>%
filter(!is.na(phylum)) %>%
top_n(8, mean.tpm)
t %>% arrange(desc(mean.tpm))# A tibble: 8 × 2
phylum mean.tpm
<chr> <dbl>
1 Pseudomonadota 4848562.
2 Thermoplasmatota 1203098.
3 Bacteroidota 791534.
4 Thermoproteota 644346.
5 Cyanobacteriota 604239.
6 Marinisomatota 225380.
7 Chloroflexota 202820.
8 SAR324 147854.
Figure 3 explores the data by plotting the most abundant phyla over all samples, regardless if the RNA transcript was functionally annotated or not. Note that the taxonomy is outdated and needs to be updated (e.g. Pseudomonadota instead of Proteobacteria). 87.7 % of the annotated ORFs are affiliated with bacteria, while 13.1 % is affiliated with Archaea.
p.phylum <- tpm %>%
left_join(tax, by = "orf") %>%
mutate(phylum = case_when(
phylum %in% t$phylum ~ phylum,
domain == "Eukaryota" ~ "Eukaryotes",
is.na(phylum) ~ "Unidentified",
!phylum %in% t$phylum ~ "Other"
)) %>%
filter(phylum != "Unidentified") %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, phylum) %>%
summarise(tpm = sum(tpm), .groups = 'drop') %>%
# Call the plot
ggplot(aes(x = sample,
y = tpm,
fill = fct_relevel(phylum, names(cols.phylum) %>% rev())
)
) +
labs(x = '', y = expression('Relative abundance (tpm '%*% '10'^'6'*')'), fill = "") +
geom_col() +
scale_y_continuous(breaks = c(0, 2e5, 4e5, 6e5, 8e5, 1e6),
labels = c("0.0", "0.2", "0.4", "0.6", "0.8", "1.0")
) +
scale_fill_manual(values = cols.phylum) +
theme_tidy(fontsize = 5) +
theme(
aspect.ratio = 0.5,
axis.ticks.x = element_blank(),
legend.key.height = unit(3.5, "mm"),
legend.margin = margin(),
panel.border = element_rect(fill = "transparent", linewidth = 0.5),
axis.text.x = element_blank()
) +
annotate("segment", x = 0.55, xend = 2.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 2.55, xend = 4.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 4.55, xend = 6.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 6.55, xend = 8.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 8.55, xend = 10.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 10.55, xend = 12.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 12.55, xend = 14.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 14.55, xend = 16.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 16.55, xend = 18.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 18.55, xend = 20.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("label", x = 2.5, y = -30000, label = "Station 5", label.size = 0, size = 5/.pt) +
annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0, size = 5/.pt) +
annotate("label", x =10.5, y = -30000, label = "42", label.size = 0, size = 5/.pt) +
annotate("label", x =14.5, y = -30000, label = "53", label.size = 0, size = 5/.pt) +
annotate("label", x =18.5, y = -30000, label = "56", label.size = 0, size = 5/.pt)Warning in annotate("label", x = 2.5, y = -30000, label = "Station 5",
label.size = 0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0,
: Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 10.5, y = -30000, label = "42", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 14.5, y = -30000, label = "53", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 18.5, y = -30000, label = "56", label.size =
0, : Ignoring unknown parameters: `label.size`
p.phylumt <- tpm %>%
inner_join(tax, by = "orf") %>% filter(domain %in% c("Bacteria", "Archaea")) %>%
group_by(genus, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(tpm = sum(tpm), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean.tpm = sum(tpm), .groups = 'drop') %>%
filter(!is.na(genus)) %>%
top_n(10, mean.tpm)
t %>% left_join(tax, by = "genus") %>%
select(genus, order, phylum, mean.tpm) %>%
distinct() %>%
arrange(desc(mean.tpm)) %>%
rename(Genus = genus, Order = order, Phylum = phylum) %>%
knitr::kable()| Genus | Order | Phylum | mean.tpm |
|---|---|---|---|
| MGIIb-O3 | Poseidoniales | Thermoplasmatota | 952516.8 |
| Pseudothioglobus | PS1 | Pseudomonadota | 587638.1 |
| Pelagibacter | Pelagibacterales | Pseudomonadota | 545749.0 |
| Nitrosopumilus | Nitrososphaerales | Thermoproteota | 436459.6 |
| HTCC2207 | Pseudomonadales | Pseudomonadota | 325660.1 |
| UBA9145 | Pseudomonadales | Pseudomonadota | 302374.6 |
| Lutibacter | Flavobacteriales | Bacteroidota | 242030.1 |
| Photorhabdus | Enterobacterales | Pseudomonadota | 214127.5 |
| ASP10-02a | Pseudomonadales | Pseudomonadota | 204665.4 |
| Arctic96AD-7 | SAR324 | SAR324 | 141825.1 |
Figure 4 shows the most abundant genera. It reveals how some genera (e.g. Pelagibacter and Ketobacter) are more abundant (and active) in the 10 m depth, while for example Nitrosopumilus is more abundant in the deeper sites. Note from the metadata that the depth of chlorophyll max strongly varies between the stations.
p.genus <- tpm %>%
left_join(tax, by = "orf") %>%
mutate(genus = case_when(
genus %in% t$genus ~ genus,
is.na(genus) ~ "Unidentified",
!genus %in% t$genus ~ "Other"
)) %>%
filter(genus != "Unidentified") %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, genus) %>%
summarise(tpm = sum(tpm), .groups = 'drop') %>%
# Call the plot
ggplot(aes(x = sample,
y = tpm,
fill = fct_relevel(genus, rev(names(cols.genus)))
)
) +
labs(x = '', y = expression('Relative abundance (tpm '%*% '10'^'6'*')'), fill = "Genus") +
geom_col() +
scale_y_continuous(breaks = c(0, 2e5, 4e5, 6e5),
labels = c("0.0", "0.2", "0.4", "0.6")
) +
scale_fill_manual(values = cols.genus) +
theme_tidy(legend = "right", fontsize = 5) +
theme(
aspect.ratio = 0.5,
axis.ticks.x = element_blank(),
legend.key.height = unit(3.5, "mm"),
axis.text.x = element_blank(),
legend.title = element_blank(),
legend.margin = margin()
) +
annotate("segment", x = 0.55, xend = 2.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 2.55, xend = 4.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 4.55, xend = 6.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 6.55, xend = 8.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 8.55, xend = 10.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 10.55, xend = 12.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 12.55, xend = 14.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 14.55, xend = 16.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("segment", x = 16.55, xend = 18.45, y = -30000, yend = -30000, color = "#78B7C5") +
annotate("segment", x = 18.55, xend = 20.45, y = -30000, yend = -30000, color = "#046C9A") +
annotate("label", x = 2.5, y = -30000, label = "Station 5", label.size = 0, size = 5/.pt) +
annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0, size = 5/.pt) +
annotate("label", x =10.5, y = -30000, label = "42", label.size = 0, size = 5/.pt) +
annotate("label", x =14.5, y = -30000, label = "53", label.size = 0, size = 5/.pt) +
annotate("label", x =18.5, y = -30000, label = "56", label.size = 0, size = 5/.pt)Warning in annotate("label", x = 2.5, y = -30000, label = "Station 5",
label.size = 0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 6.5, y = -30000, label = "26", label.size = 0,
: Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 10.5, y = -30000, label = "42", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 14.5, y = -30000, label = "53", label.size =
0, : Ignoring unknown parameters: `label.size`
Warning in annotate("label", x = 18.5, y = -30000, label = "56", label.size =
0, : Ignoring unknown parameters: `label.size`
p.genusp.phylum / p.genus + plot_annotation(tag_levels = "a") &
theme(
plot.tag.location = "panel",
plot.tag.position = c(0.03, 0.95)
)ggsave(filename = "figures/community-genus-phylum.pdf", width = 180, height = 160, units = "mm")hydro <- readxl::read_excel("data/primarydata.xlsx", sheet = 1) %>%
mutate(station = as.character(station)) %>%
inner_join(meta, by = c("station", "layer", "replicate")) %>%
rename_with(tolower) %>%
mutate(sample = factor(sample, levels = c(
"S1.1", "S1.2", "S2.1", "S2.2", "S3.1", "S3.2", "S4.1", "S4.2", "S5.1", "S6.1",
"S5.2", "S6.2", "S7.1", "S7.2", "S8.1", "S8.2", "S9.1", "S9.2", "S10.1", "S10.2"
))) %>% arrange(sample)m <- tpm %>%
inner_join(kofam, by = "orf") %>% group_by(sample, ko) %>% summarise(tpm = sum(tpm), .groups = "drop") %>%
# Standard Vegan transformation: Turn table with with samples as *rows*
select(sample, ko, tpm) %>%
pivot_wider(names_from = "ko", values_from = "tpm", values_fill = 0) %>%
# Turn into a numeric matrix
column_to_rownames('sample') %>% vegan::decostand(method = "hellinger") %>%
rownames_to_column("sample") %>%
pivot_longer(-1, names_to = "ko", values_to = "hellinger")
m <- m %>% # Create a matrix
spread(ko, hellinger) %>%
mutate(sample = factor(sample, levels = c(
"S1.1", "S1.2", "S2.1", "S2.2", "S3.1", "S3.2", "S4.1", "S4.2", "S5.1", "S6.1",
"S5.2", "S6.2", "S7.1", "S7.2", "S8.1", "S8.2", "S9.1", "S9.2", "S10.1", "S10.2"
))) %>% arrange(sample) %>%
column_to_rownames("sample")set.seed(999)
vegan::rda(
m ~ depth + doc + tdp + tdn + chlach,
correlation = T,
data = hydro %>%
column_to_rownames('sample')
) -> rda
# This one will contain the proportions explained which we get by dividing the
# eigenvalue of each component with the sum of eigenvalues for all components.
p <- c(rda$CCA$eig, rda$CA$eig)/sum(c(rda$CCA$eig, rda$CA$eig))
# This one is collecting the coordinates for arrows that will depict how the
# factors in our model point in the coordinate
a <- rda$CCA$biplot %>% data.frame() %>% tibble::rownames_to_column('factor')
lab <- tibble(factor = a$factor,
label = c("Depth", "", "TDP & DOC", "TDN", "Chl.")
) %>% inner_join(a, by = "factor")
summary(rda)
Call:
rda(formula = m ~ depth + doc + tdp + tdn + chlach, data = hydro %>% column_to_rownames("sample"), correlation = T)
Partitioning of variance:
Inertia Proportion
Total 0.12901 1.0000
Constrained 0.08715 0.6756
Unconstrained 0.04185 0.3244
Eigenvalues, and their contribution to the variance
Importance of components:
RDA1 RDA2 RDA3 RDA4 RDA5 PC1
Eigenvalue 0.05428 0.01333 0.008325 0.007262 0.00396 0.01153
Proportion Explained 0.42073 0.10331 0.064530 0.056293 0.03070 0.08935
Cumulative Proportion 0.42073 0.52404 0.588568 0.644861 0.67556 0.76491
PC2 PC3 PC4 PC5 PC6 PC7
Eigenvalue 0.006923 0.006138 0.004627 0.003283 0.002629 0.001858
Proportion Explained 0.053661 0.047575 0.035864 0.025451 0.020377 0.014403
Cumulative Proportion 0.818568 0.866144 0.902008 0.927459 0.947836 0.962239
PC8 PC9 PC10 PC11 PC12
Eigenvalue 0.001149 0.0009144 0.0007716 0.0005756 0.0005587
Proportion Explained 0.008908 0.0070881 0.0059811 0.0044620 0.0043310
Cumulative Proportion 0.971147 0.9782351 0.9842161 0.9886781 0.9930091
PC13 PC14
Eigenvalue 0.0004823 0.0004195
Proportion Explained 0.0037389 0.0032520
Cumulative Proportion 0.9967480 1.0000000
Accumulated constrained eigenvalues
Importance of components:
RDA1 RDA2 RDA3 RDA4 RDA5
Eigenvalue 0.05428 0.01333 0.008325 0.007262 0.00396
Proportion Explained 0.62278 0.15292 0.095521 0.083328 0.04544
Cumulative Proportion 0.62278 0.77571 0.871229 0.954557 1.00000
vegan::RsquareAdj(rda)$r.squared
[1] 0.6755601
$adj.r.squared
[1] 0.5596887
p.rda <- rda$CCA$wa %>% data.frame() %>% tibble::rownames_to_column('sample') %>%
inner_join(hydro, by = 'sample') %>%
ggplot(aes(x = RDA1, y = RDA2)) +
geom_vline(xintercept = 0, colour = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, colour = "grey", linetype = "dotted") +
geom_point(aes(colour = layer, fill = layer), size = 5, shape = 21, stroke = 0.75) +
xlab(sprintf("RDA1 (%2.1f%% explained)", p[[1]] * 100)) +
ylab(sprintf("RDA2 (%2.1f%% explained)", p[[2]] * 100)) +
geom_text(aes(label = station), size = 5/.pt, color = "black") +
geom_segment(
data = a, mapping = aes(xend = RDA1/4, yend = RDA2/4), linewidth = 0.3,
x = 0, y = 0, arrow = arrow(length = unit(1.5, "mm"), type = "closed")
) +
geom_label(
data = lab, mapping = aes(x = RDA1/8, y = RDA2/8, label = label),
size = 5/.pt, parse = F, label.size = 0
) +
# Add R-squared
annotate("text", x = Inf, y = -Inf,
label = "paste(R ^ 2-adj, \" = 0.56\")",
size = 5/.pt, vjust = -0.5, hjust = 1.05, parse = TRUE) +
scale_color_manual(values = cols.layer, labels = c("10 m", "Chl. max"), name = "") +
scale_fill_manual(values = alpha(cols.layer, alpha = 0.2), guide = "none") +
theme_tidy(fontsize = 5) +
theme(
legend.background = element_rect(fill = NA),
legend.text = element_text(margin = margin(r = 12, unit = "pt")),
legend.position = "inside",
legend.position.inside = c(0.89,0.93)
)Warning: The `label.size` argument of `geom_label()` is deprecated as of ggplot2 3.5.0.
ℹ Please use the `linewidth` argument instead.
p.rda(p.order + p.rda) + plot_annotation(tag_levels = "a") & theme(
plot.margin = margin(r = 2, t = 2),
plot.tag.location = "panel",
plot.tag.position = c(0.03, 0.97),
)ggsave(filename = "figures/rda.pdf", width = 180, height = 80, units = "mm")cols.order <- c( # Sorted on abundance
"Pseudomonadales" = "#C7B19C",
"Poseidoniales" = "#D69C4E",
"Pelagibacterales" = "#972D15",
"PS1" = "#FAEFD1",
"Flavobacteriales" = "#D8B70A",
"Nitrososphaerales" = "#A2A475",
"Cyanobacteriales" = "#00A08A",
"SAR86" = "#1B5331",
"Rhodobacterales" = "#9986A5",
"Other" = "#D5D5D3",
"Not annotated" = "#046C9A"
) %>% rev()cogs <- read_tsv("data/cogs.tsv", show_col_types = FALSE) %>%
rename(cog = cog_category) %>%
filter(cog %in% c('C','J','P','G','U','M'))
i <- emapper %>%
inner_join(cogs, by = "cog") %>%
inner_join(tpm, by = "orf") %>%
inner_join(meta, by = "sample") %>%
inner_join(tax, by = "orf") %>%
replace_na(list(order = 'Not annotated')) %>%
mutate(order = case_when(
order %in% names(cols.order) ~ order,
!order %in% names(cols.order) ~ 'Other',
.default = NA
)) %>%
group_by(cat, station, layer, order) %>%
summarise(tpm = sum(tpm), .groups = "drop") %>%
# Prepare the labels
mutate(layer = if_else(layer == '10m', '10 m', 'chl max.')) %>%
mutate(layer = paste(station, ' (', layer, ')', sep = '')) %>%
mutate(layer = factor(layer, levels =
c('5 (10 m)', '5 (chl max.)',
'26 (10 m)', '26 (chl max.)',
'42 (10 m)', '42 (chl max.)',
'53 (10 m)', '53 (chl max.)',
'56 (10 m)', '56 (chl max.)'
)))i %>%
mutate(cat = gsub(' \\[[A-Z]\\]', '', cat)) %>%
ggplot(aes(
x = tpm,
y = fct_rev(layer),
fill = fct_relevel(order, names(cols.order))
)) +
geom_col() + facet_wrap(~ cat, scales = 'free_x') + labs(x = 'Relative abundance (TPM)', fill = 'Order') +
scale_x_continuous(labels = scales::label_comma()) +
scale_fill_manual(values = cols.order) +
theme_tidy(fontsize = 5) +
theme(
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
legend.key.width = unit(4, 'mm'), legend.key.height = unit(5, 'mm')
)ggsave('figures/cogs-stations.pdf', width = 180, height = 140, units = 'mm')kofam %>%
inner_join(tpm, by = "orf") %>%
group_by(ko, ko_definition) %>% summarise(tpm = sum(tpm), .groups = "drop") %>%
slice_max(tpm, n = 100) %>%
knitr::kable(digits = 0, format = "simple")| ko | ko_definition | tpm |
|---|---|---|
| K02703 | photosystem II P680 reaction center D1 protein [EC:1.10.3.9] | 627346 |
| K14178 | amorpha-4,11-diene synthase [EC:4.2.3.24] | 331289 |
| K07374 | tubulin alpha | 313355 |
| K03320 | ammonium transporter, Amt family | 284483 |
| K03283 | heat shock 70kDa protein 1/2/6/8 | 207749 |
| K16116 | pristinamycin I synthetase 1 | 165412 |
| K02002 | glycine betaine/proline transport system substrate-binding protein | 118536 |
| K11294 | nucleolin | 104259 |
| K02358 | elongation factor Tu | 100407 |
| K02256 | cytochrome c oxidase subunit 1 [EC:7.1.1.9] | 88388 |
| K21395 | TRAP-type transport system periplasmic protein | 78857 |
| K04643 | sensory rhodopsin | 76747 |
| K00336 | NADH-quinone oxidoreductase subunit G [EC:7.1.1.2] | 70494 |
| K10946 | methane/ammonia monooxygenase subunit C | 69480 |
| K02710 | photosystem II PsbI protein | 67351 |
| K02707 | photosystem II cytochrome b559 subunit alpha | 65452 |
| K25467 | secretoglobin family 2A (mammaglobin) | 63538 |
| K14016 | ubiquitin fusion degradation protein 1 | 61662 |
| K05284 | GPI mannosyltransferase 1 subunit M [EC:2.4.1.-] | 58292 |
| K01601 | ribulose-bisphosphate carboxylase large chain [EC:4.1.1.39] | 55658 |
| K14393 | cation/acetate symporter | 55339 |
| K02704 | photosystem II CP47 chlorophyll apoprotein | 54066 |
| K02711 | photosystem II PsbJ protein | 51502 |
| K02705 | photosystem II CP43 chlorophyll apoprotein | 49359 |
| K02718 | photosystem II PsbT protein | 47237 |
| K01074 | palmitoyl-protein thioesterase [EC:3.1.2.22] | 45535 |
| K08720 | outer membrane protein OmpU | 45487 |
| K02697 | photosystem I subunit IX | 44336 |
| K02110 | F-type H+-transporting ATPase subunit c | 41208 |
| K03704 | cold shock protein | 40770 |
| K10408 | dynein axonemal heavy chain | 39878 |
| K04078 | chaperonin GroES | 38936 |
| K02128 | F-type H+-transporting ATPase subunit c | 37358 |
| K02014 | iron complex outermembrane recepter protein | 37218 |
| K00412 | ubiquinol-cytochrome c reductase cytochrome b subunit | 36617 |
| K15210 | snRNA-activating protein complex subunit 3 | 35474 |
| K03040 | DNA-directed RNA polymerase subunit alpha [EC:2.7.7.6] | 34003 |
| K02126 | F-type H+-transporting ATPase subunit a | 32951 |
| K00410 | ubiquinol-cytochrome c reductase cytochrome b/c1 subunit | 32685 |
| K15551 | taurine transport system substrate-binding protein | 32549 |
| K02950 | small subunit ribosomal protein S12 | 32518 |
| K09969 | general L-amino acid transport system substrate-binding protein | 32510 |
| K15738 | ABC transport system ATP-binding/permease protein | 32359 |
| K02261 | cytochrome c oxidase subunit 2 | 31900 |
| K04659 | thrombospondin 2/3/4/5 | 31820 |
| K02262 | cytochrome c oxidase subunit 3 | 31691 |
| K02055 | putative spermidine/putrescine transport system substrate-binding protein | 30024 |
| K07335 | basic membrane protein A and related proteins | 28741 |
| K13049 | carboxypeptidase PM20D1 [EC:3.4.17.-] | 28712 |
| K02712 | photosystem II PsbK protein | 28349 |
| K02992 | small subunit ribosomal protein S7 | 26946 |
| K02027 | multiple sugar transport system substrate-binding protein | 26593 |
| K02864 | large subunit ribosomal protein L10 | 26404 |
| K02108 | F-type H+-transporting ATPase subunit a | 26390 |
| K03530 | DNA-binding protein HU-beta | 26093 |
| K02114 | F-type H+-transporting ATPase subunit epsilon | 25872 |
| K02706 | photosystem II P680 reaction center D2 protein [EC:1.10.3.9] | 25397 |
| K02935 | large subunit ribosomal protein L7/L12 | 25388 |
| K04968 | transient receptor potential cation channel subfamily C member 5 | 25164 |
| K02274 | cytochrome c oxidase subunit I [EC:7.1.1.9] | 24163 |
| K00368 | nitrite reductase (NO-forming) [EC:1.7.2.1] | 23730 |
| K02109 | F-type H+-transporting ATPase subunit b | 23473 |
| K07795 | putative tricarboxylic transport membrane protein | 22989 |
| K02640 | cytochrome b6-f complex subunit 5 | 22808 |
| K11244 | cell wall integrity and stress response component | 22290 |
| K04077 | chaperonin GroEL [EC:5.6.1.7] | 22116 |
| K10944 | methane/ammonia monooxygenase subunit A [EC:1.14.18.3 1.14.99.39] | 20857 |
| K07375 | tubulin beta | 20511 |
| K02058 | simple sugar transport system substrate-binding protein | 20436 |
| K02690 | photosystem I P700 chlorophyll a apoprotein A2 | 20313 |
| K17644 | norsolorinic acid ketoreductase [EC:1.1.1.349] | 20203 |
| K02996 | small subunit ribosomal protein S9 | 20109 |
| K02952 | small subunit ribosomal protein S13 | 20018 |
| K01602 | ribulose-bisphosphate carboxylase small chain [EC:4.1.1.39] | 20002 |
| K02874 | large subunit ribosomal protein L14 | 19822 |
| K02887 | large subunit ribosomal protein L20 | 19548 |
| K03935 | NADH dehydrogenase (ubiquinone) Fe-S protein 2 [EC:7.1.1.2] | 19480 |
| K02982 | small subunit ribosomal protein S3 | 19430 |
| K02945 | small subunit ribosomal protein S1 | 19321 |
| K02111 | F-type H+/Na+-transporting ATPase subunit alpha [EC:7.1.2.2 7.2.2.1] | 19255 |
| K00053 | ketol-acid reductoisomerase [EC:1.1.1.86] | 19188 |
| K02906 | large subunit ribosomal protein L3 | 19139 |
| K06806 | cadherin 19, type 2 | 18980 |
| K02035 | peptide/nickel transport system substrate-binding protein | 18898 |
| K02954 | small subunit ribosomal protein S14 | 18745 |
| K02133 | F-type H+-transporting ATPase subunit beta [EC:7.1.2.2] | 18269 |
| K00138 | aldehyde dehydrogenase [EC:1.2.1.-] | 18228 |
| K19824 | rubrerythrin | 18202 |
| K16815 | calcium-independent phospholipase A2-gamma | 17831 |
| K00344 | NADPH:quinone reductase [EC:1.6.5.5] | 17640 |
| K02965 | small subunit ribosomal protein S19 | 17463 |
| K03574 | 8-oxo-dGTP diphosphatase [EC:3.6.1.55] | 17142 |
| K05849 | solute carrier family 8 (sodium/calcium exchanger) | 17097 |
| K20410 | target of rapamycin complex 2 subunit MAPKAP1/AVO1 | 16779 |
| K01999 | branched-chain amino acid transport system substrate-binding protein | 16584 |
| K02899 | large subunit ribosomal protein L27 | 16160 |
| K04564 | superoxide dismutase, Fe-Mn family [EC:1.15.1.1] | 16108 |
| K04641 | bacteriorhodopsin | 16015 |
| K11253 | histone H3 | 15958 |
| K02638 | plastocyanin | 15927 |
The gene list is from Royo-Llonch et al. [1]. It contains the KEGG ortholog (KO) and the pathway the gene is involved in. KOs of interest in the data that are not represented in the gene list are manually categorized. Shows the RNA transcripts for the carbon fixation pathways for each depth layer. To do so, one KO catalyzing the rate-limiting reaction was selected for each pathway. The Wood-Ljungdahl pathway was also included in this analysis but the representing KO was not detected, or the detected KO did not have a taxonomic annotation.
cols.ko <- c( # Sorted on abundance
"Nitrososphaerales" = "#A2A475",
"Cyanobacteriales" = "#D69C4E",
"Flavobacteriales" = "#D8B70A",
"Burkholderiales" = "#78B7C5",
"Pseudomonadales" = "#C7B19C",
"Chitinophagales" = "#00A08A",
"Rhodobacterales" = "#046C9A",
"Campylobacterales" = "#1B5331",
"Pelagibacterales" = "#972D15",
"PS1" = "#FAEFD1",
"Other" = "#D5D5D3",
"Not annotated" = "#8D8680"
) %>% rev()k <- read_tsv('data/genelist.tsv', col_types = 'cccc') %>%
filter(pathway != 'Remodelling')kos <- kofam %>%
filter(evalue < 1e-3) %>%
# Use only the KOs from the gene list
inner_join(k, by = 'ko') %>%
left_join(tax, by = "orf") %>%
replace_na(list("domain" = "Bacteria", "order" = "Not annotated", "phylum" = "Not annotated")) %>%
filter(domain %in% c("Bacteria", "Archaea")) %>%
# Add the tpm for each ORF and add the metadata
inner_join(tpm, by = "orf") %>%
inner_join(meta, by = "sample") %>%
mutate(
order = if_else(order %in% names(cols.ko), order, "Other"),
order = factor(order, levels = names(cols.ko))
) %>%
# Sum the tpm
group_by(pathway, metabolism, layer, order) %>% summarise(tpm = sum(tpm), .groups = "drop")Warning in inner_join(., k, by = "ko"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 20998 of `x` matches multiple rows in `y`.
ℹ Row 2 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
Warning in inner_join(., tpm, by = "orf"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 23645 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
kofam %>%
filter(evalue < 1e-3) %>%
# Use only the KOs from the gene list
inner_join(k, by = 'ko') %>%
left_join(tax, by = "orf") %>%
replace_na(list("domain" = "Bacteria", "order" = "Not annotated", "phylum" = "Not annotated")) %>%
filter(domain %in% c("Bacteria", "Archaea")) %>%
# Add the tpm for each ORF and add the metadata
inner_join(tpm, by = "orf") %>%
filter(metabolism %in% c('N metabolism','P metabolism','Central C metabolism')) %>%
group_by(order) %>% summarise(n = sum(tpm)) %>% slice_max(n, n = 11)Warning in inner_join(., k, by = "ko"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 20998 of `x` matches multiple rows in `y`.
ℹ Row 2 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
Warning in inner_join(., tpm, by = "orf"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 23645 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# A tibble: 11 × 2
order n
<chr> <dbl>
1 Nitrososphaerales 442029.
2 PS1 32018.
3 Not annotated 25433.
4 Pelagibacterales 21718.
5 Pseudomonadales 17107.
6 Rhodospirillales_A 11230.
7 SAR86 3300.
8 Burkholderiales 2091.
9 SAR324 1977.
10 Flavobacteriales 1799.
11 HIMB59 1700.
p1 <- kos %>%
filter(metabolism == "Central C metabolism") %>%
filter(!pathway %in% c('Uronate pathway (udh)','ED pathway (edd)','PRPP biosynthesis (RPPK)')) %>%
ggplot(aes(
x = tpm,
y = paste(pathway, layer) %>% fct_rev(),
fill = fct_relevel(order, names(cols.ko))
)) +
geom_col(show.legend = TRUE) +
scale_fill_manual(values = cols.ko, drop = FALSE) +
scale_y_discrete(
labels = . %>% gsub('.* chlMax', '', .) %>% gsub(' 10m', '', .)
) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "TPM", y = "") +
theme_tidy(fontsize = 5)
p1The genes listed below are typically catalyze the rate limiting step of the pathway. Like all other analysis, filtering is done based on the KOs and grouping of the KOs is avoided.
i <- k %>%
filter(pathway == 'N fixation') %>%
mutate(
layer = '10m',
order = 'Other',
tpm = 0
) %>%
mutate(layer = if_else(ko == 'K02588', 'chlMax', layer)) %>%
select(-ko, -ko_definition)
p2 <- kos %>%
filter(metabolism == "N metabolism") %>%
rbind(i) %>%
mutate(pathway = if_else(pathway == 'Denitrification', 'Nitrite reductase (nirK)', pathway)) %>%
filter(pathway != 'Urea metabolism') %>%
ggplot(aes(
x = tpm,
y = paste(pathway, layer) %>% fct_rev(),
fill = fct_relevel(order, names(cols.ko))
)) +
geom_col(show.legend = TRUE) +
scale_fill_manual(values = cols.ko, drop = FALSE) +
scale_y_discrete(
labels = . %>% gsub('.* chlMax', '', .) %>% gsub(' 10m', '', .)
) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "TPM", y = "") +
theme_tidy(fontsize = 5)
p2p3 <- kos %>%
filter(metabolism == "P metabolism") %>%
ggplot(aes(
x = tpm,
y = paste(pathway, layer) %>% fct_rev(),
fill = fct_relevel(order, names(cols.ko))
)) +
geom_col(show.legend = TRUE) +
scale_fill_manual(values = cols.ko, drop = FALSE) +
scale_y_discrete(
labels = . %>% gsub('.* chlMax', '', .) %>% gsub(' 10m', '', .)
) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "TPM", y = "") +
theme_tidy(fontsize = 5)
p3Figure 10 shows the tpm for each representative KO summed over all the samples (n = 20). N transformation, central C metabolism, and photosynthesis plus aerobic respiration. Udh uronate dehydrogenase, edd phosphogluconate dehydratase, icd isocitrate dehydrogenase, ICL isocitrate lyase, G6PD glucose-6-phosphate 1-dehydrogenase, PRPP phosphoribosyl pyrophosphate, RPPK ribose-phosphate pyrophosphokinase, ED: Entner–Doudoroff. Note how the transcripts involved in carbon fixation are predominantly represented by the Streptomycetales (Streptomyces sp.) and the Burkholderiales (Nitrosomonas sp.) while only few transcripts were detected affiliated with cyanobacteria. The oxidation from ammonia to nitrite (nitrification) is performed by Nitrosopumilus sp. while the further oxidation from nitrite to nitrate (K00370) is done by a representative from the family Nitrospinaceae.
(p1 + p2 + p3) + plot_annotation(tag_levels = list(c('c','d','e'))) + plot_layout(guides = 'collect') &
theme(
axis.ticks.y = element_blank(),
legend.position = 'bottom',
legend.key.height = unit(3.5, "mm"), legend.key.width = unit(2.5, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.margin = margin(t = -2),
plot.margin = margin(t = 10),
plot.tag.location = "panel",
plot.tag.position = c(0.93, 0.95),
legend.title = element_blank(),
aspect.ratio = 1.4,
axis.text.y = element_text(margin = margin())
) & guides(fill = guide_legend(reverse = TRUE))ggsave("figures/ko-modules.pdf", width = 180, height = 160, units = "mm")cols.order <- c( # Sorted on abundance
"Pseudomonadales" = "#C7B19C",
"Pelagibacterales" = "#972D15",
"SAR324" = "#D69C4E",
"Marinisomatales" = "#046C9A",
"Puniceispirillales_A" = "#9986A5",
"PS1" = "#FAEFD1",
"Rhodobacterales" = "#00A08A",
"SAR86" = "#D5D5D3",
"Burkholderiales" = "#78B7C5",
"UBA8366" = "#A2A475",
"Flavobacteriales" = "#D8B70A",
"Other" = "#D5D5D3"
) %>% rev()kos <- read_tsv('data/morphology_genes.tsv.gz', col_types = 'ccdccc')
kos <- kos %>%
inner_join(tax, by = "orf") %>%
inner_join(tpm, by = "orf") %>%
inner_join(meta, by = "sample") %>%
replace_na(list("order" = "Other")) %>%
mutate(order = if_else(order %in% names(cols.order), order, "Other")) %>%
group_by(layer, label, order, category) %>% summarise(tpm = sum(tpm), .groups = "drop") %>%
mutate(order = factor(order, names(cols.order))) %>%
mutate(i = if_else(
layer == "10m", true = label, false = paste(label, layer)
))kos %>%
group_by(order) %>%
summarise(n = sum(tpm)) %>% filter(!is.na(order)) %>%
arrange(desc(n))# A tibble: 12 × 2
order n
<fct> <dbl>
1 Pseudomonadales 17963.
2 Other 16800.
3 Pelagibacterales 10111.
4 SAR324 5653.
5 Marinisomatales 5321.
6 PS1 2997.
7 Puniceispirillales_A 2628.
8 SAR86 1367.
9 Burkholderiales 1355.
10 Flavobacteriales 1328.
11 Rhodobacterales 1188.
12 UBA8366 802.
lab <- kos %>%
filter(category == "EPS biosynthesis") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
p1 <- kos %>%
filter(category == "EPS biosynthesis") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = order
)) +
geom_col(width = 0.85, show.legend = TRUE) +
scale_fill_manual(values = cols.order, drop = FALSE) +
labs(x = "Relative abundance (tpm)", y = "") +
theme_tidy(legend = "right", fontsize = 5) +
scale_x_continuous(labels = scales::label_comma()) +
scale_y_discrete(labels = lab) +
guides(fill = guide_legend(reverse = TRUE)) +
theme(
legend.title = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_text(margin = margin(l = 2)),
legend.key.height = unit(3.5, "mm"), legend.key.width = unit(2.5, "mm"),
)
p1lab <- kos %>%
filter(category == "Vesicle formation") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
p2 <- kos %>%
filter(category == "Vesicle formation") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = order
)) +
geom_col(width = 0.85, show.legend = TRUE) +
scale_fill_manual(values = cols.order, drop = FALSE) +
labs(x = "Relative abundance (tpm)", y = "") +
scale_y_discrete(labels = lab) +
theme_tidy(legend = "right", fontsize = 5) +
scale_x_continuous(labels = scales::label_comma()) +
guides(fill = guide_legend(reverse = TRUE)) +
theme(
legend.title = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_text(margin = margin(l = 2)),
legend.key.height = unit(3.5, "mm"), legend.key.width = unit(2.5, "mm"),
)
p2lab <- kos %>%
filter(category == "External appendages and membrane extensions") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
p3 <- kos %>%
filter(category == "External appendages and membrane extensions") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = order
)) +
geom_col(width = 0.85, show.legend = TRUE) +
scale_fill_manual(values = cols.order, drop = FALSE) +
labs(x = "Relative abundance (tpm)", y = "") +
theme_tidy(legend = "right", fontsize = 5) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_discrete(labels = lab) +
scale_x_continuous(labels = scales::label_comma()) +
theme(
legend.title = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_text(margin = margin(l = 2)),
legend.key.height = unit(3.5, "mm"), legend.key.width = unit(2.5, "mm"),
)
p3d <- read_tsv('data/genelist_maintenance.tsv', col_types = 'c') d <- d %>%
left_join(kofam, by = "ko", relationship = "many-to-many") %>%
inner_join(tax, by = "orf") %>%
inner_join(tpm, by = "orf") %>%
inner_join(meta, by = "sample") %>%
mutate(layer = factor(layer, c("10m", "chlMax"))) %>%
mutate(i = if_else(
layer == "10m", true = subcategory, paste(subcategory, layer)
)) %>%
mutate(order = if_else(is.na(order), "Other", order)) %>%
mutate(order = if_else(!order %in% names(cols.order), "Other", order)) %>%
mutate(order = factor(order, levels = names(cols.order))) %>%
group_by(layer, order, i, category) %>% summarise(tpm = sum(tpm), .groups = "drop")Warning in inner_join(., tpm, by = "orf"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 5186 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
lab <- d %>%
filter(category == "Defense system") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
px <- d %>%
filter(category == "Defense system") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = fct_relevel(order, names(cols.order))
)) +
geom_col(show.legend = TRUE, width = 0.8) +
scale_fill_manual(values = cols.order, drop = FALSE) +
scale_y_discrete(labels = lab) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Relative abundance (tpm)", y = "", fill = "") +
theme_tidy(fontsize = 5, legend = "right") +
guides(fill = guide_legend(reverse = T)) +
theme(
axis.ticks.y = element_blank(),
legend.key.height = unit(3.5, "mm"),
aspect.ratio = 1.4,
panel.border = element_rect(fill = "transparent", linewidth = 0.75)
)
pxlab <- d %>%
filter(category == "Osmoregulation") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
py <- d %>%
filter(category == "Osmoregulation") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = fct_relevel(order, names(cols.order))
)) +
geom_col(show.legend = TRUE, width = 0.8) +
scale_fill_manual(values = cols.order, drop = FALSE) +
scale_y_discrete(labels = lab) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Relative abundance (tpm)", y = "", fill = "") +
theme_tidy(fontsize = 5, legend = "right") +
guides(fill = guide_legend(reverse = T)) +
theme(
axis.ticks.y = element_blank(),
legend.key.height = unit(3.5, "mm"),
aspect.ratio = 1.4,
panel.border = element_rect(fill = "transparent", linewidth = 0.75)
)
pylab <- d %>%
filter(category == "DNA repair and recombination") %>%
arrange(i) %>%
select(i) %>%
distinct() %>%
mutate(i = if_else(grepl("chlMax", i), "", i)) %>%
pull(i) %>% rev()
pz <- d %>%
filter(category == "DNA repair and recombination") %>%
ggplot(aes(
x = tpm,
y = fct_rev(i),
fill = fct_relevel(order, names(cols.order))
)) +
geom_col(show.legend = TRUE, width = 0.8) +
scale_fill_manual(values = cols.order, drop = FALSE) +
scale_x_continuous(labels = scales::label_comma()) +
scale_y_discrete(labels = lab) +
labs(x = "Relative abundance (tpm)", y = "", fill = "") +
theme_tidy(fontsize = 5, legend = "right") +
guides(fill = guide_legend(reverse = T)) +
theme(
axis.ticks.y = element_blank(),
legend.key.height = unit(3.5, "mm"),
aspect.ratio = 1.4,
panel.border = element_rect(fill = "transparent", linewidth = 0.75)
)
pz(p1 + p2 + p3) / (px + py + pz) +
plot_annotation(tag_levels = list(c('a: EPS biosynthesis', 'b: Vesicle formation', 'c: External appendages\nand membrane extensions','d: Defense system', 'e: Osmoregulation', 'f: DNA repair and recombination'))) +
plot_layout(guides = "collect") & guides(fill = guide_legend(reverse = TRUE)) &
theme(
legend.key.height = unit(3.5, "mm"), legend.key.width = unit(2.5, "mm"),
plot.margin = margin(t = 10),
plot.tag.location = "panel",
plot.tag.position = c(0.5, 1.05), plot.tag = element_text(size = 5, face = "plain"),
axis.ticks.y = element_blank(),
legend.position = "right", legend.title = element_blank(),
legend.margin = margin(t = -10), legend.text = element_text(margin = margin(l = 2))
)ggsave(filename = "figures/maintenance.pdf", width = 180, height = 140, units = "mm")