Diurnal oscillations in gut bacterial load and composition eclipse seasonal and lifetime dynamics in wild meerkats, Suricata suricatta
Risely, A.1*, Wilhelm, K.1, Clutton-Brock, T.2, 3, 4, Manser, M.B. 3, 4, 5, and Sommer, S.1
Corresponding author: Alice Risely, alice.risely@uni-ulm.de
library(phyloseq)
library(plyr)
library(dplyr)
library(ggplot2)
library(viridis)
library("ggsci")
library(tidyverse)
library(expss)
library(metagMisc)
library(forcats)
library(decontam)
library(lubridate)
library("birk")
library(gridExtra)
library(ape)
library(ggrepel)
library(RColorBrewer)
library(speedyseq)
# session info
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
## [3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Australia.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] speedyseq_0.5.3.9002 RColorBrewer_1.1-2 ggrepel_0.8.2
## [4] ape_5.4 gridExtra_2.3 birk_2.1.2
## [7] lubridate_1.7.9 decontam_1.6.0 metagMisc_0.0.4
## [10] expss_0.10.2 forcats_0.5.0 stringr_1.4.0
## [13] purrr_0.3.4 readr_1.3.1 tidyr_1.1.0
## [16] tibble_3.0.1 tidyverse_1.3.0 ggsci_2.9
## [19] viridis_0.5.1 viridisLite_0.3.0 ggplot2_3.3.2
## [22] dplyr_1.0.0 plyr_1.8.6 phyloseq_1.30.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-143 matrixStats_0.56.0 fs_1.4.1
## [4] httr_1.4.1 tools_3.6.2 backports_1.1.7
## [7] R6_2.4.1 vegan_2.5-6 DBI_1.1.0
## [10] BiocGenerics_0.32.0 mgcv_1.8-31 colorspace_1.4-1
## [13] permute_0.9-5 ade4_1.7-15 withr_2.2.0
## [16] tidyselect_1.1.0 compiler_3.6.2 cli_2.0.2
## [19] rvest_0.3.5 Biobase_2.46.0 htmlTable_2.0.0
## [22] xml2_1.3.2 checkmate_2.0.0 scales_1.1.1
## [25] digest_0.6.27 foreign_0.8-74 rmarkdown_2.8
## [28] XVector_0.26.0 pkgconfig_2.0.3 htmltools_0.5.1.1
## [31] dbplyr_1.4.4 htmlwidgets_1.5.1 rlang_0.4.11
## [34] readxl_1.3.1 rstudioapi_0.11 generics_0.0.2
## [37] jsonlite_1.7.0 magrittr_1.5 biomformat_1.14.0
## [40] Matrix_1.2-18 Rcpp_1.0.4.6 munsell_0.5.0
## [43] S4Vectors_0.24.4 Rhdf5lib_1.8.0 fansi_0.4.1
## [46] lifecycle_0.2.0 stringi_1.4.6 yaml_2.2.1
## [49] MASS_7.3-54 zlibbioc_1.32.0 rhdf5_2.30.1
## [52] grid_3.6.2 blob_1.2.1 parallel_3.6.2
## [55] crayon_1.3.4 lattice_0.20-38 Biostrings_2.54.0
## [58] haven_2.3.1 splines_3.6.2 multtest_2.42.0
## [61] hms_0.5.3 knitr_1.33 pillar_1.4.4
## [64] igraph_1.2.5 reshape2_1.4.4 codetools_0.2-16
## [67] stats4_3.6.2 reprex_0.3.0 glue_1.4.1
## [70] evaluate_0.14 data.table_1.12.8 modelr_0.1.8
## [73] vctrs_0.3.1 foreach_1.5.0 cellranger_1.1.0
## [76] gtable_0.3.0 assertthat_0.2.1 xfun_0.22
## [79] broom_0.5.6 survival_3.2-7 iterators_1.0.12
## [82] IRanges_2.20.2 cluster_2.1.0 ellipsis_0.3.1
memory.limit(1000000)
## [1] 1e+06
# ASV table and taxonomy
meerkat_biom <-import_biom("C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MeerkatSpikeInData/Analysis updated/DATA/meerkat_otu_table.biom")
# sample metadata
meerkat_map <-import_qiime_sample_data ('C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MeerkatSpikeInData/Analysis updated/DATA/meerkat_metadata_updated.txt')
## phylo tree using fragement insertion method
meerkat_tree_FI<-read_tree("C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MeerkatSpikeInData/Analysis updated/DATA/meerkat_tree.nwk") # FI = fragemnt insertion
# merge phyloseq
meerkat_original <-merge_phyloseq(meerkat_biom, meerkat_map, meerkat_tree_FI)
meerkat_original
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34284 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 34284 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 34284 tips and 34282 internal nodes ]:
## taxa are rows
meerkat<-meerkat_original
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34284 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 34284 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 34284 tips and 34282 internal nodes ]:
## taxa are rows
colnames(tax_table(meerkat)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
###get rid of the D_1 etc characters infront of tax names
taxonomy<-data.frame(meerkat@tax_table@.Data)
taxonomy$Kingdom<-substr(taxonomy$Kingdom, 6, 60)
taxonomy$Phylum<-substr(taxonomy$Phylum, 6, 60)
taxonomy$Class<-substr(taxonomy$Class, 6, 60)
taxonomy$Order<-substr(taxonomy$Order, 6, 60)
taxonomy$Family<-substr(taxonomy$Family, 6, 60)
taxonomy$Genus<-substr(taxonomy$Genus, 6, 60)
taxonomy1<-tax_table(as.matrix(taxonomy))
tax_table(meerkat)<-taxonomy1
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34284 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 34284 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 34284 tips and 34282 internal nodes ]:
## taxa are rows
##filter taxa that are not bacteria or not assigned at phylum level
meerkat <- meerkat %>%
subset_taxa(
Kingdom == "Bacteria")
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34012 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 34012 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 34012 tips and 34010 internal nodes ]:
## taxa are rows
meerkat <- meerkat %>%
subset_taxa(
Phylum != "NA")
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 33811 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 33811 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 33811 tips and 33809 internal nodes ]:
## taxa are rows
meerkat <- meerkat %>%
subset_taxa(
Family != "Mitochondria")
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31596 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 31596 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 31596 tips and 31594 internal nodes ]:
## taxa are rows
meerkat <- meerkat %>%
subset_taxa(
Class != "Chloroplast")
meerkat <- meerkat %>%
subset_taxa(
Order != "Chloroplast")
meerkat
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31587 taxa and 1223 samples ]:
## sample_data() Sample Data: [ 1223 samples by 8 sample variables ]:
## tax_table() Taxonomy Table: [ 31587 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 31587 tips and 31585 internal nodes ]:
## taxa are rows
paste(100-((sum(taxa_sums(meerkat))/sum(taxa_sums(meerkat_original))) *100), "% reads removed")
## [1] "1.68412612071259 % reads removed"
#### lab contaminants
meerkat<-subset_samples(meerkat, Storage !="ZYMO")
sample_data(meerkat)$Sample_or_Control<-ifelse(sample_data(meerkat)$Storage == "EXTRACTION CONTROL" | sample_data(meerkat)$Storage == "PCR CONTROL", "CONTROL", "TRUE SAMPLE")
table(sample_data(meerkat)$Sample_or_Control)
##
## CONTROL TRUE SAMPLE
## 31 1191
df <- as.data.frame(sample_data(meerkat)) # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(meerkat)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=Sample_or_Control)) +
geom_point()+
ggtitle("Sequencing depth of negative controls")
############ prevalence method
sample_data(meerkat)$is.neg <- sample_data(meerkat)$Sample_or_Control == "CONTROL"
contamdf.prev <- isContaminant(meerkat, method="prevalence", neg="is.neg")
table(contamdf.prev$contaminant)
##
## FALSE TRUE
## 31576 11
contaminants<- row.names(subset(contamdf.prev, contaminant == TRUE))
contaminants_phylo<-prune_taxa(contaminants, meerkat)
### rmeove contaminants
taxa.names<-taxa_names(meerkat)
ToKeep <- taxa.names[!(taxa.names %in% contaminants)]
meerkat_nocontams<-prune_taxa(ToKeep, meerkat)
100-((sum(taxa_sums(meerkat_nocontams))/sum(taxa_sums(meerkat))) *100)
## [1] 0.00292979
# 0.003% reads removed
unique(sample_data(meerkat_nocontams)$Storage)
## [1] FROZEN FREEZEDRIED FREEZEDRIED_RNALATER
## [4] SAND EXTRACTION CONTROL PCR CONTROL
## [7] SAND-SAMPLE
## 7 Levels: EXTRACTION CONTROL FREEZEDRIED FREEZEDRIED_RNALATER ... SAND-SAMPLE
sandcontamination<-subset_samples(meerkat_nocontams, sample_data(meerkat_nocontams)$Storage != "EXTRACTION CONTROL" & sample_data(meerkat_nocontams)$Storage != "PCR CONTROL"& sample_data(meerkat_nocontams)$Storage != "ZYMO" & sample_data(meerkat_nocontams)$Storage != "SAND-SAMPLE")
sandcontamination1<-subset_samples(meerkat_nocontams, sample_data(meerkat_nocontams)$Storage != "EXTRACTION CONTROL" & sample_data(meerkat_nocontams)$Storage != "PCR CONTROL"& sample_data(meerkat_nocontams)$Storage != "ZYMO" & sample_data(meerkat_nocontams)$Storage != "SAND-SAMPLE")
unique(sample_data(sandcontamination)$Storage)
## [1] FROZEN FREEZEDRIED FREEZEDRIED_RNALATER
## [4] SAND
## Levels: FREEZEDRIED FREEZEDRIED_RNALATER FROZEN SAND
########
sample_data(sandcontamination)$is.neg <- sample_data(sandcontamination)$Storage == "SAND"
contamdf.prev <- decontam::isContaminant(sandcontamination, method="prevalence", neg="is.neg", threshold = 0.25)
#to be less strict use this threshold:
#contamdf.prev <- isContaminant(sandcontamination, method="prevalence", neg="is.neg", threshold = 0.2)
table(contamdf.prev$contaminant)
##
## FALSE TRUE
## 28919 2657
sample_data(sandcontamination)$Sand_or_Faecal<-ifelse(sample_data(sandcontamination)$Storage == "SAND", "SAND", "FAECAL")
ps.pa <- transform_sample_counts(sandcontamination, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Sand_or_Faecal == "SAND", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Sand_or_Faecal == "FAECAL", ps.pa)
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev$contaminant)
df.pa$ASV<-row.names(df.pa)
subset(df.pa, pa.neg>4 & pa.pos > 900)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, fill=contaminant, label =ASV)) +
geom_jitter(alpha = 0.5, size = 3, pch = 21, col = "black") +
theme_bw(base_size = 16)+
xlab("Prevalence (Sand samples)") +
ylab("Prevalence (Faecal Samples)")+
ggtitle("Choosing sand contaminants")+
# scale_y_log10()+
geom_text_repel( data = subset(df.pa, pa.neg>10 & pa.pos > 900))+
geom_text_repel( data = subset(df.pa, pa.neg>5 & pa.neg <7 & pa.pos > 300 & pa.pos < 500 ))+
scale_fill_manual(values = c("skyblue", "red"))
### CHANGE ONE ASV TO CONTAMINANT
df.pa$contaminant<-ifelse(df.pa$pa.neg>5 & df.pa$pa.neg <7 & df.pa$pa.pos > 300 & df.pa$pa.pos < 500 , "TRUE", df.pa$contaminant)
ggplot(data=df.pa, aes(x=pa.neg/max(pa.neg), y=pa.pos/max(pa.pos), fill=contaminant, label =ASV)) +
geom_jitter(alpha = 0.5, size = 3, pch = 21, col = "black") +
theme_bw(base_size = 12)+
xlab("Prevalence (Sand samples)") +
ylab("Prevalence (Faecal samples)")+
ggtitle("Choosing sand contaminants")+
scale_fill_manual(values = c("skyblue", "red"))
sand_beta<-sandcontamination1
sand_beta<-subset_samples(sand_beta, sample_sums(sand_beta)>1000)
sand_beta<- phyloseq::rarefy_even_depth(sand_beta, sample.size = 6000,rngsee = 100, replace = TRUE, trimOTUs=TRUE,verbose=TRUE)
sample_data(sand_beta)$Storage <-ifelse(sample_data(sand_beta)$Storage=="FREEZEDRIED_RNALATER", "FREEZEDRIED", as.character(sample_data(sand_beta)$Storage))
## beta diversity plot
## beta diversity plot
## beta diversity plot
meerkat.ord.bray <- ordinate(sand_beta, "MDS", "bray")
plot_ordination(sand_beta, meerkat.ord.bray, "samples", color="Storage")+ggtitle("Bray Curtis")+
scale_color_viridis(discrete = TRUE, option = "D")
meerkat.ord.jac <- ordinate(sand_beta, "MDS", "jaccard")
plot_ordination(sand_beta, meerkat.ord.jac, "samples", color="Storage")+ggtitle("Jaccard")+
scale_color_viridis(discrete = TRUE, option = "D")
########### stacked barplot
########### stacked barplot
########### stacked barplot
sand_bar<-merge_samples(sand_beta, "Storage", fun=mean)
#sand_bar<-transform(sand_bar, transform = "compositional")
sand_bar<-tax_glom(sand_bar, taxrank = "Family")
sand_bar<-merge_samples(sand_beta, "Storage", fun=mean)
barplot_df <- sand_bar %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
arrange(Phylum) %>% # Sort data frame alphabetically by phylum
filter(Abundance > 0.01) # Filter out low abundance taxa
mypal = pal_npg("nrc", alpha = 0.7)(10)
ggplot(barplot_df, aes(x = Sample, y = Abundance, fill = Phylum)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = mypal)+
theme_bw(base_size = 16)
#### by family
#### by family
#### by family
barplot_df_fam <- sand_bar %>%
speedyseq::tax_glom(taxrank = "Family") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
arrange(Phylum) %>% # Sort data frame alphabetically by phylum
filter(Abundance > 0.01) # Filter out low abundance taxa
## colour palette
n <- 35
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
ggplot(barplot_df_fam, aes(x = Sample, y = Abundance, fill = Family)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = col_vector)+
theme_bw(base_size = 14)
#######
detach(package:plyr) # plyr messes with the mutate function
taxa.names<-taxa_names(meerkat_nocontams)
sand_contaminants<-row.names(subset(df.pa, contaminant == TRUE))
length(sand_contaminants)
## [1] 2658
ToKeep <- taxa.names[!(taxa.names %in% sand_contaminants)]
meerkat_nosandcontams<-prune_taxa(ToKeep, meerkat_nocontams)
meerkat_nocontams
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31576 taxa and 1222 samples ]:
## sample_data() Sample Data: [ 1222 samples by 10 sample variables ]:
## tax_table() Taxonomy Table: [ 31576 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 31576 tips and 31574 internal nodes ]:
## taxa are rows
meerkat_nosandcontams
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 28918 taxa and 1222 samples ]:
## sample_data() Sample Data: [ 1222 samples by 10 sample variables ]:
## tax_table() Taxonomy Table: [ 28918 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 28918 tips and 28917 internal nodes ]:
## taxa are rows
paste(100-((sum(taxa_sums(meerkat_nosandcontams))/sum(taxa_sums(meerkat_nocontams))) *100), "% reads removed")
## [1] "4.34253158851055 % reads removed"
paste(100-((sum(taxa_sums(meerkat_nosandcontams))/sum(taxa_sums(meerkat_original))) *100), "% reads removed")
## [1] "6.04056886555094 % reads removed"
### 28918 ASVs removed as sand contaminants
#########
unique(sample_data(meerkat_nosandcontams)$Storage )
## [1] FROZEN FREEZEDRIED FREEZEDRIED_RNALATER
## [4] SAND EXTRACTION CONTROL PCR CONTROL
## [7] SAND-SAMPLE
## 7 Levels: EXTRACTION CONTROL FREEZEDRIED FREEZEDRIED_RNALATER ... SAND-SAMPLE
meerkat_final<-subset_samples(meerkat_nosandcontams, sample_data(meerkat_nosandcontams)$Storage == "FROZEN" | sample_data(meerkat_nosandcontams)$Storage == "FREEZEDRIED" |sample_data(meerkat_nosandcontams)$Storage == "FREEZEDRIED_RNALATER")
min(taxa_sums(meerkat_final))
## [1] 0
meerkat_final <- prune_taxa(taxa_sums(meerkat_final) > 0, meerkat_final)
100 - (sum(taxa_sums(meerkat_final))/sum(taxa_sums(meerkat_nocontams)))*100
## [1] 5.129891
sum(taxa_sums(meerkat_final))/sum(taxa_sums(meerkat_original))
## [1] 0.9318605
## overal 7% of reads were removed during filtering process
imtechella<-subset_taxa(meerkat_final, Genus =="Imtechella")
allobacillus<-subset_taxa(meerkat_final, Genus =="Allobacillus")
sample_data(meerkat_final)$Imtechella<-sample_sums(imtechella)
sample_data(meerkat_final)$Allobacillus<-sample_sums(allobacillus)
sample_data(meerkat_final)$Seq_depth<-sample_sums(meerkat_final)
ggplot(sample_data(meerkat_final), aes(x = sample_data(meerkat_final)$Imtechella, y = sample_data(meerkat_final)$Allobacillus))+
geom_point(aes(fill = sample_data(meerkat_final)$Storage), pch = 21, col = "black", size = 1.5)+
theme_bw(base_size = 14)+
#scale_x_log10()+
#scale_y_log10()+
theme(legend.position = "none")+
ylab("No. Allobacillus reads")+
xlab("No. Imtechella reads")+
ggtitle("Correlation between Imtechella and Allobacillus")+
geom_abline(slope = 1)+
ylim(0,NA)+
xlim(0,NA)
ggplot(sample_data(meerkat_final), aes(x = sample_data(meerkat_final)$Imtechella, y = sample_data(meerkat_final)$Allobacillus))+
geom_point(aes(fill = sample_data(meerkat_final)$Storage), pch = 21, col = "black", size = 1.5)+
theme_bw(base_size = 14)+
scale_x_log10(limits = c(1e1,1e5))+
scale_y_log10(limits = c(1e1,1e5))+
theme(legend.position = "none")+
ylab("No. Allobacillus reads")+
xlab("No. Imtechella reads")+
ggtitle("Correlation between Imtechella and Allobacillus")+
geom_abline(slope = 1)
####################
############### remove two samples with low spike in counts (these have replicates anyway in next runs)
meerkat_final<- subset_samples(meerkat_final, Imtechella > 0 & Allobacillus > 0)
sample_data(meerkat_final)$Mean_spikein<-(sample_data(meerkat_final)$Imtechella+ sample_data(meerkat_final)$Allobacillus)/2
########## generate scaling factor: this is the number to multiply all reads with so that theoretically the abundance of ALlobacillus would be equal across all samples
sample_data(meerkat_final)$scaling_factor<-mean(sample_data(meerkat_final)$Allobacillus) /sample_data(meerkat_final)$Allobacillus
################## now remove all spike in from phylo object
################## now remove all spike in from phylo object
################## now remove all spike in from phylo object
################## now remove all spike in from phylo object
spikein<- subset_taxa(meerkat_final, Genus =="Imtechella" | Genus =="Allobacillus")
tax_table(spikein)
## Taxonomy Table: [ 13 taxa by 7 taxonomic ranks ]:
## Kingdom Phylum Class Order Family Genus Species
## <fct> <fct> <fct> <fct> <fct> <fct> <fct>
## 1 1c4da9ec7e9217~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ <NA>
## 2 b92c7cfb137fee~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ D_6__Alloba~
## 3 098b1d1068844c~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ D_6__Alloba~
## 4 973fa08cb5cc57~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ D_6__Alloba~
## 5 8d64369815c986~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ D_6__Alloba~
## 6 b1b62a9ef68483~ Bacteria Firmicu~ Bacil~ Bacill~ Bacilla~ Allob~ <NA>
## 7 2d904743b53167~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 8 69822a606cf2da~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 9 59f5254a41f617~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 10 e62d5872faf30e~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 11 941351349c123e~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 12 8d4c77cdd516a4~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
## 13 71037fedb85db4~ Bacteria Bactero~ Bacte~ Flavob~ Flavoba~ Imtec~ D_6__Imtech~
otu_table(spikein)
## OTU Table: [ 13 taxa and 1170 samples ]:
## Taxa are rows
## `1012_run2` `1012` `10158_run2` `10158` `10789_run2` `10789`
## 1 1c4da9ec7~ 0 0 0 0 0 0
## 2 b92c7cfb1~ 396 331 486 562 372 301
## 3 098b1d106~ 0 0 0 0 0 0
## 4 973fa08cb~ 0 0 0 0 0 0
## 5 8d6436981~ 0 0 0 0 0 0
## 6 b1b62a9ef~ 0 0 0 0 0 0
## 7 2d904743b~ 314 247 497 539 383 252
## 8 69822a606~ 0 0 0 0 0 0
## 9 59f5254a4~ 0 0 0 0 0 0
## 10 e62d5872f~ 0 0 0 0 0 0
## 11 941351349~ 0 0 0 0 0 0
## 12 8d4c77cdd~ 0 0 0 0 0 0
## 13 71037fedb~ 0 0 0 0 0 0
## # ... with 11 more samples (columns)
spikein_list<-taxa_names(spikein)
taxa.names<-taxa_names(meerkat_final)
ToKeep <- taxa.names[!(taxa.names %in% spikein_list)]
meerkat_final<-prune_taxa(ToKeep, meerkat_final)
### add sample density to metadata file
sample_data(meerkat_final)$Seq_depth_nospikein<-sample_sums(meerkat_final)
meerkat_nocontams<-meerkat_final ## save copy
spikein<-data.frame(sample_data(meerkat_final))
spikein$spikein_total<-spikein$Imtechella + spikein$Allobacillus
spikein$Percent_spikein<-spikein$spikein_total/ spikein$Seq_depth
summary(spikein$Percent_spikein)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001973 0.015558 0.033224 0.104153 0.113640 0.918437
ggplot(spikein, aes(x= Percent_spikein))+
geom_histogram(bins = 50, fill = "grey", col = "black")+
theme_light(base_size = 14)+
# ylim(0,1)+
# theme(axis.text.x = element_blank())+
ylab("Frequency")+
xlab("Proportion spike-in")+
ggtitle("Histogram of spike-in proportion per sample")