This script plots Figure 3abc: Additive drug effects on the microbiome features.

Scatterplots show microbiome features (a. Gene richness; b. Total abundance of antibiotic resistance genes; c. The first principal component of gut species composition) significantly associated with the number of antibiotics courses in the last 5 years in control and T2D subjects separately (with lines and gray area 95% CI for linear regression).

Boxplots (box showing median and quartiles, whiskers 1.5 interquartile range) show the comparisons in antibiotics-naïve and antibiotics-exposed control and T2D subjects, respectively, with pairwise significances (two-sided MWU tests, FDR-adjusted).

Required file in the input_data folder:

Supplementary Tables 1 to 4 include information on the cohort characteristics and drug intake with source metadata. Source metadata for the MetaCardis cohort includes disease group, gender, age, study center, body mass index (BMI, kg/m2), alternative healthy eating index (aHEI), diet diversity score (DDS), and dietary approaches to stop hypertension (DASH), physical activity and manual work levels, and smoking status. Source data includes information on the drug intake, drug combinations, dosage and antibiotic use analyzed in the MetaCardis cohort.

File with source data on summary microbiome features per sample (such as gene richness, abundance of antibiotic resistance genes, etc.)

File with source data on microbial species abundances estimated with mOTU software.

Load necessary libraries.

library (factoextra)
library (vegan)
library (reshape2)
library (gtools)
library (ggplot2)
library (orddom)
library (cowplot)
library (readxl)

Read antibiotic course data from Supplementary Table 2d.

fileFolder = './input_data/'
md <- read_excel(paste0(fileFolder,
                             "Supplementary_Tables_1_to_4_2019-09-13434.xlsx"), 
                      sheet = "ST 2d")

Leave only patients with information on antibiotic courses (column ANTIBIOTICS_TOTAL)

md <- subset (md, ! is.na (ANTIBIOTICS_TOTAL))
md <- as.data.frame(md)
row.names (md) <- md$SampleID

Get microbiome diversity data from files.

ssd <- read.table (file = paste0(fileFolder,
                                 "hub.microbiome.summary.down.10000000.r"),
                   header = T, check.names = F, sep = "\t")
ssdata <- dcast (ssd, value.var = "FeatureValue", formula = SampleID ~ Feature)
row.names (ssdata) <- ssdata$SampleID

Keep only samples with both antibiotic and microbiome data.

md <- subset (md, SampleID %in% intersect (row.names (md), row.names (ssdata)))
ssdata <- subset (ssdata, SampleID %in% intersect (row.names (md), row.names (ssdata)))

md <- merge (ssdata, md, by = "SampleID")
row.names (md) <- md$SampleID

Restrict analysis to diabetics without AB exposure and to controls

comp1 <- subset (md, (ANTIBIOTICS_TOTAL == 0 & PatientGroup == "3") | PatientGroup == "8")$SampleID

Load metagenome data from file.

data <- read.table (file = paste0(fileFolder, 
                                  "hub.cellcount.motu.Species.v2.data.frame.r"),
                    header = T, sep = "\t")
data <- subset (data, SampleID %in% row.names (md))

# subset samples from the first comparison
data1 <- subset (data, SampleID %in% comp1)

#reshape into matrices
ddata <- dcast (data, formula = SampleID ~ Feature, value.var = "FeatureValue")
ddata1 <- dcast (data, formula = SampleID ~ Feature, value.var = "FeatureValue")
row.names (ddata) <- ddata$SampleID
row.names (ddata1) <- ddata1$SampleID
ddata <- ddata [, -1]
ddata1 <- ddata1 [, -1]
ddata <- na.omit (ddata)
ddata1 <- na.omit (ddata1)

md <- subset (md, SampleID %in% row.names (ddata))

Perform PCA analysis

cds <- prcomp (ddata1, retx = T)

Calculate proportion of variance explained by each component

cds.var.explained = cds$sdev^2
cds.var.explained <- cds.var.explained/sum(cds.var.explained)
# display how much variance explained by the first and second components
print(paste0('PC1 explains ', round(cds.var.explained[1]*100), '% of variance'))
## [1] "PC1 explains 45% of variance"
print(paste0('PC2 explains ', round(cds.var.explained[2]*100), '% of variance'))
## [1] "PC2 explains 8% of variance"

Project all samples into the PC space

ccds <- as.matrix (ddata) %*% cds$rotation
ccds <- merge (ccds, md, by = 0)

Testing AB associations

# corr between AB courses and PC in controls
ccdssub1 <- subset (ccds, PatientGroup == "8")
ccc <- cor.test (ccdssub1$PC1, ccdssub1$ANTIBIOTICS_TOTAL, method = "spearman")
print(paste0('Size of control group: N = ',
             dim(ccdssub1)[1]))
## [1] "Size of control group: N = 256"
print(paste0('Spearman corr between PC1 and AB courses in controls is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between PC1 and AB courses in controls is 0.265087871023232 (p-value=1.72512473276787e-05)"
# corr between AB courses and PC in T2D patients
ccdssub2 <- subset (ccds, PatientGroup == "3")
ccc<-cor.test (ccdssub2$PC1, ccdssub2$ANTIBIOTICS_TOTAL, method = "spearman")
print(paste0('Size of T2D group: N = ',
             dim(ccdssub2)[1]))
## [1] "Size of T2D group: N = 456"
print(paste0('Spearman corr between PC1 and AB courses T2D group is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between PC1 and AB courses T2D group is 0.158001297661567 (p-value=0.000709151882900177)"

Perform Wilcoxon test for principal components between patient and control groups on ABX and not.

s8na <- subset (ccds, ANTIBIOTICS_TOTAL == 0 & PatientGroup == "8")
s8a <- subset (ccds, ANTIBIOTICS_TOTAL > 0 & PatientGroup == "8")
s3na <- subset (ccds, ANTIBIOTICS_TOTAL == 0 & PatientGroup == "3")
s3a <- subset (ccds, ANTIBIOTICS_TOTAL > 0 & PatientGroup == "3")

print(paste0('Size of CTRL no-abx subgroup: N = ', dim(s8na)[1]))
## [1] "Size of CTRL no-abx subgroup: N = 148"
print(paste0('Size of CTRL abx subgroup: N = ', dim(s8a)[1]))
## [1] "Size of CTRL abx subgroup: N = 108"
print(paste0('Size of T2D no-abx subgroup: N = ', dim(s3na)[1]))
## [1] "Size of T2D no-abx subgroup: N = 274"
print(paste0('Size of T2D abx subgroup: N = ', dim(s3a)[1]))
## [1] "Size of T2D abx subgroup: N = 182"
print (paste ("PC1 in T2D no-abx vs T2D abx Wilcoxon p-value=", wilcox.test (s3na$PC1, s3a$PC1)$p.value))
## [1] "PC1 in T2D no-abx vs T2D abx Wilcoxon p-value= 0.000126116209498228"
print (paste ("PC1 in CTRL no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8na$PC1, s8a$PC1)$p.value))
## [1] "PC1 in CTRL no-abx vs CTRL abx Wilcoxon p-value= 0.000138652362988215"
print (paste ("PC1 in T2D no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s3na$PC1, s8a$PC1)$p.value))
## [1] "PC1 in T2D no-abx vs CTRL abx Wilcoxon p-value= 1.29899642577588e-05"
print (paste ("PC1 in T2D abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3a$PC1, s8na$PC1)$p.value))
## [1] "PC1 in T2D abx vs CTRL no-abx Wilcoxon p-value= 0.00216343461661439"
print (paste ("PC1 in T2D abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8a$PC1, s3a$PC1)$p.value))
## [1] "PC1 in T2D abx vs CTRL abx Wilcoxon p-value= 0.514068173552819"
print (paste ("PC1 in T2D no-abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3na$PC1, s8na$PC1)$p.value))
## [1] "PC1 in T2D no-abx vs CTRL no-abx Wilcoxon p-value= 0.564143371164403"

Calculate correlation between ABX courses and gene richness.

# corr between AB courses and Gene richness in controls
ccdssub1 <- subset (ccds, PatientGroup == "8")
ccc <- cor.test (ccdssub1$GeneCount10M, ccdssub1$ANTIBIOTICS_TOTAL, 
                 method = "spearman")
print(paste0('Spearman corr between Gene richness and AB courses in controls is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between Gene richness and AB courses in controls is -0.254968562752882 (p-value=3.65992459560624e-05)"
# corr between AB courses and Gene richness in T2D patients
ccdssub2 <- subset (ccds, PatientGroup == "3")
ccc<-cor.test (ccdssub2$GeneCount10M, ccdssub2$ANTIBIOTICS_TOTAL, method = "spearman")
print(paste0('Spearman corr between Gene richness and AB courses T2D group is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between Gene richness and AB courses T2D group is -0.103221157477783 (p-value=0.0275214518917414)"

Perform Wilcoxon test for gene richness between patient and control groups on ABX and not.

print (paste ("Gene richness in T2D no-abx vs T2D abx Wilcoxon p-value=", wilcox.test (s3na$GeneCount10M, s3a$GeneCount10M)$p.value))
## [1] "Gene richness in T2D no-abx vs T2D abx Wilcoxon p-value= 0.0357604376912736"
print (paste ("Gene richness in CTRL no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8na$GeneCount10M, s8a$GeneCount10M)$p.value))
## [1] "Gene richness in CTRL no-abx vs CTRL abx Wilcoxon p-value= 8.54803194228686e-05"
print (paste ("Gene richness in T2D no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s3na$GeneCount10M, s8a$GeneCount10M)$p.value))
## [1] "Gene richness in T2D no-abx vs CTRL abx Wilcoxon p-value= 1.14031373317808e-07"
print (paste ("Gene richness in T2D abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3a$GeneCount10M, s8na$GeneCount10M)$p.value))
## [1] "Gene richness in T2D abx vs CTRL no-abx Wilcoxon p-value= 4.38434451226163e-25"
print (paste ("Gene richness in T2D abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8a$GeneCount10M, s3a$GeneCount10M)$p.value))
## [1] "Gene richness in T2D abx vs CTRL abx Wilcoxon p-value= 2.21639033163604e-11"
print (paste ("Gene richness in T2D no-abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3na$GeneCount10M, s8na$GeneCount10M)$p.value))
## [1] "Gene richness in T2D no-abx vs CTRL no-abx Wilcoxon p-value= 7.86774007682829e-21"

Calculate correlation between ABX courses and CARD antibiotic resistance genes.

# corr between AB courses and AMR gene abundance in controls
ccdssub1 <- subset (ccds, PatientGroup == "8")
ccc <- cor.test (ccdssub1$CARD10M, ccdssub1$ANTIBIOTICS_TOTAL, 
                 method = "spearman")
print(paste0('Spearman corr between AMR gene abundance and AB courses in controls is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between AMR gene abundance and AB courses in controls is 0.301253926985785 (p-value=9.05956673768446e-07)"
# corr between AB courses and AMR gene abundance in T2D patients
ccdssub2 <- subset (ccds, PatientGroup == "3")
ccc<-cor.test (ccdssub2$CARD10M, ccdssub2$ANTIBIOTICS_TOTAL, method = "spearman")
print(paste0('Spearman corr between AMR gene abundance and AB courses T2D group is ',
             ccc$estimate, ' (p-value=', ccc$p.value,')'))
## [1] "Spearman corr between AMR gene abundance and AB courses T2D group is 0.197946108988909 (p-value=2.06660198522477e-05)"

Perform Wilcoxon test for CARD antibiotic resistance genes between patient and control groups on ABX and not.

print (paste ("AMR gene abundance in T2D no-abx vs T2D abx Wilcoxon p-value=", wilcox.test (s3na$CARD10M, s3a$CARD10M)$p.value))
## [1] "AMR gene abundance in T2D no-abx vs T2D abx Wilcoxon p-value= 4.6524066925419e-06"
print (paste ("AMR gene abundance in CTRL no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8na$CARD10M, s8a$CARD10M)$p.value))
## [1] "AMR gene abundance in CTRL no-abx vs CTRL abx Wilcoxon p-value= 3.69284597309138e-05"
print (paste ("AMR gene abundance in T2D no-abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s3na$CARD10M, s8a$CARD10M)$p.value))
## [1] "AMR gene abundance in T2D no-abx vs CTRL abx Wilcoxon p-value= 0.123594822473376"
print (paste ("AMR gene abundance in T2D abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3a$CARD10M, s8na$CARD10M)$p.value))
## [1] "AMR gene abundance in T2D abx vs CTRL no-abx Wilcoxon p-value= 1.83041910837966e-11"
print (paste ("AMR gene abundance in T2D abx vs CTRL abx Wilcoxon p-value=", wilcox.test (s8a$CARD10M, s3a$CARD10M)$p.value))
## [1] "AMR gene abundance in T2D abx vs CTRL abx Wilcoxon p-value= 0.00389285796963969"
print (paste ("AMR gene abundance in T2D no-abx vs CTRL no-abx Wilcoxon p-value=", wilcox.test (s3na$CARD10M, s8na$CARD10M)$p.value))
## [1] "AMR gene abundance in T2D no-abx vs CTRL no-abx Wilcoxon p-value= 0.0211832721124605"

Select subgroups for plotting.

ccdssub2 <- subset (ccds, ANTIBIOTICS_TOTAL == 0 & (PatientGroup == "8" | PatientGroup == "3"))
# wilcox.test (ccdssub2$PC1 ~ ccdssub2$PatientGroup)
# orddom (subset (ccdssub2, PatientGroup == "8")$PC2, subset (ccdssub2, PatientGroup == "3")$PC2) [13]

ccdssub3 <- subset (ccds, PatientGroup == "8" | PatientGroup == "3")
ccdssub3$Status <- paste (as.character (10 - as.numeric (as.character(ccdssub3$PatientGroup))), as.numeric (ccdssub3$ANTIBIOTICS_TOTAL > 0))
# wilcox.test (ccdssub3$PC2 ~ ccdssub3$PatientGroup)
# orddom (subset (ccdssub3, PatientGroup == "8")$PC2, subset (ccdssub3, PatientGroup == "3")$PC2) [13]

Plot AB courses vs PC, diversity, AMR abundance

p_abx_pc1 <- ggplot (ccdssub3, aes (x = ANTIBIOTICS_TOTAL, y = PC1)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_point (size = 0.5, alpha = 0.5, aes (colour = as.factor (PatientGroup))) + 
  geom_smooth (method = "lm", aes (colour = as.factor (PatientGroup))) + 
  coord_cartesian (ylim = c (-35000000000, 35000000000))#35000000000)) 

p_t2d_pc1 <- ggplot (ccdssub3, aes (x = as.factor (Status), y = PC1)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_boxplot (aes (fill = as.factor (PatientGroup))) + 
  coord_cartesian (ylim = c (-35000000000, 35000000000)) + 
  scale_x_discrete(labels= c('CTR noAB', 'CTR AB', 'T2D noAB', 'T2D AB'))

p_abx_gc <- ggplot (ccdssub3, aes (x = ANTIBIOTICS_TOTAL, y = GeneCount10M)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_point (size = 0.5, alpha = 0.5, aes (colour = as.factor (PatientGroup))) + 
  geom_smooth (method = "lm", aes (colour = as.factor (PatientGroup))) + 
  coord_cartesian (ylim = c (100000, 1000000))

p_t2d_gc <- ggplot (ccdssub3, aes (x = as.factor (Status), y = GeneCount10M)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_boxplot (aes (fill = as.factor (PatientGroup))) + 
  coord_cartesian (ylim = c (100000, 1000000)) + 
  scale_x_discrete(labels= c('CTR noAB', 'CTR AB', 'T2D noAB', 'T2D AB'))

p_abx_abx <- ggplot (ccdssub3, aes (x = ANTIBIOTICS_TOTAL, y = CARD10M)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_point (size = 0.5, alpha = 0.5, aes (colour = as.factor (PatientGroup))) + 
  geom_smooth (method = "lm", aes (colour = as.factor (PatientGroup))) + 
  coord_cartesian (ylim = c (85000, 165000)) 

p_t2d_abx <- ggplot (ccdssub3, aes (x = as.factor (Status), y = CARD10M)) + 
  theme_classic () + theme (legend.position = "none") + 
  geom_boxplot (aes (fill = as.factor (PatientGroup))) + 
  #geom_jitter(color="black", size=0.4, alpha=0.9) +
  coord_cartesian (ylim = c (85000, 165000)) + 
  scale_x_discrete(labels= c('CTR noAB', 'CTR AB', 'T2D noAB', 'T2D AB'))

Make the plot

pg <- plot_grid (p_abx_gc, p_t2d_gc, p_abx_abx, p_t2d_abx, p_abx_pc1, p_t2d_pc1,
                 ncol = 2, nrow = 3, 
                 labels = c ("", "", "", "", "", ""), align = "v")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
print (pg)

# UNCOMMENT TO SAVE FIGURE TO FILE
#pdf (file = "fig3abc_abx_courses_associations.pdf", width = 6, height = 10)
#print (pg)
#dev.off ()
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.3.1     cowplot_1.1.1    orddom_3.1       psych_2.1.3     
##  [5] gtools_3.8.2     reshape2_1.4.4   vegan_2.5-7      lattice_0.20-41 
##  [9] permute_0.9-5    factoextra_1.0.7 ggplot2_3.3.3   
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.0  xfun_0.22         bslib_0.2.4       purrr_0.3.4      
##  [5] splines_3.6.1     colorspace_2.0-0  vctrs_0.3.6       generics_0.1.0   
##  [9] htmltools_0.5.1.1 yaml_2.2.0        mgcv_1.8-34       utf8_1.2.1       
## [13] rlang_0.4.10      jquerylib_0.1.4   pillar_1.6.0      glue_1.4.2       
## [17] withr_2.4.2       DBI_1.1.1         lifecycle_1.0.0   plyr_1.8.6       
## [21] stringr_1.4.0     cellranger_1.1.0  munsell_0.5.0     gtable_0.3.0     
## [25] evaluate_0.14     labeling_0.4.2    knitr_1.33        parallel_3.6.1   
## [29] fansi_0.4.2       highr_0.9         Rcpp_1.0.6        scales_1.1.1     
## [33] jsonlite_1.7.2    tmvnsim_1.0-2     farver_2.1.0      mnormt_2.0.2     
## [37] digest_0.6.27     stringi_1.5.3     dplyr_1.0.5       ggrepel_0.9.1    
## [41] grid_3.6.1        tools_3.6.1       magrittr_2.0.1    sass_0.3.1       
## [45] tibble_3.1.0      cluster_2.1.1     crayon_1.4.1      pkgconfig_2.0.3  
## [49] ellipsis_0.3.1    MASS_7.3-53.1     Matrix_1.3-2      assertthat_0.2.1 
## [53] rmarkdown_2.7     R6_2.5.0          nlme_3.1-152      compiler_3.6.1