Load RData object containing prepared data objects
load(url("https://github.com/ipb-halle/iESTIMATE/blob/main/use-cases/mtbls709/iestimate_mtbls709.RData?raw=true"))
library(parallel) # Detect number of cpu cores
library(foreach) # For multicore parallel
library(doMC) # For multicore parallel
library(RColorBrewer) # For colors
library(multtest) # For diffreport
library(MSnbase) # MS features
library(xcms) # Swiss army knife for metabolomics
library(CAMERA) # Metabolite Profile Annotation
library(Spectra) # Spectra package needed for XCMS3
library(mixOmics) # Statistics for various Omics
# ERROR: vegan conflicts with mda and klaR, unload packages before using any of the analyses !!!
if ("package:mda" %in% search()) detach(package:mda, unload=TRUE)
if ("package:klaR" %in% search()) detach(package:klaR, unload=TRUE)
library(vegan)
library(multcomp) # For Tukey test
library(Hmisc) # For correlation test
library(gplots) # For fancy heatmaps
library(lme4) # Linear Models with Random Effects
library(lmerTest) # Create p-values for LMER
library(ape) # Phylogeny
library(pvclust) # Phylogeny
library(dendextend) # Phylogeny
library(cba) # Phylogeny
library(phangorn) # Phylogeny
library(ontologyIndex) # Reading obo ontology files
library(webchem) # Converting InChI to InChIKey
library(circlize) # For sunburst plot
library(plotrix) # For sunburst plot
library(Boruta) # Random-Forest BORUTA
library(rpart) # Regression trees
library(caret) # Swiss-army knife for statistics
library(pROC) # Evaluation metrics
library(PRROC) # Evaluation metrics
library(multiROC) # Evaluation metrics
hist(as.numeric(feat_list_pos))
model_pca_pos <- prcomp(feat_list_pos)
plot(model_pca_pos$x[, 1], model_pca_pos$x[,2], pch=19, main="PCA",
xlab=paste0("PC1: ", format(summary(model_pca_pos)$importance[2, 2] * 100, digits=3), " % variance"),
ylab=paste0("PC2: ", format(summary(model_pca_pos)$importance[2, 3] * 100, digits=3), " % variance"),
col=mzml_pheno_colors_samples_pos, bg=mzml_pheno_colors_samples_pos, cex=2)
grid()
text(model_pca_pos$x[, 1], model_pca_pos$x[,2], labels=mzml_names_pos, col=mzml_pheno_colors_samples_pos, pos=3, cex=0.5)
Chemical richness
boxplot(model_div_pos$features ~ mzml_pheno_samples_pos, col=mzml_pheno_colors_pos, names=NA, main="Number of features", xlab="treatment", ylab="number of features")
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[3]-(par("usr")[4]-par("usr")[3])/14, srt=-22.5, adj=0.5, labels=levels(mzml_pheno_samples_pos), xpd=TRUE, cex=0.9)
div_tukey <- tukey.test(response=model_div_pos$features, term=as.factor(mzml_pheno_samples_pos))
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[4]+(par("usr")[4]-par("usr")[3])/40, adj=0.5, labels=div_tukey[,1], xpd=TRUE, cex=0.8)
boxplot(model_div_pos$shannon ~ mzml_pheno_samples_pos, col=mzml_pheno_colors_pos, names=NA, main="Shannon diversity (H\')", xlab="treatment", ylab="Shannon diversity index (H\')")
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[3]-(par("usr")[4]-par("usr")[3])/14, srt=-22.5, adj=0.5, labels=levels(mzml_pheno_samples_pos), xpd=TRUE, cex=0.9)
div_tukey <- tukey.test(response=model_div_pos$shannon, term=as.factor(mzml_pheno_samples_pos))
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[4]+(par("usr")[4]-par("usr")[3])/40, adj=0.5, labels=div_tukey[,1], xpd=TRUE, cex=0.8)
boxplot(model_div_pos$pielou ~ mzml_pheno_samples_pos, col=mzml_pheno_colors_pos, names=NA, main="Pielou\'s evenness", xlab="treatment", ylab="Pielou diversity index (J)")
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[3]-(par("usr")[4]-par("usr")[3])/14, srt=-22.5, adj=0.5, labels=levels(mzml_pheno_samples_pos), xpd=TRUE, cex=0.9)
div_tukey <- tukey.test(response=model_div_pos$pielou, term=as.factor(mzml_pheno_samples_pos))
text(1:length(unique(mzml_pheno_samples_pos)), par("usr")[4]+(par("usr")[4]-par("usr")[3])/40, adj=0.5, labels=div_tukey[,1], xpd=TRUE, cex=0.8)
Selection of features using PLS
# PLS
sel_pls_pos <- f.select_features_pls(feat_matrix=feat_list_pos, sel_factor=as.factor(mzml_pheno_samples_pos), sel_colors=mzml_pheno_colors_pos, components=length(unique(mzml_pheno_samples_pos))-1, tune_length=10, quantile_threshold=0.982, plot_roc_filename=NULL)
## [1] "Number of chosen components: 3"
print(paste("Number of selected features:", f.count.selected_features(sel_feat=sel_pls_pos$`_selected_variables_`)))
## [1] "Number of selected features: 465"
f.heatmap.selected_features(feat_list=feat_list_pos, sel_feat=sel_pls_pos$`_selected_variables_`, sel_names=paste0(" ",sel_pls_pos$`_selected_variables_`), filename=NULL, main="PLS", plot_width=6, plot_height=5, cex_col=0.5, cex_row=0.4)
Are we able to annotate some of these compounds? Let’s use the SIRIUS compound identification data for that
sel_pls_pos <- f.select_features_pls(feat_matrix=comp_list_pos, sel_factor=as.factor(mzml_pheno_samples_pos), sel_colors=mzml_pheno_colors_pos, components=length(unique(mzml_pheno_samples_pos))-1, tune_length=10, quantile_threshold=0.982, plot_roc_filename=NULL)
## [1] "Number of chosen components: 3"
print(paste("Number of selected features:", f.count.selected_features(sel_feat=sel_pls_pos$`_selected_variables_`)))
## [1] "Number of selected features: 115"
sel_var_pos <- sel_pls_pos$`_selected_variables_`
for (i in 1:length(sel_var_pos)) {
if (ms1_def_pos[sel_var_pos[i], "identification"] != "") sel_var_pos[i] <- ms1_def_pos[sel_var_pos[i], "identification"]
if (nchar(sel_var_pos[i]) > 20) sel_var_pos[i] <- substr(sel_var_pos[i], 1, 20)
}
f.heatmap.selected_features(feat_list=comp_list_pos, sel_feat=sel_pls_pos$`_selected_variables_`, sel_names=sel_var_pos, filename=NULL, main="PLS", plot_width=6, plot_height=5, cex_col=0.3, cex_row=0.4)
hist(as.numeric(unlist(class_list_pos)), main="Histogram", xlab="class_list")
model_pca_pos <- prcomp(class_list_pos)
plot(model_pca_pos$x[, 1], model_pca_pos$x[, 2], pch=19, main="PCA",
xlab=paste0("PC2: ", format(summary(model_pca_pos)$importance[2, 1] * 100, digits=3), " % variance"),
ylab=paste0("PC3: ", format(summary(model_pca_pos)$importance[2, 2] * 100, digits=3), " % variance"),
col=mzml_pheno_colors_samples_pos, bg=mzml_pheno_colors_samples_pos, cex=2)
grid()
text(model_pca_pos$x[, 1], model_pca_pos$x[, 2], labels=mzml_names_pos, col=mzml_pheno_colors_samples_pos, pos=3, cex=0.5)
Selection of compound classes using PLS
# PLS
sel_pls_pos <- f.select_features_pls(feat_matrix=class_list_pos, sel_factor=mzml_pheno_samples_pos, sel_colors=mzml_pheno_colors_pos, components=length(unique(mzml_pheno_samples_pos))-1, tune_length=10, quantile_threshold=0.9, plot_roc_filename=NULL)
## [1] "Number of chosen components: 3"
print(paste("Number of selected features:", f.count.selected_features(sel_feat=sel_pls_pos$`_selected_variables_`)))
## [1] "Number of selected features: 40"
f.heatmap.selected_features(feat_list=class_list_pos, sel_feat=sel_pls_pos$`_selected_variables_`, sel_names=paste0(" ",sel_pls_pos$`_selected_variables_`), filename=NULL, main="PLS", plot_width=6, plot_height=5, cex_col=0.5, cex_row=0.4)
model_pca_pos <- prcomp(superclass_list_pos)
plot(model_pca_pos$x[, 1], model_pca_pos$x[, 2], pch=19, main="PCA",
xlab=paste0("PC2: ", format(summary(model_pca_pos)$importance[2, 1] * 100, digits=3), " % variance"),
ylab=paste0("PC3: ", format(summary(model_pca_pos)$importance[2, 2] * 100, digits=3), " % variance"),
col=mzml_pheno_colors_samples_pos, bg=mzml_pheno_colors_samples_pos, cex=2)
grid()
text(model_pca_pos$x[, 1], model_pca_pos$x[, 2], labels=mzml_names_pos, col=mzml_pheno_colors_samples_pos, pos=3, cex=0.5)
# PLS
sel_pls_pos <- f.select_features_pls(feat_matrix=superclass_list_pos, sel_factor=mzml_pheno_samples_pos, sel_colors=mzml_pheno_colors_pos, components=length(unique(mzml_pheno_samples_pos))-1, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
## [1] "Number of chosen components: 3"
print(paste("Number of selected features:", f.count.selected_features(sel_feat=sel_pls_pos$`_selected_variables_`)))
## [1] "Number of selected features: 31"
f.heatmap.selected_features(feat_list=superclass_list_pos, sel_feat=sel_pls_pos$`_selected_variables_`, filename=NULL, main="PLS", plot_width=6, plot_height=5, cex_col=0.5, cex_row=0.4)