#library(GEOquery) ## go to https://github.com/seandavi/GEOquery for installation details
#library(R.utils)
library(reshape2)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyverse)
#library(DMwR)
library(readxl)
library(MultiAssayExperiment)
library(S4Vectors)
library(SummarizedExperiment)
library(DT)
library(randomForest)
library(e1071)
library("bnstruct")
library(rminer)
library(ggpubr)
setwd("~/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis")
# install.packages("VANData_1.0.0.tgz", repos=NULL,type="source")
# install.packages("VAN_1.0.0.tgz", repos=NULL,type="source")
library(R.utils)
library(reshape2)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyverse)
library(DMwR)
library(readxl)
library(magrittr)
library(tidyverse)
library(gage)
library(knitr)
library(DT)
library(janitor)
library(KEGGREST)
library(MASS)
library("AnnotationDbi")
library("org.Hs.eg.db")
library(dplyr)
library(reshape)
library(ggplot2)
library(plyr)
library(gtable)
library(grid)
library(pheatmap)
library(reshape2)
library(plotly)
library(UpSetR)
library(MatchIt)
require(pathview)
library(STRINGdb)
## Kegg sets data
require(gage)
require(gageData)
library(clusterProfiler)
library(enrichplot)
kegg = kegg.gsets(species = "hsa", id.type = "kegg")
kegg.sets.hsa = kegg$kg.sets
library(ggrepel)
this file is to get supp figures for the paper:
using the v5 data object with updated sample info
supp figure S2: same as figure2 but use DCM instead, notice the change of donors here: using the designed donors for dcm comparison due to: DCM-DM is a much rarer heart sample. It’s a genetic disease without ischemia but also with diabetes because of that we have few patients and a couple of them are young; 22 and 31 I think. To account for those younger ages, some younger Donor samples were added to the 47yo+ donor group.
supp figure S3: same as figure2 but use both IHD and DCM to compare with donors: need to check those donors with Ben in email
supp figure S1: some MDS plots with all five groups (ihd-dm, ihd-no dm, dcm-dm, dcm-no dm, donors) and upset plots for those 4 comparisons above (ihd-dm, ihd-no dm, dcm-dm, dcm-no dm compare to corresponding donors in those above analysis), notice those are for multi-omics
supp figure S4: check the layout picture Ben sent, but mainly compare male and female donors
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 109
## [1] 4077 109
infodt1 = colData(all_omics)
infodt1$Age=as.numeric(infodt1$Age)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 29 14
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [22] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [25] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [28] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 29
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=protein_expression2[,union(grep("_DCM_Yes",colnames(protein_expression2)),grep("Donor_No",colnames(protein_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 9037 29
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4051 29
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "31_LV_DCM_Yes_22_F" "33_LV_DCM_Yes_53_M" "37_LV_DCM_Yes_59_M"
## [4] "32_LV_DCM_Yes_31_F" "35_LV_DCM_Yes_56_M" "39_LV_DCM_Yes_63_F"
## [7] "34_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "38_LV_DCM_Yes_61_F"
## [10] "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F"
## [13] "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M"
## [16] "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M"
## [19] "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F"
## [22] "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F"
## [25] "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F"
## [28] "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
## [1] 2.175502
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4051 10
## [1] 4037 10
# g1=ggplot2::ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
# geom_point(aes(x=logFC,y=neglogP_value,color=colorcode)) +
# xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-DM vs Donor") +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1) +
# geom_hline(yintercept = adj_to_p(df1), linetype = "dashed", colour = "red")+scale_color_manual(values = c("blue","gray","red"))
#
# datatable(df1)
# #ggsave(filename="plot1.pdf",g1)
#write.csv(df1,file="dcm_dm vs donor protein.csv")
g1=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>2.5|-logFC>2.5|neglogP_value>6)))
set1=df1[which(df1$P.Value<0.05),"name"]
set1_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set1_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
# #pathway analysis
# set.seed(202204)
# experiment_data_imputation=knn.impute(t(as.matrix(experiment_data_noimputation)),3)
# experiment_data_pathway=as.data.frame(t(experiment_data_imputation))
# dim(experiment_data_pathway)
#
# ##With voom for finding pathways
# groupname<-as.factor(experiment_data_noimputation1$y)
# design <- model.matrix(~ groupname + 0)
# log2.cpm <- voom(experiment_data_pathway, design, normalize.method = "quantile")
# fit <- lmFit(log2.cpm, design)
# cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
# fit2 <- contrasts.fit(fit, cont.matrix)
# fit2 <- eBayes(fit2)
# limma.res=topTable(fit2,n=Inf,sort="p", adjust.method = "fdr")
# fc_new <- limma.res$logFC
# limma.res$entrez = mapIds(org.Hs.eg.db,
# keys=row.names(limma.res),
# column="ENTREZID",
# keytype=keytypes(org.Hs.eg.db)[2],
# multiVals="first")
# names(fc_new) <- limma.res$entrez
#
# kegg_gene_list<-na.omit(fc_new)
# kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
# kk2 <- gseKEGG(geneList = kegg_gene_list,
# organism = "hsa",
# nPerm = 10000,
# minGSSize = 3,
# maxGSSize = 800,
# pvalueCutoff = 0.05,
# pAdjustMethod = "BH",
# keyType = "ncbi-geneid")
# datatable(as.data.frame(kk2))
# clusterProfiler::dotplot(kk2, showCategory=10, split=".sign") + facet_grid(.~.sign)
load("all_omics_lv_v6.RData")
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 29 14
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [22] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [25] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [28] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 29
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=metabolite_expression1[,union(grep("_DCM_Yes",colnames(metabolite_expression1)),grep("Donor_No",colnames(metabolite_expression1)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 155 29
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 29
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [22] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [25] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [28] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 2.792392
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 154 10
## [1] 154 10
g2=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.5|-logFC>0.5|neglogP_value>2)))
datatable(df1)
#write.csv(df1,file="dcm_dm vs donor metabolite.csv")
set2=df1[which(df1$P.Value<0.05),"name"]
set2_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set2_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
# #pathway analysis: name mapping to change the metabolites names
# nameref=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/BH 2020 scMerge_impute AMIDE and HILIC 14.12.21_Normalised.xlsx",sheet = 4)
# sum(rownames(df1) %in% nameref$Metabolite)
#
# df1_new=df1[which(rownames(df1) %in% nameref$Metabolite),]
# df1_new$name=rownames(df1_new)
# df1_new$name[as.numeric(na.omit(match(nameref$Metabolite,df1_new$name,nomatch = NA)))] <- nameref$`KEGG ID`[as.numeric(na.omit(match(df1_new$name,nameref$Metabolite,nomatch = NA)))]
# dim(df1_new)
# df2=df1_new[!is.na(df1_new$name),] #not matching are noted as "-" so that i didnt delete anything
# dim(df2)
# fc=df2$logFC
# names(fc)=df2$name
# fc=sort(fc,decreasing = TRUE)
#
# #name mapping for pathways
# meta_pathway_name=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/2020 Human Heart Metabolomic functionally annotated metabolites.xlsx")
# meta_pathway_name=meta_pathway_name[,c("KEGG pathway","KEGG")]
# sum(meta_pathway_name$KEGG%in% names(fc))
#
# library(clusterProfiler)
# kk<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "BH")
# datatable(as.data.frame(kk))
# clusterProfiler::dotplot(kk, showCategory=10, split=".sign") + facet_grid(.~.sign)
# kk2<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "none")
# datatable(as.data.frame(kk2))
# clusterProfiler::dotplot(kk2, showCategory=10, split=".sign") + facet_grid(.~.sign)
# #pAdjustMethod one of “holm”, “hochberg”, “hommel”, “bonferroni”, “BH”, “BY”, “fdr”, “none”
# kk3<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "fdr")
# datatable(as.data.frame(kk3))
# clusterProfiler::dotplot(kk3, showCategory=10, split=".sign") + facet_grid(.~.sign)
load("all_omics_lv_v6.RData")
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
## [1] 341 105
## [1] 341 105
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 29 14
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [22] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [25] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [28] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
lipid_expression2=lipid_expression2[,colnames(lipid_expression2)%in%indexx]
dim(lipid_expression2)
## [1] 341 28
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=lipid_expression2[,union(grep("_DCM_Yes",colnames(lipid_expression2)),grep("Donor_No",colnames(lipid_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 341 28
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 341 28
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [22] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [25] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [28] "110_LV_Donor_No_65_F"
## [1] 3.835056
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 341 10
## [1] 341 10
g3=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>1|-logFC>2.5|neglogP_value>1.8)))
datatable(df1)
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 109
## [1] 4077 109
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 38 14
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [19] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [22] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [25] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [28] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [31] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [34] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [37] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 36
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=protein_expression2[,union(grep("_DCM_No",colnames(protein_expression2)),grep("Donor_No",colnames(protein_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 9037 36
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4037 36
diabetes=grep("DCM",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "50_LV_DCM_No_55_M" "46_LV_DCM_No_53_F" "49_LV_DCM_No_54_M"
## [4] "55_LV_DCM_No_62_M" "44_LV_DCM_No_50_F" "40_LV_DCM_No_43_F"
## [7] "41_LV_DCM_No_43_M" "54_LV_DCM_No_60_F" "52_LV_DCM_No_58_F"
## [10] "48_LV_DCM_No_54_M" "45_LV_DCM_No_51_M" "47_LV_DCM_No_53_M"
## [13] "53_LV_DCM_No_58_M" "56_LV_DCM_No_52_M" "43_LV_DCM_No_49_M"
## [16] "42_LV_DCM_No_45_M" "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M"
## [19] "91_LV_Donor_No_23_F" "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F"
## [22] "105_LV_Donor_No_56_M" "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F"
## [25] "109_LV_Donor_No_65_M" "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F"
## [28] "97_LV_Donor_No_49_F" "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F"
## [31] "102_LV_Donor_No_54_F" "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M"
## [34] "95_LV_Donor_No_47_F" "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
## [1] 2.138558
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4037 10
## [1] 4035 10
g4=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>2.2|-logFC>2.5|neglogP_value>7.5)))
datatable(df1)
#write.csv(df1,file="dcm_nodm vs donor protein.csv")
set4=df1[which(df1$P.Value<0.05),"name"]
set4_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set4_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
# #pathway analysis
# set.seed(202204)
# experiment_data_imputation=knn.impute(t(as.matrix(experiment_data_noimputation)),3)
# experiment_data_pathway=as.data.frame(t(experiment_data_imputation))
# dim(experiment_data_pathway)
#
# ##With voom for finding pathways
# groupname<-as.factor(experiment_data_noimputation1$y)
# design <- model.matrix(~ groupname + 0)
# log2.cpm <- voom(experiment_data_pathway, design, normalize.method = "quantile")
# fit <- lmFit(log2.cpm, design)
# cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
# fit2 <- contrasts.fit(fit, cont.matrix)
# fit2 <- eBayes(fit2)
# limma.res=topTable(fit2,n=Inf,sort="p", adjust.method = "fdr")
# fc_new <- limma.res$logFC
# limma.res$entrez = mapIds(org.Hs.eg.db,
# keys=row.names(limma.res),
# column="ENTREZID",
# keytype=keytypes(org.Hs.eg.db)[2],
# multiVals="first")
# names(fc_new) <- limma.res$entrez
#
# kegg_gene_list<-na.omit(fc_new)
# kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
# kk2 <- gseKEGG(geneList = kegg_gene_list,
# organism = "hsa",
# nPerm = 10000,
# minGSSize = 3,
# maxGSSize = 800,
# pvalueCutoff = 0.05,
# pAdjustMethod = "BH",
# keyType = "ncbi-geneid")
# datatable(as.data.frame(kk2))
# clusterProfiler::dotplot(kk2, showCategory=10, split=".sign") + facet_grid(.~.sign)
load("all_omics_lv_v6.RData")
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 38 14
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [19] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [22] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [25] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [28] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [31] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [34] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [37] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 38
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=metabolite_expression1[,union(grep("_DCM_No",colnames(metabolite_expression1)),grep("Donor_No",colnames(metabolite_expression1)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 155 38
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 38
diabetes=grep("DCM",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "103_LV_DCM_No_44_M"
## [19] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [22] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [25] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [28] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [31] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [34] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [37] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 2.41218
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 154 10
## [1] 154 10
g5=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.5|-logFC>0.5|neglogP_value>2)))
datatable(df1)
# write.csv(df1,file="dcm_nodm vs donor metabolite.csv")
set5=df1[which(df1$P.Value<0.05),"name"]
set5_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set5_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
# #pathway analysis: name mapping to change the metabolites names
# nameref=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/BH 2020 scMerge_impute AMIDE and HILIC 14.12.21_Normalised.xlsx",sheet = 4)
# sum(rownames(df1) %in% nameref$Metabolite)
#
# df1_new=df1[which(rownames(df1) %in% nameref$Metabolite),]
# df1_new$name=rownames(df1_new)
# df1_new$name[as.numeric(na.omit(match(nameref$Metabolite,df1_new$name,nomatch = NA)))] <- nameref$`KEGG ID`[as.numeric(na.omit(match(df1_new$name,nameref$Metabolite,nomatch = NA)))]
# dim(df1_new)
# df2=df1_new[!is.na(df1_new$name),]
# dim(df2)
# fc=df2$logFC
# names(fc)=df2$name
# fc=sort(fc,decreasing = TRUE)
#
# #name mapping for pathways
# meta_pathway_name=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/2020 Human Heart Metabolomic functionally annotated metabolites.xlsx")
# meta_pathway_name=meta_pathway_name[,c("KEGG pathway","KEGG")]
# sum(meta_pathway_name$KEGG%in% names(fc))
#
# library(clusterProfiler)
# kk<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "BH")
# datatable(as.data.frame(kk))
# clusterProfiler::dotplot(kk, showCategory=10, split=".sign") + facet_grid(.~.sign)
# kk2<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "none")
# datatable(as.data.frame(kk2))
# clusterProfiler::dotplot(kk2, showCategory=10, split=".sign") + facet_grid(.~.sign)
# kk3<-GSEA(fc, TERM2GENE=meta_pathway_name,pvalueCutoff = 1,seed = 12,minGSSize=2,pAdjustMethod = "fdr")
# datatable(as.data.frame(kk3))
# clusterProfiler::dotplot(kk3, showCategory=10, split=".sign") + facet_grid(.~.sign)
load("all_omics_lv_v6.RData")
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
## [1] 341 105
## [1] 341 105
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 38 14
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [19] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [22] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [25] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [28] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [31] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [34] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [37] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
lipid_expression2=lipid_expression2[,colnames(lipid_expression2)%in%indexx]
dim(lipid_expression2)
## [1] 341 36
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=lipid_expression2[,union(grep("_DCM_No",colnames(lipid_expression2)),grep("Donor_No",colnames(lipid_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 341 36
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 341 36
diabetes=grep("DCM",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [19] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [22] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [25] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [28] "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F"
## [31] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [34] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 3.835056
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 341 10
## [1] 341 10
g6=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>1|-logFC>1|neglogP_value>2)))
datatable(df1)
#write.csv(df1,file="dcm_nodm vs donor lipid 2303.csv")
set6=df1[which(df1$P.Value<0.05),"name"]
set6_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set6_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
#ggsave(filename="dcm_nodm vs donor lipid2303.pdf",g6)
library(RColorBrewer)
myCol <- brewer.pal(3, "Pastel2")[c(1, 2)]
library(VennDiagram)
# Chart
v1=venn.diagram(
x = list(set1_restricted, set4_restricted),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
display_venn <- function(x, ...){
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...)
grid.draw(venn_object)
}
display_venn(x = list(set1_restricted, set4_restricted),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
# pdf(file="s2protein_venn.pdf")
# grid.draw(v1)
# dev.off()
v2=venn.diagram(
x = list(set2_restricted, set5_restricted),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
# pdf(file="s2meta_venn.pdf")
# grid.draw(v2)
# dev.off()
display_venn(x = list(set2_restricted, set5_restricted),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
set2_restricted
set5_restricted
v3=venn.diagram(
x = list(set3, set6),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
# pdf(file="supp3lipid_venn2303.pdf")
# grid.draw(v3)
# dev.off()
display_venn(x = list(set3, set6),
category.names = c("DCM-T2DM" , "DCM-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 109
## [1] 4077 109
infodt1 = colData(all_omics)
infodt1$Age=as.numeric(infodt1$Age)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 43 14
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "31_LV_DCM_Yes_22_F"
## [16] "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M" "34_LV_DCM_Yes_56_M"
## [19] "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "37_LV_DCM_Yes_59_M"
## [22] "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F" "90_LV_Donor_No_21_M"
## [25] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [28] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [31] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [34] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [37] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [40] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [43] "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 43
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=protein_expression2[,union(grep("_Yes",colnames(protein_expression2)),grep("Donor_No",colnames(protein_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 9037 43
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4117 43
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "31_LV_DCM_Yes_22_F" "2_LV_IHD_Yes_45_M" "33_LV_DCM_Yes_53_M"
## [4] "3_LV_IHD_Yes_46_F" "4_LV_IHD_Yes_49_M" "6_LV_IHD_Yes_51_M"
## [7] "37_LV_DCM_Yes_59_M" "5_LV_IHD_Yes_50_M" "32_LV_DCM_Yes_31_F"
## [10] "35_LV_DCM_Yes_56_M" "7_LV_IHD_Yes_51_M" "14_LV_IHD_Yes_65_M"
## [13] "39_LV_DCM_Yes_63_F" "9_LV_IHD_Yes_55_M" "12_LV_IHD_Yes_59_M"
## [16] "34_LV_DCM_Yes_56_M" "13_LV_IHD_Yes_59_M" "36_LV_DCM_Yes_58_M"
## [19] "10_LV_IHD_Yes_56_M" "38_LV_DCM_Yes_61_F" "11_LV_IHD_Yes_56_M"
## [22] "8_LV_IHD_Yes_53_M" "1_LV_IHD_Yes_41_M" "110_LV_Donor_No_65_F"
## [25] "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F" "94_LV_Donor_No_47_M"
## [28] "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M" "90_LV_Donor_No_21_M"
## [31] "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M" "99_LV_Donor_No_53_M"
## [34] "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F" "92_LV_Donor_No_23_M"
## [37] "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F" "96_LV_Donor_No_48_M"
## [40] "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F" "107_LV_Donor_No_62_M"
## [43] "104_LV_Donor_No_55_F"
## [1] 1.778046
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4117 10
## [1] 4108 10
# g1=ggplot2::ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
# geom_point(aes(x=logFC,y=neglogP_value,color=colorcode)) +
# xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-DM vs Donor") +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1) +
# geom_hline(yintercept = adj_to_p(df1), linetype = "dashed", colour = "red")+scale_color_manual(values = c("blue","gray","red"))
#
# datatable(df1)
# #ggsave(filename="plot1.pdf",g1)
g1=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>2.5|-logFC>2.5|neglogP_value>8)))
#write.csv(df1,file="hf_dm vs donor protein.csv")
set1=df1[which(df1$P.Value<0.05),"name"]
set1_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set1_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
load("all_omics_lv_v6.RData")
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 43 14
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "31_LV_DCM_Yes_22_F"
## [16] "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M" "34_LV_DCM_Yes_56_M"
## [19] "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "37_LV_DCM_Yes_59_M"
## [22] "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F" "90_LV_Donor_No_21_M"
## [25] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [28] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [31] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [34] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [37] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [40] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [43] "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 43
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=metabolite_expression1[,union(grep("_Yes",colnames(metabolite_expression1)),grep("Donor_No",colnames(metabolite_expression1)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 155 43
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 43
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "31_LV_DCM_Yes_22_F"
## [16] "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M" "34_LV_DCM_Yes_56_M"
## [19] "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "37_LV_DCM_Yes_59_M"
## [22] "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F" "90_LV_Donor_No_21_M"
## [25] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [28] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [31] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [34] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [37] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [40] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [43] "110_LV_Donor_No_65_F"
## [1] 2.169142
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 154 10
## [1] 154 10
g2=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.5|-logFC>0.5|neglogP_value>2)))
datatable(df1)
#write.csv(df1,file="hf_dm vs donor metabolite.csv")
set2=df1[which(df1$P.Value<0.05),"name"]
set2_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set2_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
load("all_omics_lv_v6.RData")
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
## [1] 341 105
## [1] 341 105
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 43 14
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "31_LV_DCM_Yes_22_F"
## [16] "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M" "34_LV_DCM_Yes_56_M"
## [19] "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "37_LV_DCM_Yes_59_M"
## [22] "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F" "90_LV_Donor_No_21_M"
## [25] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [28] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [31] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [34] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [37] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [40] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [43] "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
lipid_expression2=lipid_expression2[,colnames(lipid_expression2)%in%indexx]
dim(lipid_expression2)
## [1] 341 42
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=lipid_expression2[,union(grep("_Yes",colnames(lipid_expression2)),grep("Donor_No",colnames(lipid_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 341 42
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 341 42
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "31_LV_DCM_Yes_22_F"
## [16] "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M" "34_LV_DCM_Yes_56_M"
## [19] "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "37_LV_DCM_Yes_59_M"
## [22] "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F" "90_LV_Donor_No_21_M"
## [25] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [28] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [31] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [34] "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F"
## [37] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [40] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 2.658965
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 341 10
## [1] 341 10
g3=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>1|-logFC>2.5|neglogP_value>1.8)))
datatable(df1)
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 109
## [1] 4077 109
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 54 14
## [1] "15_LV_IHD_No_41_M" "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F"
## [4] "18_LV_IHD_No_45_M" "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F"
## [7] "21_LV_IHD_No_49_F" "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M"
## [10] "24_LV_IHD_No_54_M" "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F"
## [13] "27_LV_IHD_No_62_F" "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M"
## [16] "30_LV_IHD_No_66_M" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [19] "42_LV_DCM_No_45_M" "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F"
## [22] "45_LV_DCM_No_51_M" "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M"
## [25] "48_LV_DCM_No_54_M" "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M"
## [28] "51_LV_DCM_No_56_M" "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M"
## [31] "54_LV_DCM_No_60_F" "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M"
## [34] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [37] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [40] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [43] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [46] "102_LV_Donor_No_54_F" "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F"
## [49] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [52] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 52
## [1] 9037 52
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4045 52
diabetes=grep("Donor",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,0,1)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "110_LV_Donor_No_65_F" "50_LV_DCM_No_55_M" "17_LV_IHD_No_43_F"
## [4] "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F" "46_LV_DCM_No_53_F"
## [7] "49_LV_DCM_No_54_M" "55_LV_DCM_No_62_M" "24_LV_IHD_No_54_M"
## [10] "44_LV_DCM_No_50_F" "101_LV_Donor_No_54_M" "16_LV_IHD_No_42_F"
## [13] "22_LV_IHD_No_50_M" "40_LV_DCM_No_43_F" "21_LV_IHD_No_49_F"
## [16] "41_LV_DCM_No_43_M" "91_LV_Donor_No_23_F" "94_LV_Donor_No_47_M"
## [19] "98_LV_Donor_No_51_F" "54_LV_DCM_No_60_F" "52_LV_DCM_No_58_F"
## [22] "19_LV_IHD_No_47_F" "105_LV_Donor_No_56_M" "90_LV_Donor_No_21_M"
## [25] "20_LV_IHD_No_48_F" "48_LV_DCM_No_54_M" "100_LV_Donor_No_53_F"
## [28] "109_LV_Donor_No_65_M" "45_LV_DCM_No_51_M" "47_LV_DCM_No_53_M"
## [31] "23_LV_IHD_No_50_M" "28_LV_IHD_No_62_M" "99_LV_Donor_No_53_M"
## [34] "53_LV_DCM_No_58_M" "15_LV_IHD_No_41_M" "29_LV_IHD_No_62_M"
## [37] "18_LV_IHD_No_45_M" "93_LV_Donor_No_25_F" "56_LV_DCM_No_52_M"
## [40] "97_LV_Donor_No_49_F" "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F"
## [43] "102_LV_Donor_No_54_F" "96_LV_Donor_No_48_M" "30_LV_IHD_No_66_M"
## [46] "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F" "107_LV_Donor_No_62_M"
## [49] "25_LV_IHD_No_54_M" "104_LV_Donor_No_55_F" "43_LV_DCM_No_49_M"
## [52] "42_LV_DCM_No_45_M"
## [1] 2.22772
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4045 10
## [1] 4042 10
g4=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>2|-logFC>2|neglogP_value>7)))
datatable(df1)
#write.csv(df1,file="hf_nodm vs donor protein.csv")
set4=df1[which(df1$P.Value<0.05),"name"]
set4_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set4_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
load("all_omics_lv_v6.RData")
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 54 14
## [1] "15_LV_IHD_No_41_M" "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F"
## [4] "18_LV_IHD_No_45_M" "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F"
## [7] "21_LV_IHD_No_49_F" "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M"
## [10] "24_LV_IHD_No_54_M" "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F"
## [13] "27_LV_IHD_No_62_F" "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M"
## [16] "30_LV_IHD_No_66_M" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [19] "42_LV_DCM_No_45_M" "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F"
## [22] "45_LV_DCM_No_51_M" "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M"
## [25] "48_LV_DCM_No_54_M" "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M"
## [28] "51_LV_DCM_No_56_M" "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M"
## [31] "54_LV_DCM_No_60_F" "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M"
## [34] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [37] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [40] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [43] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [46] "102_LV_Donor_No_54_F" "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F"
## [49] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [52] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 54
## [1] 155 54
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 54
diabetes=grep("Donor",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,0,1)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "15_LV_IHD_No_41_M" "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F"
## [4] "18_LV_IHD_No_45_M" "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F"
## [7] "21_LV_IHD_No_49_F" "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M"
## [10] "24_LV_IHD_No_54_M" "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F"
## [13] "27_LV_IHD_No_62_F" "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M"
## [16] "30_LV_IHD_No_66_M" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [19] "42_LV_DCM_No_45_M" "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F"
## [22] "45_LV_DCM_No_51_M" "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M"
## [25] "48_LV_DCM_No_54_M" "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M"
## [28] "51_LV_DCM_No_56_M" "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M"
## [31] "54_LV_DCM_No_60_F" "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M"
## [34] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [37] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [40] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [43] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [46] "102_LV_Donor_No_54_F" "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F"
## [49] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [52] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 2.028964
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 154 10
## [1] 154 10
g5=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.5|-logFC>0.5|neglogP_value>3)))
datatable(df1)
#write.csv(df1,file="hf_nodm vs donor metabolite.csv")
set5=df1[which(df1$P.Value<0.05),"name"]
set5_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set5_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
load("all_omics_lv_v6.RData")
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
## [1] 341 105
## [1] 341 105
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD")&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 54 14
## [1] "15_LV_IHD_No_41_M" "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F"
## [4] "18_LV_IHD_No_45_M" "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F"
## [7] "21_LV_IHD_No_49_F" "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M"
## [10] "24_LV_IHD_No_54_M" "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F"
## [13] "27_LV_IHD_No_62_F" "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M"
## [16] "30_LV_IHD_No_66_M" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [19] "42_LV_DCM_No_45_M" "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F"
## [22] "45_LV_DCM_No_51_M" "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M"
## [25] "48_LV_DCM_No_54_M" "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M"
## [28] "51_LV_DCM_No_56_M" "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M"
## [31] "54_LV_DCM_No_60_F" "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M"
## [34] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [37] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [40] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [43] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [46] "102_LV_Donor_No_54_F" "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F"
## [49] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [52] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
lipid_expression2=lipid_expression2[,colnames(lipid_expression2)%in%indexx]
dim(lipid_expression2)
## [1] 341 52
## [1] 341 52
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 341 52
diabetes=grep("Donor",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,0,1)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "15_LV_IHD_No_41_M" "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F"
## [4] "18_LV_IHD_No_45_M" "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F"
## [7] "21_LV_IHD_No_49_F" "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M"
## [10] "24_LV_IHD_No_54_M" "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F"
## [13] "27_LV_IHD_No_62_F" "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M"
## [16] "30_LV_IHD_No_66_M" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [19] "42_LV_DCM_No_45_M" "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F"
## [22] "45_LV_DCM_No_51_M" "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M"
## [25] "48_LV_DCM_No_54_M" "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M"
## [28] "51_LV_DCM_No_56_M" "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M"
## [31] "54_LV_DCM_No_60_F" "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M"
## [34] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [37] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [40] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [43] "99_LV_Donor_No_53_M" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [46] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [49] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [52] "110_LV_Donor_No_65_F"
## [1] 2.437116
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 341 10
## [1] 341 10
g6=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("HF-no T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>1.2|-logFC>1.2|neglogP_value>2)))
datatable(df1)
#write.csv(df1,file="hf_nodm vs donor lipid2303.csv")
set6=df1[which(df1$P.Value<0.05),"name"]
set6_relaxed=df1[which(abs(df1$logFC)>1),"name"]
set6_restricted=df1[which(df1$adj.P.Val<0.05),"name"]
#ggsave(filename="hf_nodm vs donor lipid2303.pdf",g6)
library(RColorBrewer)
myCol <- brewer.pal(3, "Pastel2")[c(1, 2)]
library(VennDiagram)
# Chart
v1=venn.diagram(
x = list(set1_restricted, set4_restricted),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
display_venn <- function(x, ...){
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...)
grid.draw(venn_object)
}
display_venn(x = list(set1_restricted, set4_restricted),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
# pdf(file="s3protein_venn.pdf")
# grid.draw(v1)
# dev.off()
v2=venn.diagram(
x = list(set2_restricted, set5_restricted),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
# pdf(file="s3meta_venn.pdf")
# grid.draw(v2)
# dev.off()
display_venn(x = list(set2_restricted, set5_restricted),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
set2_restricted
set5_restricted
v3=venn.diagram(
x = list(set3, set6),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),
cat.cex = 2,
cat.default.pos = "text",
filename = NULL,
output=TRUE,
# Output features
height = 14 ,
width = 15 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol,
# Numbers
cex = 2,
fontface = "bold",
fontfamily = "sans"
)
# pdf(file="supp1lipid_venn2303.pdf")
# grid.draw(v3)
# dev.off()
display_venn(x = list(set3, set6),
category.names = c("HF-T2DM" , "HF-NO T2DM" ),fill = myCol,lwd = 0,
lty = 'blank')
# #an limma mds plot example
# # First 50 probes are differentially expressed in second group
# sd <- 0.3*sqrt(4/rchisq(1000,df=4))
# x <- matrix(rnorm(1000*6,sd=sd),1000,6)
# rownames(x) <- paste("Gene",1:1000)
# x[1:50,4:6] <- x[1:50,4:6] + 2
# # without labels, indexes of samples are plotted.
# mds <- plotMDS(x, col=c(rep("black",3), rep("red",3)) )
# # or labels can be provided, here group indicators:
# plotMDS(mds, col=c(rep("black",3), rep("red",3)), labels= c(rep("Grp1",3), rep("Grp2",3)))
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
## [1] 9037 109
IHD_DM=grep("_IHD_Yes",colnames(protein_expression2))
col_IHD_DM=colnames(protein_expression2)[IHD_DM]
IHD_NO=grep("_IHD_No",colnames(protein_expression2))
col_IHD_NO=colnames(protein_expression2)[IHD_NO]
DCM_DM=grep("_DCM_Yes",colnames(protein_expression2))
col_DCM_DM=colnames(protein_expression2)[DCM_DM]
DCM_NO=grep("_DCM_No",colnames(protein_expression2))
col_DCM_NO=colnames(protein_expression2)[DCM_NO]
Donor=grep("Donor",colnames(protein_expression2))
col_Donor=colnames(protein_expression2)[Donor]
col_Donor=col_Donor[-c(2,6,10,11,13,18,20,24)]#hard coding here to remove those <21 donors
Donor=Donor[-c(2,6,10,11,13,18,20,24)]
col_young_donor=col_Donor[c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
col_other_donor=col_Donor[-c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%c(col_IHD_DM,col_IHD_NO,col_DCM_DM,col_DCM_NO,col_young_donor,col_other_donor)]
dim(protein_expression2)
## [1] 9037 75
## [1] "110_LV_Donor_No_65_F" "50_LV_DCM_No_55_M" "31_LV_DCM_Yes_22_F"
## [4] "17_LV_IHD_No_43_F" "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F"
## [7] "46_LV_DCM_No_53_F" "49_LV_DCM_No_54_M" "55_LV_DCM_No_62_M"
## [10] "2_LV_IHD_Yes_45_M" "33_LV_DCM_Yes_53_M" "3_LV_IHD_Yes_46_F"
## [13] "24_LV_IHD_No_54_M" "44_LV_DCM_No_50_F" "101_LV_Donor_No_54_M"
## [16] "16_LV_IHD_No_42_F" "22_LV_IHD_No_50_M" "40_LV_DCM_No_43_F"
## [19] "4_LV_IHD_Yes_49_M" "21_LV_IHD_No_49_F" "41_LV_DCM_No_43_M"
## [22] "6_LV_IHD_Yes_51_M" "91_LV_Donor_No_23_F" "94_LV_Donor_No_47_M"
## [25] "98_LV_Donor_No_51_F" "54_LV_DCM_No_60_F" "52_LV_DCM_No_58_F"
## [28] "37_LV_DCM_Yes_59_M" "19_LV_IHD_No_47_F" "105_LV_Donor_No_56_M"
## [31] "5_LV_IHD_Yes_50_M" "32_LV_DCM_Yes_31_F" "90_LV_Donor_No_21_M"
## [34] "20_LV_IHD_No_48_F" "35_LV_DCM_Yes_56_M" "48_LV_DCM_No_54_M"
## [37] "7_LV_IHD_Yes_51_M" "100_LV_Donor_No_53_F" "14_LV_IHD_Yes_65_M"
## [40] "39_LV_DCM_Yes_63_F" "109_LV_Donor_No_65_M" "45_LV_DCM_No_51_M"
## [43] "47_LV_DCM_No_53_M" "23_LV_IHD_No_50_M" "28_LV_IHD_No_62_M"
## [46] "99_LV_Donor_No_53_M" "53_LV_DCM_No_58_M" "9_LV_IHD_Yes_55_M"
## [49] "12_LV_IHD_Yes_59_M" "15_LV_IHD_No_41_M" "29_LV_IHD_No_62_M"
## [52] "34_LV_DCM_Yes_56_M" "13_LV_IHD_Yes_59_M" "36_LV_DCM_Yes_58_M"
## [55] "18_LV_IHD_No_45_M" "10_LV_IHD_Yes_56_M" "93_LV_Donor_No_25_F"
## [58] "56_LV_DCM_No_52_M" "38_LV_DCM_Yes_61_F" "11_LV_IHD_Yes_56_M"
## [61] "97_LV_Donor_No_49_F" "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F"
## [64] "102_LV_Donor_No_54_F" "96_LV_Donor_No_48_M" "30_LV_IHD_No_66_M"
## [67] "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F" "8_LV_IHD_Yes_53_M"
## [70] "107_LV_Donor_No_62_M" "25_LV_IHD_No_54_M" "104_LV_Donor_No_55_F"
## [73] "1_LV_IHD_Yes_41_M" "43_LV_DCM_No_49_M" "42_LV_DCM_No_45_M"
color_index=ifelse(colnames(protein_expression2)%in%col_IHD_DM,"IHD_DM",ifelse(colnames(protein_expression2)%in%col_IHD_NO,"IHD_NO",ifelse(colnames(protein_expression2)%in%col_DCM_DM,"DCM_DM",ifelse(colnames(protein_expression2)%in%col_DCM_NO,"DCM_NO",ifelse(colnames(protein_expression2)%in%col_young_donor,"young_donor","other_donor")))))
real_color=ifelse(color_index=="IHD_DM","red",ifelse(color_index=="IHD_NO","green",ifelse(color_index=="DCM_DM","blue",ifelse(color_index=="DCM_NO","purple",ifelse(color_index=="young_donor","orange","pink")))))
mds <- plotMDS(protein_expression2, col=real_color ,cex=0.5)
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_IHD_DM,"IHD_DM",ifelse(mdsdata$name%in%col_IHD_NO,"IHD_NO",ifelse(mdsdata$name%in%col_DCM_DM,"DCM_DM",ifelse(mdsdata$name%in%col_DCM_NO,"DCM_NO",ifelse(mdsdata$name%in%col_young_donor,"young_donor","other_donor")))))
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
#ggsave("s1a.pdf",gg,height = 5, width = 7)
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
IHD_DM=grep("_IHD_Yes",colnames(metabolite_expression1))
col_IHD_DM=colnames(metabolite_expression1)[IHD_DM]
IHD_NO=grep("_IHD_No",colnames(metabolite_expression1))
col_IHD_NO=colnames(metabolite_expression1)[IHD_NO]
DCM_DM=grep("_DCM_Yes",colnames(metabolite_expression1))
col_DCM_DM=colnames(metabolite_expression1)[DCM_DM]
DCM_NO=grep("_DCM_No",colnames(metabolite_expression1))
col_DCM_NO=colnames(metabolite_expression1)[DCM_NO]
Donor=grep("Donor",colnames(metabolite_expression1))
col_Donor=colnames(metabolite_expression1)[Donor]
col_Donor=col_Donor[-c(1:8)]#hard coding here to remove those <21 donors
Donor=Donor[-c(1:8)]
col_young_donor=col_Donor[c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
col_other_donor=col_Donor[-c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%c(col_IHD_DM,col_IHD_NO,col_DCM_DM,col_DCM_NO,col_Donor)]
dim(metabolite_expression1)
## [1] 155 77
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "15_LV_IHD_No_41_M"
## [16] "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F" "18_LV_IHD_No_45_M"
## [19] "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F" "21_LV_IHD_No_49_F"
## [22] "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M" "24_LV_IHD_No_54_M"
## [25] "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F"
## [28] "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M" "30_LV_IHD_No_66_M"
## [31] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [34] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [37] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [40] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [43] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [46] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [49] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [52] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [55] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [58] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [61] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [64] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [67] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [70] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [73] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [76] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
color_index=ifelse(colnames(metabolite_expression1)%in%col_IHD_DM,"IHD_DM",ifelse(colnames(metabolite_expression1)%in%col_IHD_NO,"IHD_NO",ifelse(colnames(metabolite_expression1)%in%col_DCM_DM,"DCM_DM",ifelse(colnames(metabolite_expression1)%in%col_DCM_NO,"DCM_NO",ifelse(colnames(metabolite_expression1)%in%col_young_donor,"young_donor","other_donor")))))
real_color=ifelse(color_index=="IHD_DM","red",ifelse(color_index=="IHD_NO","green",ifelse(color_index=="DCM_DM","blue",ifelse(color_index=="DCM_NO","purple",ifelse(color_index=="young_donor","orange","pink")))))
mds <- plotMDS(metabolite_expression1, col=real_color ,cex=0.5)
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_IHD_DM,"IHD_DM",ifelse(mdsdata$name%in%col_IHD_NO,"IHD_NO",ifelse(mdsdata$name%in%col_DCM_DM,"DCM_DM",ifelse(mdsdata$name%in%col_DCM_NO,"DCM_NO",ifelse(mdsdata$name%in%col_young_donor,"young_donor","other_donor")))))
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
#ggsave("s1b.pdf",gg,height = 5, width = 7)
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
IHD_DM=grep("_IHD_Yes",colnames(lipid_expression1))
col_IHD_DM=colnames(lipid_expression1)[IHD_DM]
IHD_NO=grep("_IHD_No",colnames(lipid_expression1))
col_IHD_NO=colnames(lipid_expression1)[IHD_NO]
DCM_DM=grep("_DCM_Yes",colnames(lipid_expression1))
col_DCM_DM=colnames(lipid_expression1)[DCM_DM]
DCM_NO=grep("_DCM_No",colnames(lipid_expression1))
col_DCM_NO=colnames(lipid_expression1)[DCM_NO]
Donor=grep("Donor",colnames(lipid_expression1))
col_Donor=colnames(lipid_expression1)[Donor]
col_Donor=col_Donor[-c(1:8)]#hard coding here to remove those <21 donors
Donor=Donor[-c(1:8)]
col_young_donor=col_Donor[c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
col_other_donor=col_Donor[-c(grep("90",col_Donor),grep("91",col_Donor),grep("92",col_Donor),grep("93",col_Donor))]
lipid_expression1=lipid_expression1[,colnames(lipid_expression1)%in%c(col_IHD_DM,col_IHD_NO,col_DCM_DM,col_DCM_NO,col_Donor)]
dim(lipid_expression1)
## [1] 341 75
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "15_LV_IHD_No_41_M"
## [16] "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F" "18_LV_IHD_No_45_M"
## [19] "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F" "21_LV_IHD_No_49_F"
## [22] "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M" "24_LV_IHD_No_54_M"
## [25] "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F"
## [28] "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M" "30_LV_IHD_No_66_M"
## [31] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [34] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [37] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [40] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [43] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [46] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [49] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [52] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [55] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [58] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [61] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [64] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [67] "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F"
## [70] "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M"
## [73] "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
color_index=ifelse(colnames(lipid_expression1)%in%col_IHD_DM,"IHD_DM",ifelse(colnames(lipid_expression1)%in%col_IHD_NO,"IHD_NO",ifelse(colnames(lipid_expression1)%in%col_DCM_DM,"DCM_DM",ifelse(colnames(lipid_expression1)%in%col_DCM_NO,"DCM_NO",ifelse(colnames(lipid_expression1)%in%col_young_donor,"young_donor","other_donor")))))
real_color=ifelse(color_index=="IHD_DM","red",ifelse(color_index=="IHD_NO","green",ifelse(color_index=="DCM_DM","blue",ifelse(color_index=="DCM_NO","purple",ifelse(color_index=="young_donor","orange","pink")))))
mds <- plotMDS(lipid_expression1, col=real_color,cex=0.5 )
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_IHD_DM,"IHD_DM",ifelse(mdsdata$name%in%col_IHD_NO,"IHD_NO",ifelse(mdsdata$name%in%col_DCM_DM,"DCM_DM",ifelse(mdsdata$name%in%col_DCM_NO,"DCM_NO",ifelse(mdsdata$name%in%col_young_donor,"young_donor","other_donor")))))
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
#results <- decideTests(fit2,p.value =0.2, lfc= log2(1.2),adjust.method = "none")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
## [1] 9037 109
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD"),1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 77 14
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "15_LV_IHD_No_41_M"
## [16] "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F" "18_LV_IHD_No_45_M"
## [19] "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F" "21_LV_IHD_No_49_F"
## [22] "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M" "24_LV_IHD_No_54_M"
## [25] "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F"
## [28] "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M" "30_LV_IHD_No_66_M"
## [31] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [34] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [37] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [40] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [43] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [46] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [49] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [52] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [55] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [58] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [61] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [64] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [67] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [70] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [73] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [76] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 75
## [1] 9037 75
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4085 75
IHD_DM=grep("_IHD_Yes",colnames(experiment_data_noimputation))
IHD_NO=grep("_IHD_No",colnames(experiment_data_noimputation))
DCM_DM=grep("_DCM_Yes",colnames(experiment_data_noimputation))
DCM_NO=grep("_DCM_No",colnames(experiment_data_noimputation))
Donor=grep("Donor",colnames(experiment_data_noimputation))
young_donor=Donor[which(Donor%in%c(grep("90",colnames(experiment_data_noimputation)),grep("91",colnames(experiment_data_noimputation)),grep("92",colnames(experiment_data_noimputation)),grep("93",colnames(experiment_data_noimputation))))]
other_donor=Donor[-which(Donor%in%c(grep("90",colnames(experiment_data_noimputation)),grep("91",colnames(experiment_data_noimputation)),grep("92",colnames(experiment_data_noimputation)),grep("93",colnames(experiment_data_noimputation))))]
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1)%in%IHD_DM,"IHD_DM",ifelse(rownames(experiment_data_noimputation1)%in%IHD_NO,"IHD_NO",ifelse(rownames(experiment_data_noimputation1)%in%DCM_DM,"DCM_DM",ifelse(rownames(experiment_data_noimputation1)%in%DCM_NO,"DCM_NO",ifelse(rownames(experiment_data_noimputation1)%in%young_donor,"young_donor","other_donor")))))
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts( IHD_DM_D="groupnameIHD_DM-groupnameother_donor",IHD_NO_D="groupnameIHD_NO-groupnameother_donor",DCM_DM_D="groupnameDCM_DM-groupnameother_donor-groupnameyoung_donor",DCM_NO_D="groupnameDCM_NO-groupnameother_donor-groupnameyoung_donor", levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
results <- decideTests(fit2,p.value =0.05, adjust.method = "BH")
results2=as.data.frame(results)
results2[results2==-1]=1 #need to change down regulated to regulated for upset plot
results2[is.na(results2)]=0
upset(as.data.frame(results2),nsets = 4)
grid.text("Summary plot for significantly expressed omics",x = 0.65, y=0.95, gp=gpar(fontsize=10))
#cant auto save as well
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition%in%c("DCM","IHD"),1,ifelse(infodt1$Condition=="Donor",0,2))))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 77 14
## [1] "1_LV_IHD_Yes_41_M" "2_LV_IHD_Yes_45_M" "3_LV_IHD_Yes_46_F"
## [4] "4_LV_IHD_Yes_49_M" "5_LV_IHD_Yes_50_M" "6_LV_IHD_Yes_51_M"
## [7] "7_LV_IHD_Yes_51_M" "8_LV_IHD_Yes_53_M" "9_LV_IHD_Yes_55_M"
## [10] "10_LV_IHD_Yes_56_M" "11_LV_IHD_Yes_56_M" "12_LV_IHD_Yes_59_M"
## [13] "13_LV_IHD_Yes_59_M" "14_LV_IHD_Yes_65_M" "15_LV_IHD_No_41_M"
## [16] "16_LV_IHD_No_42_F" "17_LV_IHD_No_43_F" "18_LV_IHD_No_45_M"
## [19] "19_LV_IHD_No_47_F" "20_LV_IHD_No_48_F" "21_LV_IHD_No_49_F"
## [22] "22_LV_IHD_No_50_M" "23_LV_IHD_No_50_M" "24_LV_IHD_No_54_M"
## [25] "25_LV_IHD_No_54_M" "26_LV_IHD_No_54_F" "27_LV_IHD_No_62_F"
## [28] "28_LV_IHD_No_62_M" "29_LV_IHD_No_62_M" "30_LV_IHD_No_66_M"
## [31] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [34] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [37] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [40] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [43] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [46] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [49] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [52] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [55] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [58] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [61] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [64] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [67] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [70] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [73] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [76] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 77
## [1] 155 77
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 77
IHD_DM=grep("_IHD_Yes",colnames(experiment_data_noimputation))
IHD_NO=grep("_IHD_No",colnames(experiment_data_noimputation))
DCM_DM=grep("_DCM_Yes",colnames(experiment_data_noimputation))
DCM_NO=grep("_DCM_No",colnames(experiment_data_noimputation))
Donor=grep("Donor",colnames(experiment_data_noimputation))
young_donor=Donor[which(Donor%in%c(grep("90",colnames(experiment_data_noimputation)),grep("91",colnames(experiment_data_noimputation)),grep("92",colnames(experiment_data_noimputation)),grep("93",colnames(experiment_data_noimputation))))]
other_donor=Donor[-which(Donor%in%c(grep("90",colnames(experiment_data_noimputation)),grep("91",colnames(experiment_data_noimputation)),grep("92",colnames(experiment_data_noimputation)),grep("93",colnames(experiment_data_noimputation))))]
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1)%in%IHD_DM,"IHD_DM",ifelse(rownames(experiment_data_noimputation1)%in%IHD_NO,"IHD_NO",ifelse(rownames(experiment_data_noimputation1)%in%DCM_DM,"DCM_DM",ifelse(rownames(experiment_data_noimputation1)%in%DCM_NO,"DCM_NO",ifelse(rownames(experiment_data_noimputation1)%in%young_donor,"young_donor","other_donor")))))
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts( IHD_DM_D="groupnameIHD_DM-groupnameother_donor",IHD_NO_D="groupnameIHD_NO-groupnameother_donor",DCM_DM_D="groupnameDCM_DM-groupnameother_donor-groupnameyoung_donor",DCM_NO_D="groupnameDCM_NO-groupnameother_donor-groupnameyoung_donor", levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
results <- decideTests(fit2,p.value =0.05, adjust.method = "BH")
results2=as.data.frame(results)
results2[results2==-1]=1 #need to change down regulated to regulated for upset plot
results2[is.na(results2)]=0
upset(as.data.frame(results2),nsets = 4,nintersects = NA)
grid.text("Summary plot for significantly expressed omics",x = 0.65, y=0.95, gp=gpar(fontsize=10))
load("all_omics_lv_v6.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
protein_expression2=protein_expression1[,grep("Donor",colnames(protein_expression1))]
protein_expression2=protein_expression2[,-c(grep("82",colnames(protein_expression2)),grep("83",colnames(protein_expression2)),grep("84",colnames(protein_expression2)),grep("85",colnames(protein_expression2)),grep("86",colnames(protein_expression2)),grep("87",colnames(protein_expression2)),grep("88",colnames(protein_expression2)),grep("89",colnames(protein_expression2)))]
dim(protein_expression2)
## [1] 9037 20
## [1] "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F"
## [4] "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M"
## [7] "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M"
## [10] "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F"
## [13] "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F"
## [16] "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F"
## [19] "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
M=grep("M",colnames(protein_expression2))
col_M=colnames(protein_expression2)[M]
F=grep("F",colnames(protein_expression2))
col_F=colnames(protein_expression2)[F]
color_index=ifelse(colnames(protein_expression2)%in%col_M,"M","F")
real_color=ifelse(color_index=="M","red","blue")
mds <- plotMDS(protein_expression2, col=real_color,cex=0.5 )
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_M,"M","F")
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
#ggsave("s4a.pdf",gg,height = 5, width = 7)
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
metabolite_expression1=metabolite_expression1[,grep("Donor",colnames(metabolite_expression1))]
metabolite_expression1=metabolite_expression1[,-c(grep("82",colnames(metabolite_expression1)),grep("83",colnames(metabolite_expression1)),grep("84",colnames(metabolite_expression1)),grep("85",colnames(metabolite_expression1)),grep("86",colnames(metabolite_expression1)),grep("87",colnames(metabolite_expression1)),grep("88",colnames(metabolite_expression1)),grep("89",colnames(metabolite_expression1)))]
dim(metabolite_expression1)
## [1] 155 20
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [13] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [16] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [19] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
M=grep("M",colnames(metabolite_expression1))
col_M=colnames(metabolite_expression1)[M]
F=grep("F",colnames(metabolite_expression1))
col_F=colnames(metabolite_expression1)[F]
color_index=ifelse(colnames(metabolite_expression1)%in%col_M,"M","F")
real_color=ifelse(color_index=="M","red","blue")
mds <- plotMDS(metabolite_expression1, col=real_color ,cex=0.5)
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_M,"M","F")
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
#ggsave("s4b.pdf",gg,height = 5, width = 7)
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
lipid_expression1=lipid_expression1[,grep("Donor",colnames(lipid_expression1))]
lipid_expression1=lipid_expression1[,-c(grep("82",colnames(lipid_expression1)),grep("83",colnames(lipid_expression1)),grep("84",colnames(lipid_expression1)),grep("85",colnames(lipid_expression1)),grep("86",colnames(lipid_expression1)),grep("87",colnames(lipid_expression1)),grep("88",colnames(lipid_expression1)),grep("89",colnames(lipid_expression1)))]
dim(lipid_expression1)
## [1] 341 19
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [13] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [16] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [19] "110_LV_Donor_No_65_F"
M=grep("M",colnames(lipid_expression1))
col_M=colnames(lipid_expression1)[M]
F=grep("F",colnames(lipid_expression1))
col_F=colnames(lipid_expression1)[F]
color_index=ifelse(colnames(lipid_expression1)%in%col_M,"M","F")
real_color=ifelse(color_index=="M","red","blue")
mds <- plotMDS(lipid_expression1, col=real_color,cex=0.5 )
mdsdata=as.data.frame(mds$x)
mdsdata$name=rownames(mdsdata)
mdsdata$y=mds$y
mdsdata$group=ifelse(mdsdata$name%in%col_M,"M","F")
gg=ggplot(mdsdata,aes(x=mdsdata$`mds$x`,y=mdsdata$y,color=group))+geom_point()+theme_bw()+xlab(" ")+ylab(" ")
gg
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 109
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 109
## [1] 4077 109
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="Donor",0,2)))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 20 14
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [13] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [16] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [19] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 20
## [1] 9037 20
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4014 20
diabetes=grep("M",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F"
## [4] "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M"
## [7] "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M"
## [10] "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F"
## [13] "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F"
## [16] "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F"
## [19] "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
## [1] 4.904716
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4014 10
## [1] 4013 10
g4=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("M vs F") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>2|-logFC>2|neglogP_value>2.5)))
datatable(df1)
#write.csv(df1,"fm_protein.csv")
load("all_omics_lv_v5.RData")
metabolite_expression=assay(all_omics@ExperimentList$metabolite, "log2_norm")
metabolite_expression1=as.data.frame(metabolite_expression)
metabolite_expression1=apply(metabolite_expression,2,as.numeric)
rownames(metabolite_expression1)=rownames(metabolite_expression)
dim(metabolite_expression1)
## [1] 155 107
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="Donor",0,2)))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 20 14
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [13] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [16] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [19] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
metabolite_expression1=metabolite_expression1[,colnames(metabolite_expression1)%in%indexx]
dim(metabolite_expression1)
## [1] 155 20
## [1] 155 20
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 154 20
diabetes=grep("M",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [13] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [16] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [19] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
## [1] 3.491362
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 154 10
## [1] 154 10
g5=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("M vs F") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.5|-logFC>0.5|neglogP_value>1.5)))
datatable(df1)
#write.csv(df1,"fm_metabolite.csv")
load("all_omics_lv_v6.RData")
lipid_expression=assay(all_omics@ExperimentList$lipid, "log2")
lipid_expression1=as.data.frame(lipid_expression)
lipid_expression1=lipid_expression1[-which(rownames(lipid_expression1)%in% c('Total PC', 'TOTAL TG','TOTAL SM','TOTAL PI','TOTAL PG','TOTAL LPC', 'TOTAL LPE', 'TOTAL HC' ,'TOTAL DG','TOTAL CL', 'TOTAL ACCA', 'TOTAL PE','TOTAL CER')),]
dim(lipid_expression1)
## [1] 341 105
## [1] 341 105
## [1] 341 105
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="Donor",0,2)))
infodt1$Age=as.numeric(infodt1$Age)
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 20 14
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [13] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [16] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [19] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
lipid_expression2=lipid_expression2[,colnames(lipid_expression2)%in%indexx]
dim(lipid_expression2)
## [1] 341 19
## [1] 341 19
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 341 19
diabetes=grep("M",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [4] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [7] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [10] "99_LV_Donor_No_53_M" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [13] "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M" "106_LV_Donor_No_60_M"
## [16] "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F" "109_LV_Donor_No_65_M"
## [19] "110_LV_Donor_No_65_F"
## [1] 3.835056
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 341 10
## [1] 341 10
g6=ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point() +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("M vs F") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1)+geom_text_repel(aes(x=logFC,y=neglogP_value,label=name),subset(highlight_df,(logFC>0.8|-logFC>0.8|neglogP_value>1)))
datatable(df1)