#library(GEOquery) ## go to https://github.com/seandavi/GEOquery for installation details
#library(R.utils)
library(reshape2)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyverse)
#library(DMwR)
library(readxl)
library(MultiAssayExperiment)
library(S4Vectors)
library(SummarizedExperiment)
library(DT)
library(randomForest)
library(e1071)
library("bnstruct")
library(rminer)
library(ggpubr)
setwd("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis")
# install.packages("VANData_1.0.0.tgz", repos=NULL,type="source")
# install.packages("VAN_1.0.0.tgz", repos=NULL,type="source")
library(R.utils)
library(reshape2)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyverse)
library(DMwR)
library(readxl)
library(magrittr)
library(tidyverse)
library(gage)
library(knitr)
library(DT)
library(janitor)
library(KEGGREST)
library(MASS)
library("AnnotationDbi")
library("org.Hs.eg.db")
library(dplyr)
library(reshape)
library(ggplot2)
library(plyr)
library(gtable)
library(grid)
library(pheatmap)
library(reshape2)
library(plotly)
library(UpSetR)
library(MatchIt)
require(pathview)
library(STRINGdb)
## Kegg sets data
require(gage)
require(gageData)
kegg = kegg.gsets(species = "hsa", id.type = "kegg")
kegg.sets.hsa = kegg$kg.sets
# go.hs=go.gsets(species="human")
# go.bp=go.hs$go.sets[go.hs$go.subs$BP]
# go.mf=go.hs$go.sets[go.hs$go.subs$MF]
# go.bpmf=go.hs$go.sets[go.hs$go.subs$BP&go.hs$go.subs$MF] #not equal, wired
# go.bpmf2=append(go.bp,go.mf)
require(gage)
require(gageData)
library(clusterProfiler)
library(enrichplot)
library(GO.db)
# #failed install
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("GOfuncR")
#
# if (!requireNamespace("devtools", quietly=TRUE))
# install.packages("devtools")
# devtools::install_github("sgrote/GOfuncR")
load("all_omics_lv_v5.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 111
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 111
## [1] 4094 111
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$Age=as.numeric(infodt1$Age)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="Yes",1,ifelse(infodt1$Condition=="Donor",0,2))))
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 29 14
## [1] "31_LV_DCM_Yes_22_F" "32_LV_DCM_Yes_31_F" "33_LV_DCM_Yes_53_M"
## [4] "34_LV_DCM_Yes_56_M" "35_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M"
## [7] "37_LV_DCM_Yes_59_M" "38_LV_DCM_Yes_61_F" "39_LV_DCM_Yes_63_F"
## [10] "90_LV_Donor_No_21_M" "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M"
## [13] "93_LV_Donor_No_25_F" "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F"
## [16] "96_LV_Donor_No_48_M" "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F"
## [19] "99_LV_Donor_No_53_M" "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M"
## [22] "102_LV_Donor_No_54_F" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [25] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [28] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 29
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=protein_expression2[,union(grep("_DCM_Yes",colnames(protein_expression2)),grep("Donor_No",colnames(protein_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 9037 29
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4051 29
diabetes=grep("Yes",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
# g1=ggplot2::ggplot(df1, aes(logFC,neglogP_value,label=name))+
# geom_point(size = 1, alpha=1) +
# xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("IHD-T2DM vs Donor") +
# theme_bw(base_size = 16) +
# geom_hline(yintercept = 1.3, linetype = "dashed", colour = "brown3")+theme(aspect.ratio = 1)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "31_LV_DCM_Yes_22_F" "33_LV_DCM_Yes_53_M" "37_LV_DCM_Yes_59_M"
## [4] "32_LV_DCM_Yes_31_F" "35_LV_DCM_Yes_56_M" "39_LV_DCM_Yes_63_F"
## [7] "34_LV_DCM_Yes_56_M" "36_LV_DCM_Yes_58_M" "38_LV_DCM_Yes_61_F"
## [10] "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F"
## [13] "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M"
## [16] "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M"
## [19] "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F"
## [22] "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F"
## [25] "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F"
## [28] "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
## [1] 2.175502
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4051 10
## [1] 4037 10
g1=ggplot2::ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point(aes(x=logFC,y=neglogP_value,color=colorcode)) +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-T2DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1) +
geom_hline(yintercept = adj_to_p(df1), linetype = "dashed", colour = "red")+scale_color_manual(values = c("blue","gray","red"))
ggplotly(g1)
#pathway analysis
#pathway analysis as in our shiny:delete missing values
experiment_data_pathway=na.omit(experiment_data_noimputation)
dim(experiment_data_pathway)
## [1] 2048 29
##With voom for finding pathways
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
log2.cpm <- voom(experiment_data_pathway, design, normalize.method = "quantile")
fit <- lmFit(log2.cpm, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
limma.res=topTable(fit2,n=Inf,sort="p", adjust.method = "fdr")
fc_new <- limma.res$logFC
limma.res$entrez = mapIds(org.Hs.eg.db,
keys=row.names(limma.res),
column="ENTREZID",
keytype=keytypes(org.Hs.eg.db)[2],
multiVals="first")
names(fc_new) <- limma.res$entrez
geneList<-na.omit(fc_new)
geneList = sort(geneList, decreasing = TRUE)
kk2 <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "BP",
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
kk2_dt=as.data.frame(kk2)
kk2_dt[,c(4,5,7,8)]=round(kk2_dt[,c(4,5,7,8)],4)
datatable(kk2_dt)
BP_namelist=kk2_dt$Description
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
#gene ontology over-representation test
ego2 <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego2_dt=as.data.frame(ego2)
ego2_dt[,5:7]=round(ego2_dt[,5:7],4)
datatable(ego2_dt)
# Let is suppose I have a collection of genesets called : HALLMARK Now let is suppose there is a specific geneset there called: E2F_targets
#
# BgRatio, M/N.
#
# M = size of the geneset (eg size of the E2F_targets); (is the number of genes within that distribution that are annotated (either directly or indirectly) to the node of interest).
#
# N = size of all of the unique genes in the collection of genesets (example the HALLMARK collection); (is the total number of genes in the background distribution (universe)
#
# GeneRatio is k/n.
#
# k = size of the overlap of 'a vector of gene id' you input with the specific geneset (eg E2F_targets), only unique genes; (the number of genes within that list n, which are annotated to the node.
#
# n = size of the overlap of 'a vector of gene id' you input with all the members of the collection of genesets (eg the HALLMARK collection),only unique genes; is the size of the list of genes of interest
#adj pval and qval
#https://support.bioconductor.org/p/96329/
#adj pval is the fdr adjusted pval, qval from another package is another adjustment method
#the BH approach is slightly more conservative than qvalue
#enrichgo:
# Over Representation Analysis (ORA) (Boyle et al. 2004) is a widely used approach to determine whether known biological functions or processes are over-represented (= enriched) in an experimentally-derived gene list, e.g. a list of differentially expressed genes (DEGs).
#
# The p-value can be calculated by hypergeometric distribution.
#Leading edge analysis reports Tags to indicate the percentage of genes contributing to the enrichment score, List to indicate where in the list the enrichment score is attained and Signal for enrichment signal strength.
#It would also be very interesting to get the core enriched genes that contribute to the enrichment. Our packages (clusterProfiler, DOSE, meshes and ReactomePA) support leading edge analysis and report core enriched genes in GSEA analysis.
#get pathway maps
result_path <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff =1,
qvalueCutoff =1)
result_path_dt=as.data.frame(result_path)
result_path_dt2=result_path_dt[,c("Description","geneID")]
result_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(result_path_dt2)){
X2=unlist(strsplit(result_path_dt2$geneID[i],"/"))
X1=rep(result_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
result_path_dt3=rbind(result_path_dt3,dt)
}
kk3 <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "MF",
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
kk3_dt=as.data.frame(kk3)
kk3_dt[,c(4,5,7,8)]=round(kk3_dt[,c(4,5,7,8)],4)
datatable(as.data.frame(kk3_dt))
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
#gene ontology over-representation test
ego3 <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego3_dt=as.data.frame(ego3)
ego3_dt[,5:7]=round(ego3_dt[,5:7],4)
datatable(as.data.frame(ego3_dt))
MF_namelist=kk3_dt$Description
#get pathway maps
MFresult_path <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff =1,
qvalueCutoff =1)
MFresult_path_dt=as.data.frame(MFresult_path)
MFresult_path_dt2=MFresult_path_dt[,c("Description","geneID")]
MFresult_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(MFresult_path_dt2)){
X2=unlist(strsplit(MFresult_path_dt2$geneID[i],"/"))
X1=rep(MFresult_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
MFresult_path_dt3=rbind(MFresult_path_dt3,dt)
}
#new things 20220907: add customised pathway analysis results
name=c("GO:0030198","GO:0055007","GO:0030239","GO:0060047","GO:0007010","GO:0031012","GO:0043292","GO:0005856")
AnnotationDbi::select(GO.db, keys=name, columns=c("TERM","ONTOLOGY"), keytype="GOID")
## GOID TERM ONTOLOGY
## 1 GO:0030198 extracellular matrix organization BP
## 2 GO:0055007 cardiac muscle cell differentiation BP
## 3 GO:0030239 myofibril assembly BP
## 4 GO:0060047 heart contraction BP
## 5 GO:0007010 cytoskeleton organization BP
## 6 GO:0031012 extracellular matrix CC
## 7 GO:0043292 contractile fiber CC
## 8 GO:0005856 cytoskeleton CC
pathwayIDs=c("extracellular matrix organization","cardiac muscle cell differentiation", "myofibril assembly", "heart contraction" ,"cytoskeleton organization","extracellular matrix" ,"contractile fiber", "cytoskeleton")
all_name=c(name,GOBPOFFSPRING[["GO:0030198"]],GOBPOFFSPRING[["GO:0055007"]],GOBPOFFSPRING[["GO:0030239"]],GOBPOFFSPRING[["GO:0060047"]],GOBPOFFSPRING[["GO:0007010"]],GOCCOFFSPRING[["GO:0031012"]],GOCCOFFSPRING[["GO:0043292"]],GOCCOFFSPRING[["GO:0005856"]])
all_name2=AnnotationDbi::select(GO.db, keys=all_name, columns=c("TERM","ONTOLOGY"), keytype="GOID")
GO_v<- AnnotationDbi::select(org.Hs.eg.db, keys=all_name2$GOID, columns=c("GO","ONTOLOGY","SYMBOL","ENTREZID"),keytype="GO")
pp2=merge(all_name2,GO_v,by.x="GOID",by.y="GO",all=TRUE)
datatable(pp2%>%group_by(GOID)%>%summarise(n=n()))
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
pathway_list3=pp2[,c("TERM","ENTREZID")]
colnames(pathway_list3)=c("pathwayIDs","geneIDs")
sum(names(gene_list)%in% pathway_list3$geneIDs)
## [1] 810
## [1] 1238
## [1] 2048
#pathway_list3%>%group_by(pathwayIDs)%>%summarise(n=n())
set.seed(20220907)
gse <- GSEA(gene_list, TERM2GENE = pathway_list3,nPerm = 1000, minGSSize = 3, maxGSSize = 500, pAdjustMethod = "none",pvalueCutoff = 0.05)
figure4a=as.data.frame(gse)
figure4av2=figure4a
figure4av2[,4:9]=round(figure4av2[,4:9],3)
datatable(figure4av2)
kegg_gene_list<-na.omit(fc_new)
kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
set.seed(202205)
kk2 <- gseKEGG(geneList = kegg_gene_list,
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
keyType = "ncbi-geneid")
datatable(as.data.frame(kk2))
both2=as.data.frame(kk2)
both2=both2[-grep("disease",both2$Description),]
ggplot(both2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("GSEA for significantly downregulated and upregulated pathways")+labs(y="Enrichment score")+
scale_colour_gradient(high = "red", low = "blue")
#get pathway maps
KEGGresult_path <- enrichKEGG(gene = names(geneList),
organism = "hsa", keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH",minGSSize = 0, maxGSSize = 50000000000, qvalueCutoff = 1)
KEGGresult_path_dt=as.data.frame(KEGGresult_path)
KEGGresult_path_dt2=KEGGresult_path_dt[,c("Description","geneID")]
KEGGresult_path_dt2=KEGGresult_path_dt2[-grep("disease",KEGGresult_path_dt2$Description),]
KEGGresult_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(KEGGresult_path_dt2)){
X2=unlist(strsplit(KEGGresult_path_dt2$geneID[i],"/"))
X1=rep(KEGGresult_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
KEGGresult_path_dt3=rbind(KEGGresult_path_dt3,dt)
}
print(paste("The total number of kegg pathways is",length(unique(KEGGresult_path_dt3$X1))))
## [1] "The total number of kegg pathways is 318"
#get all kegg pathways with the mapping
kegg_full_list=download_KEGG("hsa", keggType = "KEGG", keyType = "kegg")
head(kegg_full_list[[1]])
## from to
## 1 hsa00010 10327
## 2 hsa00010 124
## 3 hsa00010 125
## 4 hsa00010 126
## 5 hsa00010 127
## 6 hsa00010 128
## from to
## 1 hsa00010 Glycolysis / Gluconeogenesis
## 2 hsa00020 Citrate cycle (TCA cycle)
## 3 hsa00030 Pentose phosphate pathway
## 4 hsa00040 Pentose and glucuronate interconversions
## 5 hsa00051 Fructose and mannose metabolism
## 6 hsa00052 Galactose metabolism
## [1] 347
## [1] 558 2
kegg_full_dt=merge(kegg_full_list1,kegg_full_list2,by="from")
kegg_full_dt=kegg_full_dt[,c(3,2)]
length(unique(kegg_full_dt$to.y))
## [1] 347
## GSEA analysis
fc.kegg.p <- gage(fc_new, gsets = kegg.sets.hsa, ref = NULL, samp = NULL, rank.test = TRUE)
upreg <- data.frame(fc.kegg.p[[1]]) %>%
rownames_to_column(var = "pathway") %>%
mutate(KEGGpathway = substr(pathway, start = 1, stop = 8)) %>%
mutate(pathwayname = word(pathway, 2, -1)) %>%
select(pathway, KEGGpathway, pathwayname, set.size, stat.mean, p.val, q.val) %>%
mutate(pathid = substr(KEGGpathway, start = 4, stop = 8))
upreg_selected = upreg %>%
filter(q.val < 0.05) %>%
filter(set.size >= 5)
upreg_selected$neglogP_Value = -log10(upreg_selected$p.val)
upreg_selected$neglogQ_value = -log10(upreg_selected$q.val)
downreg <- data.frame(fc.kegg.p[[2]]) %>%
rownames_to_column(var = "pathway") %>%
mutate(KEGGpathway = substr(pathway, start = 1, stop = 8)) %>%
mutate(pathwayname = word(pathway, 2, -1)) %>%
select(pathway, KEGGpathway, pathwayname, set.size, stat.mean, p.val, q.val) %>%
mutate(pathid = substr(KEGGpathway, start = 4, stop = 8))
downreg_selected = downreg %>%
filter(q.val < 0.05) %>%
filter(set.size >= 5)
downreg_selected$neglogP_Value = -log10(downreg_selected$p.val)
downreg_selected$neglogQ_value = -log10(downreg_selected$q.val)
both = rbind(downreg_selected, upreg_selected)
both$absolute = abs(both$stat.mean)
#datatable(both)
both2 = both %>% filter(pathwayname != "Metabolic pathways" & pathwayname != "Retrograde endocannabinoid signaling")
both2=both2[-grep("disease",both2$pathway),]
both2[,c(5:7,9:11)]=round(both2[,c(5:7,9:11)],4)
datatable(both2)
#saveRDS(both, file = "both_selected.rds")
pv.out.list.fat <- sapply(both2$pathid[1:2],
function(pid) pathview(gene.data = fc_new,
pathway.id = pid,
species = "hsa",
out.suffix="human",
low = list(gene = "deepskyblue1", cpd = "green"),
high = list(gene = "red", cpd = "yellow")))
library(ggrepel )
ggplot(both2, aes(x = stat.mean, y = -log10(p.val))) + geom_point( aes(color = set.size), size = 3) + labs(x = "Enrichment score", y = "neglog10p", title = "GSEA for significantly downregulated and upregulated pathways") +
geom_text_repel(aes(label=pathwayname), size=3) +
scale_colour_gradient(high = "red", low = "blue") +
scale_x_continuous(
labels = scales::number_format(accuracy = 0.01))
ggplot(both2,aes(x = reorder(pathwayname, stat.mean),y=stat.mean))+geom_point(aes(color=-log10(p.val),size=-log10(p.val)))+coord_flip()+theme_bw()+ggtitle("GSEA for significantly downregulated and upregulated pathways")+labs(y="Enrichment score")+
scale_colour_gradient(high = "red", low = "blue")
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 5, maxGSSize = 500, pAdjustMethod = "BH",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/IHD-DM paper figure 3a mitochondrial pathway list.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
deletion_index=which(!gene.pathway2$pathwayIDs%in%ben_mito$Description)
dim(gene.pathway2)
gene.pathway2=gene.pathway2[-deletion_index,]
dim(gene.pathway2)
###
colnames(result_path_dt3)=colnames(gene.pathway2)
colnames(MFresult_path_dt3)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,result_path_dt3,MFresult_path_dt3)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,0.1)
gseaResults_mito$table
#gseaResults_mito$dot
#gseaResults_mito$enrich
#gseaResults_mito$tree
#Normalized enrichment scores (NES)
#https://www.biostars.org/p/169648/
#my plot
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:10,],ggplotdtdownsub[1:10,])
ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+ylim(c(-1,1.2))
#plot all the significant mito pathways
ggplotdt2=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
ggplotdt2=ggplotdt2[ggplotdt2$Description%in%gene.pathway2$pathwayIDs,]
ggplot(ggplotdt2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("mitochondrail pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+ylim(c(-1,1))
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 5, maxGSSize = 500, pAdjustMethod = "BH",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/IHD-DM paper figure 3a mitochondrial pathway list.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
deletion_index=which(!gene.pathway2$pathwayIDs%in%ben_mito$Description)
dim(gene.pathway2)
gene.pathway2=gene.pathway2[-deletion_index,]
dim(gene.pathway2)
###
#KEGGresult_path_dt3 is the old one
colnames(kegg_full_dt)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,kegg_full_dt)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,0.05)
gseaResults_mito$table
#gseaResults_mito$dot
#gseaResults_mito$enrich
#gseaResults_mito$tree
#Normalized enrichment scores (NES)
#https://www.biostars.org/p/169648/
#my plot
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:10,],ggplotdtdownsub[1:10,])
ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")
ihddm_topnames=ggplotdt$Description
ihddm_dt=gseaResults_mito$table2
#plot all the significant mito pathways
ggplotdt2=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
ggplotdt2=ggplotdt2[ggplotdt2$Description%in%gene.pathway2$pathwayIDs,]
ggplot(ggplotdt2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("mitochondrail pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+ylim(c(-1,1))
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 3, maxGSSize = 500, pAdjustMethod = "none",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis/IHD-noDM vs donor KEGG mito pathways 27.6.22.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
## [1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [13] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE
## [25] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE
## [37] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
## [49] FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
## [61] TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
## [73] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] TRUE FALSE FALSE FALSE FALSE FALSE
## [1] 3904 2
## [1] 2416 2
###
colnames(kegg_full_dt)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,kegg_full_dt)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,1)
#my plot
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(ggplotdtup$p.adjust),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$p.adjust),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:10,],ggplotdtdownsub[1:10,])
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("dcm-dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+theme(text = element_text(size = 10))
gg
load("all_omics_lv_v5.RData")
protein_expression=assay(all_omics@ExperimentList$protein, "log2")
protein_expression1=as.data.frame(protein_expression)
dim(protein_expression1)
## [1] 9037 111
#changed 20220224: no overall filtering
protein_expression2=protein_expression1
dim(protein_expression2)
## [1] 9037 111
## [1] 4094 111
#20220321 add the age match as before
infodt1 = colData(all_omics)
infodt1$ihd=as.numeric(as.vector(ifelse(infodt1$Condition=="DCM"&infodt1$Diabetes=="No",1,ifelse(infodt1$Condition=="Donor",0,2))))
ihddt=infodt1[infodt1$ihd!=2,]
ihddt=ihddt[(ihddt$ihd==0&ihddt$Age>=21)|ihddt$ihd==1,]
dim(ihddt)
## [1] 38 14
## [1] "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M" "42_LV_DCM_No_45_M"
## [4] "43_LV_DCM_No_49_M" "44_LV_DCM_No_50_F" "45_LV_DCM_No_51_M"
## [7] "46_LV_DCM_No_53_F" "47_LV_DCM_No_53_M" "48_LV_DCM_No_54_M"
## [10] "49_LV_DCM_No_54_M" "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M"
## [13] "52_LV_DCM_No_58_F" "53_LV_DCM_No_58_M" "54_LV_DCM_No_60_F"
## [16] "55_LV_DCM_No_62_M" "56_LV_DCM_No_52_M" "90_LV_Donor_No_21_M"
## [19] "91_LV_Donor_No_23_F" "92_LV_Donor_No_23_M" "93_LV_Donor_No_25_F"
## [22] "94_LV_Donor_No_47_M" "95_LV_Donor_No_47_F" "96_LV_Donor_No_48_M"
## [25] "97_LV_Donor_No_49_F" "98_LV_Donor_No_51_F" "99_LV_Donor_No_53_M"
## [28] "100_LV_Donor_No_53_F" "101_LV_Donor_No_54_M" "102_LV_Donor_No_54_F"
## [31] "103_LV_DCM_No_44_M" "104_LV_Donor_No_55_F" "105_LV_Donor_No_56_M"
## [34] "106_LV_Donor_No_60_M" "107_LV_Donor_No_62_M" "108_LV_Donor_No_62_F"
## [37] "109_LV_Donor_No_65_M" "110_LV_Donor_No_65_F"
indexx=ihddt$new_filename
protein_expression2=protein_expression2[,colnames(protein_expression2)%in%indexx]
dim(protein_expression2)
## [1] 9037 38
#get the "lv" "ihd-dm" "ihd-no dm" non-imputed data
experiment_data_noimputation=protein_expression2[,union(grep("_DCM_No",colnames(protein_expression2)),grep("Donor_No",colnames(protein_expression2)))]
#colnames(experiment_data_noimputation)
dim(experiment_data_noimputation)
## [1] 9037 38
#filtering out proteins with >25% samples missing
experiment_data_noimputation=experiment_data_noimputation[which(rowMeans(!is.na(experiment_data_noimputation)) > 0.25), ]
dim(experiment_data_noimputation)
## [1] 4075 38
diabetes=grep("DCM",colnames(experiment_data_noimputation))
experiment_data_noimputation1=as.data.frame(t(experiment_data_noimputation))
rownames(experiment_data_noimputation1)=1:dim(experiment_data_noimputation1)[1]
experiment_data_noimputation1$y=ifelse(rownames(experiment_data_noimputation1) %in% diabetes,1,0)
#de analysis-- should be on the non-imputed data
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
fit <- lmFit(experiment_data_noimputation, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
tT <- topTable(fit2)
df1 <- topTable(fit2, number=nrow(fit2), genelist=rownames(experiment_data_noimputation))
df1$name=rownames(df1)
df1$neglogP_value = -log10(df1$P.Value)
# g1=ggplot2::ggplot(df1, aes(logFC,neglogP_value,label=name))+
# geom_point(size = 1, alpha=1) +
# xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("IHD-T2DM vs Donor") +
# theme_bw(base_size = 16) +
# geom_hline(yintercept = 1.3, linetype = "dashed", colour = "brown3")+theme(aspect.ratio = 1)
#add fdr plot
adj_to_p <- function(df) {
#df <- topTable(fit, coef = comparison, number = nrow(fit))
if ((df %>%filter(adj.P.Val < 0.05) %>% nrow()) == 0){
r <- 1
} else {
r <- df%>%mutate(rn = row_number()) %>% filter(adj.P.Val <= 0.05) %>% pull(rn)%>%max() + 1
}
n <- nrow(df) + 1
lp10 <- -log10(r / n * 0.05)
lp10
}
#add samples
colnames(experiment_data_noimputation)
## [1] "50_LV_DCM_No_55_M" "51_LV_DCM_No_56_M" "46_LV_DCM_No_53_F"
## [4] "49_LV_DCM_No_54_M" "55_LV_DCM_No_62_M" "103_LV_DCM_No_44_M"
## [7] "44_LV_DCM_No_50_F" "40_LV_DCM_No_43_F" "41_LV_DCM_No_43_M"
## [10] "54_LV_DCM_No_60_F" "52_LV_DCM_No_58_F" "48_LV_DCM_No_54_M"
## [13] "45_LV_DCM_No_51_M" "47_LV_DCM_No_53_M" "53_LV_DCM_No_58_M"
## [16] "56_LV_DCM_No_52_M" "43_LV_DCM_No_49_M" "42_LV_DCM_No_45_M"
## [19] "110_LV_Donor_No_65_F" "101_LV_Donor_No_54_M" "91_LV_Donor_No_23_F"
## [22] "94_LV_Donor_No_47_M" "98_LV_Donor_No_51_F" "105_LV_Donor_No_56_M"
## [25] "90_LV_Donor_No_21_M" "100_LV_Donor_No_53_F" "109_LV_Donor_No_65_M"
## [28] "99_LV_Donor_No_53_M" "93_LV_Donor_No_25_F" "97_LV_Donor_No_49_F"
## [31] "92_LV_Donor_No_23_M" "108_LV_Donor_No_62_F" "102_LV_Donor_No_54_F"
## [34] "96_LV_Donor_No_48_M" "106_LV_Donor_No_60_M" "95_LV_Donor_No_47_F"
## [37] "107_LV_Donor_No_62_M" "104_LV_Donor_No_55_F"
## [1] 1.97475
highlight_df=df1
highlight_df$colorcode=as.character(ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC>0,1,ifelse(highlight_df$neglogP_value>adj_to_p(df1)&highlight_df$logFC<0,-1,0)))
dim(highlight_df)
## [1] 4075 10
## [1] 4072 10
g1=ggplot2::ggplot(highlight_df, aes(logFC,neglogP_value,label=name))+
geom_point(aes(x=logFC,y=neglogP_value,color=colorcode)) +
xlab("log2 fold change") + ylab("-log10 p-value")+ggtitle("DCM-NO DM vs Donor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),aspect.ratio = 1) +
geom_hline(yintercept = adj_to_p(df1), linetype = "dashed", colour = "red")+scale_color_manual(values = c("blue","gray","red"))
ggplotly(g1)
#pathway analysis
#pathway analysis as in our shiny:delete missing values
experiment_data_pathway=na.omit(experiment_data_noimputation)
dim(experiment_data_pathway)
## [1] 1090 38
##With voom for finding pathways
groupname<-as.factor(experiment_data_noimputation1$y)
design <- model.matrix(~ groupname + 0)
log2.cpm <- voom(experiment_data_pathway, design, normalize.method = "quantile")
fit <- lmFit(log2.cpm, design)
cont.matrix <- makeContrasts(groupname1-groupname0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
limma.res=topTable(fit2,n=Inf,sort="p", adjust.method = "fdr")
fc_new <- limma.res$logFC
limma.res$entrez = mapIds(org.Hs.eg.db,
keys=row.names(limma.res),
column="ENTREZID",
keytype=keytypes(org.Hs.eg.db)[2],
multiVals="first")
names(fc_new) <- limma.res$entrez
geneList<-na.omit(fc_new)
geneList = sort(geneList, decreasing = TRUE)
kk2 <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "BP",
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
kk2_dt=as.data.frame(kk2)
kk2_dt[,c(4,5,7,8)]=round(kk2_dt[,c(4,5,7,8)],4)
datatable(kk2_dt)
BP_namelist=kk2_dt$Description
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
#gene ontology over-representation test
ego2 <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego2_dt=as.data.frame(ego2)
ego2_dt[,5:7]=round(ego2_dt[,5:7],4)
datatable(ego2_dt)
# Let is suppose I have a collection of genesets called : HALLMARK Now let is suppose there is a specific geneset there called: E2F_targets
#
# BgRatio, M/N.
#
# M = size of the geneset (eg size of the E2F_targets); (is the number of genes within that distribution that are annotated (either directly or indirectly) to the node of interest).
#
# N = size of all of the unique genes in the collection of genesets (example the HALLMARK collection); (is the total number of genes in the background distribution (universe)
#
# GeneRatio is k/n.
#
# k = size of the overlap of 'a vector of gene id' you input with the specific geneset (eg E2F_targets), only unique genes; (the number of genes within that list n, which are annotated to the node.
#
# n = size of the overlap of 'a vector of gene id' you input with all the members of the collection of genesets (eg the HALLMARK collection),only unique genes; is the size of the list of genes of interest
#adj pval and qval
#https://support.bioconductor.org/p/96329/
#adj pval is the fdr adjusted pval, qval from another package is another adjustment method
#the BH approach is slightly more conservative than qvalue
#get pathway maps
result_path <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff =1,
qvalueCutoff =1)
result_path_dt=as.data.frame(result_path)
result_path_dt2=result_path_dt[,c("Description","geneID")]
result_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(result_path_dt2)){
X2=unlist(strsplit(result_path_dt2$geneID[i],"/"))
X1=rep(result_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
result_path_dt3=rbind(result_path_dt3,dt)
}
kk3 <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "MF",
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
kk3_dt=as.data.frame(kk3)
kk3_dt[,c(4,5,7,8)]=round(kk3_dt[,c(4,5,7,8)],4)
datatable(as.data.frame(kk3_dt))
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
#gene ontology over-representation test
ego3 <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego3_dt=as.data.frame(ego3)
ego3_dt[,5:7]=round(ego3_dt[,5:7],4)
datatable(as.data.frame(ego3_dt))
MF_namelist=kk3_dt$Description
#get pathway maps
MFresult_path <- enrichGO(gene = names(geneList),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff =1,
qvalueCutoff =1)
MFresult_path_dt=as.data.frame(MFresult_path)
MFresult_path_dt2=MFresult_path_dt[,c("Description","geneID")]
MFresult_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(MFresult_path_dt2)){
X2=unlist(strsplit(MFresult_path_dt2$geneID[i],"/"))
X1=rep(MFresult_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
MFresult_path_dt3=rbind(MFresult_path_dt3,dt)
}
#new things 20220907: add customised pathway analysis results
name=c("GO:0030198","GO:0055007","GO:0030239","GO:0060047","GO:0007010","GO:0031012","GO:0043292","GO:0005856")
AnnotationDbi::select(GO.db, keys=name, columns=c("TERM","ONTOLOGY"), keytype="GOID")
## GOID TERM ONTOLOGY
## 1 GO:0030198 extracellular matrix organization BP
## 2 GO:0055007 cardiac muscle cell differentiation BP
## 3 GO:0030239 myofibril assembly BP
## 4 GO:0060047 heart contraction BP
## 5 GO:0007010 cytoskeleton organization BP
## 6 GO:0031012 extracellular matrix CC
## 7 GO:0043292 contractile fiber CC
## 8 GO:0005856 cytoskeleton CC
pathwayIDs=c("extracellular matrix organization","cardiac muscle cell differentiation", "myofibril assembly", "heart contraction" ,"cytoskeleton organization","extracellular matrix" ,"contractile fiber", "cytoskeleton")
all_name=c(name,GOBPOFFSPRING[["GO:0030198"]],GOBPOFFSPRING[["GO:0055007"]],GOBPOFFSPRING[["GO:0030239"]],GOBPOFFSPRING[["GO:0060047"]],GOBPOFFSPRING[["GO:0007010"]],GOCCOFFSPRING[["GO:0031012"]],GOCCOFFSPRING[["GO:0043292"]],GOCCOFFSPRING[["GO:0005856"]])
all_name2=AnnotationDbi::select(GO.db, keys=all_name, columns=c("TERM","ONTOLOGY"), keytype="GOID")
GO_v<- AnnotationDbi::select(org.Hs.eg.db, keys=all_name2$GOID, columns=c("GO","ONTOLOGY","SYMBOL","ENTREZID"),keytype="GO")
pp2=merge(all_name2,GO_v,by.x="GOID",by.y="GO",all=TRUE)
datatable(pp2%>%group_by(GOID)%>%summarise(n=n()))
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
pathway_list3=pp2[,c("TERM","ENTREZID")]
colnames(pathway_list3)=c("pathwayIDs","geneIDs")
sum(names(gene_list)%in% pathway_list3$geneIDs)
## [1] 401
## [1] 689
## [1] 1090
#pathway_list3%>%group_by(pathwayIDs)%>%summarise(n=n())
set.seed(20220907)
gse <- GSEA(gene_list, TERM2GENE = pathway_list3,nPerm = 1000, minGSSize = 3, maxGSSize = 500, pAdjustMethod = "none",pvalueCutoff = 0.05)
figure4b=as.data.frame(gse)
figure4bv2=figure4b
figure4bv2[,4:9]=round(figure4bv2[,4:9],3)
datatable(figure4bv2)
kegg_gene_list<-na.omit(fc_new)
kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
set.seed(202205)
kk2 <- gseKEGG(geneList = kegg_gene_list,
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
keyType = "ncbi-geneid")
datatable(as.data.frame(kk2))
both2=as.data.frame(kk2)
both2=both2[-grep("disease",both2$Description),]
ggplot(both2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("GSEA for significantly downregulated and upregulated pathways")+labs(y="Enrichment score")+
scale_colour_gradient(high = "red", low = "blue")
#get pathway maps
KEGGresult_path <- enrichKEGG(gene = names(geneList),
organism = "hsa", keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH",minGSSize = 0, maxGSSize = 3000, qvalueCutoff = 1)
KEGGresult_path_dt=as.data.frame(KEGGresult_path)
KEGGresult_path_dt2=KEGGresult_path_dt[,c("Description","geneID")]
KEGGresult_path_dt2=KEGGresult_path_dt2[-grep("disease",KEGGresult_path_dt2$Description),]
KEGGresult_path_dt3=data.frame(matrix(ncol=2))
for(i in 1:nrow(KEGGresult_path_dt2)){
X2=unlist(strsplit(KEGGresult_path_dt2$geneID[i],"/"))
X1=rep(KEGGresult_path_dt2$Description[i],length(X2))
dt=cbind(X1,X2)
KEGGresult_path_dt3=rbind(KEGGresult_path_dt3,dt)
}
## GSEA analysis
fc.kegg.p <- gage(fc_new, gsets = kegg.sets.hsa, ref = NULL, samp = NULL, rank.test = TRUE)
upreg <- data.frame(fc.kegg.p[[1]]) %>%
rownames_to_column(var = "pathway") %>%
mutate(KEGGpathway = substr(pathway, start = 1, stop = 8)) %>%
mutate(pathwayname = word(pathway, 2, -1)) %>%
select(pathway, KEGGpathway, pathwayname, set.size, stat.mean, p.val, q.val) %>%
mutate(pathid = substr(KEGGpathway, start = 4, stop = 8))
upreg_selected = upreg %>%
filter(q.val < 0.05) %>%
filter(set.size >= 5)
upreg_selected$neglogP_Value = -log10(upreg_selected$p.val)
upreg_selected$neglogQ_value = -log10(upreg_selected$q.val)
downreg <- data.frame(fc.kegg.p[[2]]) %>%
rownames_to_column(var = "pathway") %>%
mutate(KEGGpathway = substr(pathway, start = 1, stop = 8)) %>%
mutate(pathwayname = word(pathway, 2, -1)) %>%
select(pathway, KEGGpathway, pathwayname, set.size, stat.mean, p.val, q.val) %>%
mutate(pathid = substr(KEGGpathway, start = 4, stop = 8))
downreg_selected = downreg %>%
filter(q.val < 0.05) %>%
filter(set.size >= 5)
downreg_selected$neglogP_Value = -log10(downreg_selected$p.val)
downreg_selected$neglogQ_value = -log10(downreg_selected$q.val)
both = rbind(downreg_selected, upreg_selected)
both$absolute = abs(both$stat.mean)
#datatable(both)
both2 = both %>% filter(pathwayname != "Metabolic pathways" & pathwayname != "Retrograde endocannabinoid signaling")
both2=both2[-grep("disease",both2$pathway),]
both2[,c(5:7,9:11)]=round(both2[,c(5:7,9:11)],4)
datatable(both2)
#saveRDS(both, file = "both_selected.rds")
# pv.out.list.fat <- sapply(both$pathid,
# function(pid) pathview(gene.data = fc,
# pathway.id = pid,
# species = "hsa",
# out.suffix="human",
# low = list(gene = "deepskyblue1", cpd = "green"),
# high = list(gene = "red", cpd = "yellow")))
# library(ggrepel )
# ggplot(both2, aes(x = stat.mean, y = -log10(p.val))) + geom_point( aes(color = set.size), size = 3) + labs(x = "Enrichment score", y = "neglog10p", title = "GSEA for significantly downregulated and upregulated pathways") +
# geom_text_repel(aes(label=pathwayname), size=3) +
# scale_colour_gradient(high = "red", low = "blue") +
# scale_x_continuous(
# labels = scales::number_format(accuracy = 0.01))
ggplot(both2,aes(x = reorder(pathwayname, stat.mean),y=stat.mean))+geom_point(aes(color=-log10(p.val),size=-log10(p.val)))+coord_flip()+theme_bw()+ggtitle("GSEA for significantly downregulated and upregulated pathways")+labs(y="Enrichment score")+
scale_colour_gradient(high = "red", low = "blue")
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 5, maxGSSize = 500, pAdjustMethod = "BH",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/IHD-noDM vs donor KEGG mito pathways 27.6.22.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
deletion_index=which(!gene.pathway2$pathwayIDs%in%ben_mito$Description)
dim(gene.pathway2)
gene.pathway2=gene.pathway2[-deletion_index,]
dim(gene.pathway2)
###
colnames(result_path_dt3)=colnames(gene.pathway2)
colnames(MFresult_path_dt3)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,result_path_dt3,MFresult_path_dt3)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,0.1)
gseaResults_mito$table
#gseaResults_mito$dot
#gseaResults_mito$enrich
#gseaResults_mito$tree
#Normalized enrichment scores (NES)
#https://www.biostars.org/p/169648/
#my plot
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:10,],ggplotdtdownsub[1:10,])
ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")
#plot all the significant mito pathways
ggplotdt2=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
ggplotdt2=ggplotdt2[ggplotdt2$Description%in%gene.pathway2$pathwayIDs,]
ggplot(ggplotdt2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("mitochondrail pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+ylim(c(-1,1))
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 5, maxGSSize = 500, pAdjustMethod = "BH",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/IHD-noDM vs donor KEGG mito pathways 27.6.22.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
deletion_index=which(!gene.pathway2$pathwayIDs%in%ben_mito$Description)
dim(gene.pathway2)
gene.pathway2=gene.pathway2[-deletion_index,]
dim(gene.pathway2)
###
colnames(kegg_full_dt)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,kegg_full_dt)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,0.1)
gseaResults_mito$table
#gseaResults_mito$dot
#gseaResults_mito$enrich
#gseaResults_mito$tree
#Normalized enrichment scores (NES)
#https://www.biostars.org/p/169648/
#my plot
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:2,],ggplotdtdownsub[1:10,])
ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")
sum(unique(gene.pathway2$pathwayIDs)%in% ggplotdt$Description)/nrow(ggplotdt)
ihdnodm_topnames=ggplotdt$Description
ihdnodm_dt=gseaResults_mito$table2
#plot all the significant mito pathways
ggplotdt2=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
ggplotdt2=ggplotdt2[ggplotdt2$Description%in%gene.pathway2$pathwayIDs,]
ggplot(ggplotdt2,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=p.adjust,size=p.adjust))+coord_flip()+theme_bw()+ggtitle("mitochondrail pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+ylim(c(-1,1))
library(clusterProfiler)
library(enrichplot)
performGSEA_mito <- function(gene_list,gene.pathway,pval){
# gse <- GSEA(gene_list,
# TERM2GENE = gene.pathway,
# pAdjustMethod = "none",
# scoreType = "pos",
# nPermSimple = 10000
# )
gse <- GSEA(gene_list, TERM2GENE = gene.pathway,nPerm = 1000, minGSSize = 3, maxGSSize = 500, pAdjustMethod = "none",pvalueCutoff = pval)
GSEresults.rounded <- gse@result[,1:9]
GSEresults.rounded[,4:8] <- signif(GSEresults.rounded[,4:8],4)
GSEresults.rounded[,6:7] <- sapply(GSEresults.rounded[,6:7], format, scientific = TRUE)
table <- DT::datatable(GSEresults.rounded)
dot <- clusterProfiler::dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
gse <- enrichplot::pairwise_termsim(gse)
enrich <- emapplot(gse, showCategory = 10)
tree <- treeplot(gse) #https://github.com/YuLab-SMU/ggtree/issues/399 #might be a dplyr issue
return(list(table = table, dot = dot, enrich = enrich, tree = tree,table2=GSEresults.rounded))
}
###
mito_genes <- read_excel("Human.MitoCarta3.0.xls", sheet = 4)
mito_genes <- mito_genes %>% dplyr::select(MitoPathway,Genes) %>% na.omit
gene_sets <- mito_genes$Genes %>% strsplit(", ")
gene.pathway <- data.frame(geneIDs = character(), pathwayIDs = integer())
for(i in 1:length(gene_sets)){
new_data <- data.frame(geneIDs = gene_sets[[i]], pathwayIDs = i)
gene.pathway <- rbind(gene.pathway,new_data)
}
gene.pathway$geneIDs <- mapIds(org.Hs.eg.db, keys = gene.pathway$geneIDs, keytype = "SYMBOL", column = "ENTREZID")
pathway.names <- data.frame(pathwayIDs = 1:length(gene_sets), pathwayNames = mito_genes$MitoPathway)
####
gene_list = sort(fc_new, decreasing = TRUE)
names(gene_list)=as.character(names(gene_list))
gene.pathway2=gene.pathway
gene.pathway2$pathwayIDs=pathway.names$pathwayNames[match(gene.pathway2$pathwayIDs,pathway.names$pathwayIDs)]
gene.pathway2=gene.pathway2[,c(2,1)]#need to put term gene in order
##Ben's mito to delete unused previous mito pathways
ben_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis/IHD-noDM vs donor KEGG mito pathways 27.6.22.xlsx")
ben_mito$Description%in%gene.pathway2$pathwayIDs
## [1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [13] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE
## [25] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE
## [37] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
## [49] FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
## [61] TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
## [73] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] TRUE FALSE FALSE FALSE FALSE FALSE
## [1] 3904 2
## [1] 2416 2
###
colnames(kegg_full_dt)=colnames(gene.pathway2)
gene.pathway3=rbind.data.frame(gene.pathway2,kegg_full_dt)
set.seed(202205)
gseaResults_mito <-performGSEA_mito(gene_list,gene.pathway3,1)
ggplotdt=gseaResults_mito$table2
ggplotdt$p.adjust=as.numeric(ggplotdt$p.adjust)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(ggplotdtup$p.adjust),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$p.adjust),]
ggplotdt=rbind.data.frame(ggplotdtupsub[1:10,],ggplotdtdownsub[1:10,])
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("dcm-no dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "red", low = "blue")+theme(text = element_text(size = 10))
gg
## [1] 336 9
## [1] 285 9
common_name=c(ihddm_dt2$Description,ihdnodm_dt2$Description)
plotmerge=merge(ihddm_dt2,ihdnodm_dt2,by="Description")
plotmerge$pvalue.x=-log10(as.numeric(plotmerge$pvalue.x))
plotmerge$pvalue.y=-log10(as.numeric(plotmerge$pvalue.y))
annotation=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/Yunwei202110/IHD-DM paper figure 3a pathways to annotate.xlsx")
plotmerge$mito=as.character(ifelse(plotmerge$Description%in% ben_mito$Description,1,0))
plotmerge$annotation=as.character(ifelse(plotmerge$Description%in% annotation$`Amino acid metabolism`,1,0))
ggplotly(ggplot(plotmerge,aes(pvalue.x,pvalue.y,label=Description,color=mito))+geom_point()+ggtitle("kegg+mito pathways DCM-dm vs DCM-no dm")+xlab("neg_log10_pval in ind-dm vs donor")+ylab("neg_log10_pval in ind-no dm vs donor")+theme_bw())
library(ggrepel)
#gg=ggplot(plotmerge,aes(pvalue.x,pvalue.y,label=Description,color=mito))+geom_point()+ geom_text_repel(aes(x=pvalue.x,y=pvalue.y,label=Description),subset(plotmerge,((pvalue.x>1&pvalue.y<2)|(pvalue.x<1&pvalue.y>1)|(pvalue.x>2&pvalue.y>2))), size=3)+ggtitle("kegg+mito pathways ihd-dm vs ihd-no dm")+xlab("neg_log10_pval in ind-dm vs donor")+ylab("neg_log10_pval in ind-no dm vs donor")+ scale_color_manual(values=c("black", "seagreen"))+theme_bw()
gg=ggplot(plotmerge,aes(pvalue.x,pvalue.y,label=Description,color=mito))+geom_point()+ geom_text_repel(aes(x=pvalue.x,y=pvalue.y,label=Description),subset(plotmerge,plotmerge$annotation==1), size=3)+ggtitle("kegg+mito pathways DCM-dm vs DCM-no dm")+xlab("neg_log10_pval in ind-dm vs donor")+ylab("neg_log10_pval in ind-no dm vs donor")+ scale_color_manual(values=c("black", "seagreen"))+theme_bw()+theme(aspect.ratio = 1)
gg
selected_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis/DCM Figure S5c and d mitochondrial pathways.xlsx")
datatable(ihddm_dt2)
names=selected_mito$Description
ggplotdt=ihddm_dt2[ihddm_dt2$Description%in%names,]
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("DCM-dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "blue", low = "red")+theme(text = element_text(size = 20))+guides(size = guide_legend(reverse=TRUE))+scale_size_continuous(trans = "reverse")
gg
selected_mito=read_excel("/Users/yzha0247/Dropbox (Sydney Uni)/Diabetic cardiomyopathy/YUNWEI USE THIS/clean_up_folder/analysis/DCM Figure S5c and d mitochondrial pathways.xlsx",sheet=2)
datatable(ihdnodm_dt2)
names=selected_mito$Description
ggplotdt=ihdnodm_dt2[ihdnodm_dt2$Description%in%names,]
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("DCM-no dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "blue", low = "red")+theme(text = element_text(size = 20))+guides(size = guide_legend(reverse=TRUE))+scale_size_continuous(trans = "reverse")
gg
ggplotdt=figure4a
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("DCM-dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "blue", low = "red")+theme(text = element_text(size = 20))+scale_size_continuous(trans = "reverse")+guides(size = guide_legend(reverse=TRUE))
gg
ggplotdt=figure4b
ggplotdt$pvalue=as.numeric(ggplotdt$pvalue)
ggplotdtup=ggplotdt[ggplotdt$enrichmentScore>0,]
ggplotdtupsub=ggplotdtup[order(-abs(ggplotdtup$enrichmentScore)),]
ggplotdtdown=ggplotdt[ggplotdt$enrichmentScore<0,]
ggplotdtdownsub=ggplotdtdown[order(ggplotdtdown$enrichmentScore),]
ggplotdt=rbind.data.frame(ggplotdtupsub,ggplotdtdownsub)
gg=ggplot(ggplotdt,aes(x = reorder(Description, enrichmentScore),y=enrichmentScore))+geom_point(aes(color=pvalue,size=pvalue))+coord_flip()+theme_bw()+ggtitle("DCM-no dm vs donor pathway enrichment analysis")+
scale_colour_gradient(high = "blue", low = "red")+theme(text = element_text(size = 20))+scale_size_continuous(trans = "reverse")+guides(size = guide_legend(reverse=TRUE))
gg