# Figure 3: gap-filling counts versus abundance for the coral models.
# Start from an empty workspace with no open graphics devices.
rm(list = ls())
graphics.off()

library(outliers)
library(ggplot2)
library(ggpubr)

top_dir <- "~/comparison of tools/data/figures"
model_type <- "coral"
n_keep <- 45L  # number of lowest-abundance rows kept from every table

# Read the tab-separated gap-filling table of one model set
# ("carveme", "gapseq", "kbase" or "consensus") from `dir`.
# The first column of the file becomes the row names.
# file.path() supplies the separator between directory and file name.
read_gf_table <- function(dir, model_type, model_set) {
  gf_file <- file.path(
    dir,
    paste0(model_type, "_", model_set, "_gf_vs_abundance.txt")
  )
  read.table(gf_file, header = TRUE, row.names = 1, sep = "\t")
}

# Keep the `n` rows with the lowest abundance, sorted by abundance.
# head() avoids the all-NA rows that `[1:n]` creates when the table has
# fewer than `n` rows.
keep_lowest_abundance <- function(data, n) {
  data[head(order(data$abundance), n), ]
}

# Build one scatter panel of column `y` against "abundance" with a linear
# regression line, its confidence band and the Pearson correlation label.
#   data     data frame containing "abundance" and the column named by `y`
#   y        name of the column drawn on the y axis
#   title    panel tag shown as the plot title
#   y_max    upper y-axis limit (the lower limit is fixed at 1)
#   label_x  x position of the correlation label
#   label_y  y position of the correlation label
#   ylab     y-axis title, or FALSE to hide it
# NOTE(review): ylim(1, y_max) sets scale limits, so values below 1 are
# dropped before the fit and the correlation are computed -- confirm that
# excluding zero counts is intended.
make_panel <- function(data, y, title, y_max, label_x, label_y,
                       ylab = FALSE) {
  panel <- ggscatter(data, x = "abundance", y = y,
                     title = title,
                     xlab = FALSE,
                     ylab = ylab,
                     add = "reg.line",
                     conf.int = TRUE,
                     add.params = list(color = "blue",
                                       fill = "lightgray")) +
    ylim(1, y_max)
  # Only style the y-axis title when there is one; adding the font to a
  # panel created with ylab = FALSE would re-enable the hidden title.
  if (is.character(ylab)) {
    panel <- panel + font("ylab", size = 12, face = "bold")
  }
  panel +
    font("title", face = "bold") +
    font("x.text", size = 13) +
    font("y.text", size = 13) +
    stat_cor(method = "pearson", label.x = label_x, label.y = label_y,
             size = 5.5)
}

c_gf_comp <- read_gf_table(top_dir, model_type, "carveme")
g_gf_comp <- read_gf_table(top_dir, model_type, "gapseq")
k_gf_comp <- read_gf_table(top_dir, model_type, "kbase")
co_gf_comp <- read_gf_table(top_dir, model_type, "consensus")

# Chi-squared outlier test on the carveme abundances.
chisq.out.test(c_gf_comp$abundance)

c_gf_comp <- keep_lowest_abundance(c_gf_comp, n_keep)
g_gf_comp <- keep_lowest_abundance(g_gf_comp, n_keep)
k_gf_comp <- keep_lowest_abundance(k_gf_comp, n_keep)
co_gf_comp <- keep_lowest_abundance(co_gf_comp, n_keep)

######### carveme models ############
c_add_r <- make_panel(c_gf_comp, "added", title = "a.",
                      ylab = "Number of added reaction",
                      y_max = 150, label_x = 1500, label_y = 140)
c_imp <- make_panel(c_gf_comp, "imported", title = "e.",
                    ylab = "Number of imported metabolite",
                    y_max = 50, label_x = 1500, label_y = 45)
c_exp <- make_panel(c_gf_comp, "exported", title = "i.",
                    ylab = "Number of exported metabolite",
                    y_max = 80, label_x = 1500, label_y = 70)

######### gapseq models ############
g_add_r <- make_panel(g_gf_comp, "added", title = "b.",
                      y_max = 150, label_x = 1500, label_y = 140)
g_imp <- make_panel(g_gf_comp, "imported", title = "f.",
                    y_max = 50, label_x = 1500, label_y = 45)
g_exp <- make_panel(g_gf_comp, "exported", title = "j.",
                    y_max = 80, label_x = 1200, label_y = 70)

######### kbase models ############
k_add_r <- make_panel(k_gf_comp, "added", title = "c.",
                      y_max = 150, label_x = 1500, label_y = 140)
k_imp <- make_panel(k_gf_comp, "imported", title = "g.",
                    y_max = 50, label_x = 1200, label_y = 45)
k_exp <- make_panel(k_gf_comp, "exported", title = "k.",
                    y_max = 80, label_x = 1200, label_y = 70)

######### consensus models ############
co_add_r <- make_panel(co_gf_comp, "added", title = "d.",
                       y_max = 150, label_x = 1500, label_y = 140)
co_imp <- make_panel(co_gf_comp, "imported", title = "h.",
                     y_max = 50, label_x = 1500, label_y = 45)
co_exp <- make_panel(co_gf_comp, "exported", title = "l.",
                     y_max = 80, label_x = 1500, label_y = 70)

# One row per metric (added, imported, exported), one column per model set.
figure_3 <- ggarrange(c_add_r, g_add_r, k_add_r, co_add_r,
                      c_imp, g_imp, k_imp, co_imp,
                      c_exp, g_exp, k_exp, co_exp)
figure_3

# Pass the arranged figure explicitly instead of relying on last_plot().
ggsave(filename = "~/comparison of tools/Figure_3.svg",
       plot = figure_3,
       width = 35, height = 20, units = "cm")


# Figure S1: gap-filling counts versus abundance for the seawater models.
# Start from an empty workspace with no open graphics devices. Because the
# workspace is cleared here, the helpers used below are defined in this
# section.
rm(list = ls())
graphics.off()

library(outliers)
library(ggplot2)
library(ggpubr)

top_dir <- "~/comparison of tools/data/figures"
model_type <- "seawater"
n_keep <- 53L  # number of lowest-abundance rows kept from every table

# Read the tab-separated gap-filling table of one model set
# ("carveme", "gapseq", "kbase" or "consensus") from `dir`.
# The first column of the file becomes the row names.
# file.path() supplies the separator between directory and file name.
read_gf_table <- function(dir, model_type, model_set) {
  gf_file <- file.path(
    dir,
    paste0(model_type, "_", model_set, "_gf_vs_abundance.txt")
  )
  read.table(gf_file, header = TRUE, row.names = 1, sep = "\t")
}

# Keep the `n` rows with the lowest abundance, sorted by abundance.
# head() avoids the all-NA rows that `[1:n]` creates when the table has
# fewer than `n` rows.
keep_lowest_abundance <- function(data, n) {
  data[head(order(data$abundance), n), ]
}

# Build one scatter panel of column `y` against "abundance" with a linear
# regression line, its confidence band and the Pearson correlation label.
#   data     data frame containing "abundance" and the column named by `y`
#   y        name of the column drawn on the y axis
#   title    panel tag shown as the plot title
#   y_max    upper y-axis limit (the lower limit is fixed at 1)
#   label_x  x position of the correlation label
#   label_y  y position of the correlation label
#   ylab     y-axis title, or FALSE to hide it
# NOTE(review): ylim(1, y_max) sets scale limits, so values below 1 are
# dropped before the fit and the correlation are computed -- confirm that
# excluding zero counts is intended.
make_panel <- function(data, y, title, y_max, label_x, label_y,
                       ylab = FALSE) {
  panel <- ggscatter(data, x = "abundance", y = y,
                     title = title,
                     xlab = FALSE,
                     ylab = ylab,
                     add = "reg.line",
                     conf.int = TRUE,
                     add.params = list(color = "blue",
                                       fill = "lightgray")) +
    ylim(1, y_max)
  # Only style the y-axis title when there is one; adding the font to a
  # panel created with ylab = FALSE would re-enable the hidden title.
  if (is.character(ylab)) {
    panel <- panel + font("ylab", size = 12, face = "bold")
  }
  panel +
    font("title", face = "bold") +
    font("x.text", size = 13) +
    font("y.text", size = 13) +
    stat_cor(method = "pearson", label.x = label_x, label.y = label_y,
             size = 5.5)
}

c_gf_comp <- read_gf_table(top_dir, model_type, "carveme")
g_gf_comp <- read_gf_table(top_dir, model_type, "gapseq")
k_gf_comp <- read_gf_table(top_dir, model_type, "kbase")
co_gf_comp <- read_gf_table(top_dir, model_type, "consensus")

# Chi-squared outlier test on the carveme abundances.
chisq.out.test(c_gf_comp$abundance)

c_gf_comp <- keep_lowest_abundance(c_gf_comp, n_keep)
g_gf_comp <- keep_lowest_abundance(g_gf_comp, n_keep)
k_gf_comp <- keep_lowest_abundance(k_gf_comp, n_keep)
co_gf_comp <- keep_lowest_abundance(co_gf_comp, n_keep)

######### carveme models ############
c_add_r <- make_panel(c_gf_comp, "added", title = "a.",
                      ylab = "Number of added reaction",
                      y_max = 170, label_x = 10000, label_y = 160)
c_imp <- make_panel(c_gf_comp, "imported", title = "e.",
                    ylab = "Number of imported metabolite",
                    y_max = 50, label_x = 10000, label_y = 45)
c_exp <- make_panel(c_gf_comp, "exported", title = "i.",
                    ylab = "Number of exported metabolite",
                    y_max = 70, label_x = 10000, label_y = 60)

######### gapseq models ############
g_add_r <- make_panel(g_gf_comp, "added", title = "b.",
                      y_max = 170, label_x = 10000, label_y = 160)
g_imp <- make_panel(g_gf_comp, "imported", title = "f.",
                    y_max = 50, label_x = 10000, label_y = 45)
g_exp <- make_panel(g_gf_comp, "exported", title = "j.",
                    y_max = 70, label_x = 10000, label_y = 60)

######### kbase models ############
k_add_r <- make_panel(k_gf_comp, "added", title = "c.",
                      y_max = 170, label_x = 10000, label_y = 160)
k_imp <- make_panel(k_gf_comp, "imported", title = "g.",
                    y_max = 50, label_x = 10000, label_y = 45)
k_exp <- make_panel(k_gf_comp, "exported", title = "k.",
                    y_max = 70, label_x = 10000, label_y = 60)

######### consensus models ############
co_add_r <- make_panel(co_gf_comp, "added", title = "d.",
                       y_max = 170, label_x = 10000, label_y = 160)
co_imp <- make_panel(co_gf_comp, "imported", title = "h.",
                     y_max = 50, label_x = 10000, label_y = 45)
co_exp <- make_panel(co_gf_comp, "exported", title = "l.",
                     y_max = 70, label_x = 10000, label_y = 60)

# One row per metric (added, imported, exported), one column per model set.
figure_s1 <- ggarrange(c_add_r, g_add_r, k_add_r, co_add_r,
                       c_imp, g_imp, k_imp, co_imp,
                       c_exp, g_exp, k_exp, co_exp)
figure_s1

# Pass the arranged figure explicitly instead of relying on last_plot().
ggsave(filename = "~/comparison of tools/Figure_S1.svg",
       plot = figure_s1,
       width = 35, height = 20, units = "cm")

