library(dplyr)
library(tidyr)
library(ggpubr)
library(rstudioapi)

# Take the folder of the document that is active in RStudio and switch the
# working directory to its 'results' subfolder. Requires an RStudio session
# (rstudioapi); relative paths used after this point resolve against it.
rootPath <- dirname(rstudioapi::getActiveDocumentContext()$path)
working_dir <- file.path(rootPath, 'results')
setwd(working_dir)

# Read a tab-separated results file and return its distinct
# (Class_IRI, Score) pairs, labelled with an ontology and a text corpus.
#
# Arguments:
#   filepath     Path to a TSV file with a header row that contains at least
#                the columns Class_IRI, Lexical_similarity,
#                Semantic_similarity and Score. The string "NaN" is read as NA.
#   ontology     Value written to the Ontology column of the result.
#   text_corpus  Value written to the TextCorpus column of the result.
#
# Returns a data.frame with the columns Class_IRI, Score, Ontology and
# TextCorpus. Infinite values in numeric columns are replaced by 0. A file
# that holds only the header row yields a zero-row data.frame instead of an
# error. Stops with an explicit message when a required column is missing.
read_classes_input_file <- function(filepath, ontology, text_corpus) {
  key_column <- "Class_IRI"
  numeric_columns <- c("Lexical_similarity", "Semantic_similarity", "Score")

  # read.csv2 uses "," as decimal mark by default, so values written with a
  # decimal point can arrive as text; they are coerced explicitly below.
  raw <- read.csv2(filepath, header = TRUE, sep = "\t", na.strings = "NaN",
                   stringsAsFactors = FALSE)

  missing_columns <- setdiff(c(key_column, numeric_columns), names(raw))
  if (length(missing_columns) > 0) {
    stop("Missing column(s) in ", filepath, ": ",
         paste(missing_columns, collapse = ", "), call. = FALSE)
  }

  for (column in numeric_columns) {
    raw[[column]] <- as.numeric(raw[[column]])
  }

  # Replace infinite values (e.g. -Inf) by 0 in every numeric column.
  for (column in names(raw)) {
    values <- raw[[column]]
    if (is.numeric(values) && any(is.infinite(values))) {
      values[is.infinite(values)] <- 0
      raw[[column]] <- values
    }
  }

  result <- unique(raw[, c(key_column, "Score"), drop = FALSE])

  # Assigning a length-one label to a zero-row data.frame is an error, so an
  # empty result gets zero-length labels of the same type instead.
  has_rows <- nrow(result) > 0
  result$Ontology <- if (has_rows) ontology else ontology[0]
  result$TextCorpus <- if (has_rows) text_corpus else text_corpus[0]
  result
}

# Read a tab-separated results file and return its distinct
# (Noun_phrase, Score) pairs, labelled with an ontology and a text corpus.
#
# Arguments:
#   filepath     Path to a TSV file with a header row that contains at least
#                the columns Noun_phrase, Lexical_similarity,
#                Semantic_similarity and Score. The string "NaN" is read as NA.
#   ontology     Value written to the Ontology column of the result.
#   text_corpus  Value written to the TextCorpus column of the result.
#
# Returns a data.frame with the columns Noun_phrase, Score, Ontology and
# TextCorpus. Infinite values in numeric columns are replaced by 0. A file
# that holds only the header row yields a zero-row data.frame instead of an
# error. Stops with an explicit message when a required column is missing.
read_nouns_input_file <- function(filepath, ontology, text_corpus) {
  key_column <- "Noun_phrase"
  numeric_columns <- c("Lexical_similarity", "Semantic_similarity", "Score")

  # read.csv2 uses "," as decimal mark by default, so values written with a
  # decimal point can arrive as text; they are coerced explicitly below.
  raw <- read.csv2(filepath, header = TRUE, sep = "\t", na.strings = "NaN",
                   stringsAsFactors = FALSE)

  missing_columns <- setdiff(c(key_column, numeric_columns), names(raw))
  if (length(missing_columns) > 0) {
    stop("Missing column(s) in ", filepath, ": ",
         paste(missing_columns, collapse = ", "), call. = FALSE)
  }

  for (column in numeric_columns) {
    raw[[column]] <- as.numeric(raw[[column]])
  }

  # Replace infinite values (e.g. -Inf) by 0 in every numeric column.
  for (column in names(raw)) {
    values <- raw[[column]]
    if (is.numeric(values) && any(is.infinite(values))) {
      values[is.infinite(values)] <- 0
      raw[[column]] <- values
    }
  }

  result <- unique(raw[, c(key_column, "Score"), drop = FALSE])

  # Assigning a length-one label to a zero-row data.frame is an error, so an
  # empty result gets zero-length labels of the same type instead.
  has_rows <- nrow(result) > 0
  result$Ontology <- if (has_rows) ontology else ontology[0]
  result$TextCorpus <- if (has_rows) text_corpus else text_corpus[0]
  result
}

# Draw the kernel density estimates of three score sets on one base-graphics
# plot (red, green and blue) with a legend in the top-right corner.
#
# Arguments:
#   a, b, c                    Objects with a numeric Score element
#                              (read via `$Score`). NA scores are ignored.
#   label_a, label_b, label_c  Legend labels for a, b and c respectively.
#   title                      Main title of the plot.
#
# Returns (invisibly) the value of the final legend() call.
plotDensities <- function(a, label_a,b, label_b, c, label_c, title) {
  # Estimate each density once and reuse it for both the axis range and the
  # drawing. na.rm = TRUE keeps density() from failing on missing scores.
  # The parameter `c` does not get in the way of calling base c() below:
  # R skips non-function bindings when it looks up a function name.
  density_a <- density(a$Score, na.rm = TRUE)
  density_b <- density(b$Score, na.rm = TRUE)
  density_c <- density(c$Score, na.rm = TRUE)

  # The y axis spans 0 to the highest peak, rounded up to a whole number, so
  # that none of the three curves is clipped.
  max_density <- max(density_a$y, density_b$y, density_c$y)

  plot(density_a, main = title, col = "red",
       ylim = c(0, ceiling(max_density)))
  lines(density_b, col = "green")
  lines(density_c, col = "blue")
  legend(x = "topright", legend = c(label_a, label_b, label_c),
         fill = c("red", "green", "blue"))
}



# Distinct noun-phrase scores for every ontology / text corpus pair ----
# Every call reads one text2class.tsv file (path relative to the working
# directory); the two trailing arguments become the Ontology and TextCorpus
# labels of the returned rows.

# Gene Ontology: genes, legal, food and medical text
go_genes_nouns_uniq <- read_nouns_input_file(
  "go_genetext/text2class.tsv", 'GeneOntology', 'Genes text'
)
go_legal_nouns_uniq <- read_nouns_input_file(
  "go_legaltext/text2class.tsv", 'GeneOntology', 'Legal text'
)
go_food_nouns_uniq <- read_nouns_input_file(
  "go_foodtext/text2class.tsv", 'GeneOntology', 'Food text'
)
go_medicine_nouns_uniq <- read_nouns_input_file(
  "go_medicaltext/text2class.tsv", 'GeneOntology', 'Medical text'
)

# Food Ontology (FoodOn): genes, legal, food and medical text
foodon_genes_nouns_uniq <- read_nouns_input_file(
  "foodon_genetext/text2class.tsv", 'FoodOn', 'Genes text'
)
foodon_legal_nouns_uniq <- read_nouns_input_file(
  "foodon_legaltext/text2class.tsv", 'FoodOn', 'Legal text'
)
foodon_food_nouns_uniq <- read_nouns_input_file(
  "foodon_foodtext/text2class.tsv", 'FoodOn', 'Food text'
)
foodon_medicine_nouns_uniq <- read_nouns_input_file(
  "foodon_medicaltext/text2class.tsv", 'FoodOn', 'Medical text'
)

# LKIF Ontology: genes, legal, food and medical text
lkif_genes_nouns_uniq <- read_nouns_input_file(
  "lkif_genetext/text2class.tsv", 'LKIF', 'Genes text'
)
lkif_legal_nouns_uniq <- read_nouns_input_file(
  "lkif_legaltext/text2class.tsv", 'LKIF', 'Legal text'
)
lkif_food_nouns_uniq <- read_nouns_input_file(
  "lkif_foodtext/text2class.tsv", 'LKIF', 'Food text'
)
lkif_medicine_nouns_uniq <- read_nouns_input_file(
  "lkif_medicaltext/text2class.tsv", 'LKIF', 'Medical text'
)

# SNOMED Ontology: genes, legal, food and medical text
snomed_genes_nouns_uniq <- read_nouns_input_file(
  "snomed_genetext/text2class.tsv", 'SNOMED', 'Genes text'
)
snomed_legal_nouns_uniq <- read_nouns_input_file(
  "snomed_legaltext/text2class.tsv", 'SNOMED', 'Legal text'
)
snomed_food_nouns_uniq <- read_nouns_input_file(
  "snomed_foodtext/text2class.tsv", 'SNOMED', 'Food text'
)
snomed_medicine_nouns_uniq <- read_nouns_input_file(
  "snomed_medicaltext/text2class.tsv", 'SNOMED', 'Medical text'
)

# Complete dataset ----
# Stack the sixteen per-pair tables row-wise (bind order as listed), then put
# the columns in the order Ontology, TextCorpus, Noun_phrase, Score.
experiment_data_nouns <- select(
  rbind(
    go_food_nouns_uniq,
    go_legal_nouns_uniq,
    go_genes_nouns_uniq,
    go_medicine_nouns_uniq,
    foodon_food_nouns_uniq,
    foodon_legal_nouns_uniq,
    foodon_genes_nouns_uniq,
    foodon_medicine_nouns_uniq,
    lkif_food_nouns_uniq,
    lkif_legal_nouns_uniq,
    lkif_genes_nouns_uniq,
    lkif_medicine_nouns_uniq,
    snomed_genes_nouns_uniq,
    snomed_legal_nouns_uniq,
    snomed_food_nouns_uniq,
    snomed_medicine_nouns_uniq
  ),
  Ontology, TextCorpus, Noun_phrase, Score
)



# Boxplots of noun-phrase scores ----
# NOTE(review): ylim() sets scale limits, and ggplot2 documents that
# observations outside scale limits are dropped before statistics are
# computed. Scores outside [0.3, 1.5] (e.g. the infinite scores that the
# reader functions reset to 0) would therefore be excluded from the boxes and
# from the Wilcoxon tests. Confirm this is intended; coord_cartesian(ylim = )
# zooms without dropping data.

# One panel per ontology: pairwise Wilcoxon comparison of the text corpora.
# combn() enumerates all unordered pairs of the four corpus labels.
comparisons <- combn(
  c("Food text", "Legal text", "Genes text", "Medical text"),
  2,
  simplify = FALSE
)
ggboxplot(
  experiment_data_nouns,
  x = "TextCorpus",
  y = "Score",
  color = "TextCorpus",
  facet.by = "Ontology"
) +
  stat_compare_means(comparisons = comparisons, method = "wilcox.test") +
  ggtitle('Text corpora comparison for each ontology') +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0.3, 1.5)

# One panel per text corpus: pairwise Wilcoxon comparison of the ontologies.
comparisons <- combn(
  c("GeneOntology", "FoodOn", "LKIF", "SNOMED"),
  2,
  simplify = FALSE
)
ggboxplot(
  experiment_data_nouns,
  x = "Ontology",
  y = "Score",
  color = "Ontology",
  facet.by = "TextCorpus"
) +
  stat_compare_means(comparisons = comparisons, method = "wilcox.test") +
  ggtitle('Ontology comparison for each text corpus') +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ylim(0.3, 1.5)

