#Load dependencies----------------------------------------------------------------------------
library(vegan) #For diversity calculation
library(dplyr) #For data manipulation
set.seed(54) #Set seed 
#---------------------------------------------------------------------------------------------

#Inputs---------------------------------------------------------------------------------------
inpDir="/Volumes/TOSHI_EXT/Projects/Mycobiome/Data/OTU Tables/Archaea" #The directory containing the OTU tables, of which L8 will be pulled 
outDir="/Volumes/TOSHI_EXT/Projects/Mycobiome/Output/tOut_Rev" #Output directory
metaF<-"/Volumes/TOSHI_EXT/Projects/Mycobiome/Data/metadata.csv" #The meta data file containing group information
abrev="Archaea" #Substring used as an abbreviation when saving the output
#---------------------------------------------------------------------------------------------

#Functions----------------------------------------------------------------------------
#This function is designed to pull only the species level (L8) taxonomic file from the directory
#Please note that, if you are running this yourself, it needs to be pointed to a directory wiht "L8.csv" file for species level.
load_csv_file <- function(directory) {
  # Get a list of files in the directory
  files <- list.files(directory)
  
  # Find the file that ends with 'L8.csv'
  csv_file <- files[grep("L8.csv$", files)]
  
  # Check if a matching file is found
  if (length(csv_file) == 0) {
    stop("No file found with 'L8.csv' extension in the directory.")
  }
  
  # Load the csv file into a dataframe
  file_path <- file.path(directory, csv_file)
  df <- read.csv(file_path, row.names = 1, check.names = FALSE)
  
  return(df)
}
#---------------------------------------------------------------------------------------------

#Running----------------------------------------------------------------------------
#Load the files--- 
combDF=load_csv_file(inpDir) #First the OTU table (this time L8)
abund_tab_t<-t(combDF) #Transpose the table for downstream compatibility
meta_tab <- read.csv(metaF, check.names = FALSE) #Now load the meta data table

#Fixing some nomenclature issues (group is the variable that the data will be split on... just genotype)
#Replace instances of "DEFA" with "PC for consistency with previous work.
meta_tab <- meta_tab %>%
  mutate(Group = gsub("DEFA", "PC", Group))

#Use Vegan package function vegdist to get a dissimilary measure (Bray–Curtis dissimilarity) btwn rows (samples) of abundance df
bray<-vegdist(abund_tab_t, "bray")
#Based on the bray distances, again use the Vegan package to run an Analysis of Similarities (ANOSIM) based on genotype subgroups (don't split by sex).
fit_b <- anosim(bray, meta_tab$Group,permutations = 999)

#Create a custom order for the categories
genotype_levels <- c("Between","VDR-Loxp","VDR-IEC", "VDR-PC","VDR-Lyz")

# Modify the order of class.vec to match custom_order
class.vec_ordered <- factor(fit_b$class.vec, levels = genotype_levels)

# Update fit_b with the new order
fit_b$class.vec <- class.vec_ordered

#Plot
tOut=file.path(outDir,paste(abrev,"Both_Boxplots_L8.pdf", sep = ""))
pdf(file=tOut, width = 8, height = 6)
plot(fit_b,xlab="Genotypes",ylab="Mean dissimilarity ranks", 
     main="Dissimilarity Ranks between and within Groups", pch=17, cex=1.2,
     col=c("gray","darkblue","goldenrod","darkred","darkgreen"))

#End Connection
dev.off()