#' Import a compound table from an Excel file
#'
#' Reads the workbook found at `dir` + `fname` + ".xlsx". The workbook must
#' contain two sheets:
#' \itemize{
#'   \item "names": three text columns, two of which are called
#'     "original_name" (a column header used in the data sheet) and "r_name"
#'     (the name that column gets in the returned data frame).
#'   \item "Aquatic-Export": the data to import.
#' }
#' After renaming, columns that hold no values at all are dropped, and so are
#' rows that hold fewer than two non-missing values. Data columns that are not
#' listed in the "names" sheet keep their original header.
#'
#' @param fname The name of the Excel file to import, without extension. A
#'   missing (`NA`) or empty value makes the function return `NULL`.
#' @param extraData Additional data with an "ecotox_name" column that is
#'   matched against `fname` and an "ec_priority_substance_name" column whose
#'   matching value is added to the imported data.
#' @param dir The directory where the Excel file is located. A trailing path
#'   separator is optional.
#'
#' @return A data frame with the imported data plus an
#'   "ec_priority_substance_name" column, or `NULL` when `fname` is missing.
#'   The new column is `NA` (with a warning) when `extraData` has no entry
#'   for `fname`.
importToxData <- function(fname, extraData, dir = "./data/compound_tables/") {
  # Nothing to import when no file name was supplied.
  if (length(fname) == 0L) {
    return(NULL)
  }
  if (length(fname) > 1L) {
    stop("`fname` must be a single file name.", call. = FALSE)
  }
  if (is.na(fname)) {
    return(NULL)
  }

  # Accept `dir` with or without a trailing separator; an empty `dir` still
  # means "relative to the working directory".
  if (nzchar(dir) && !grepl("[/\\\\]$", dir)) {
    dir <- paste0(dir, "/")
  }
  fullName <- paste0(dir, fname, ".xlsx")

  nameMap <- readxl::read_excel(fullName, sheet = "names",
                                col_types = c("text", "text", "text"))
  if (!all(c("r_name", "original_name") %in% colnames(nameMap))) {
    stop("Sheet \"names\" of ", fullName,
         " must have columns \"r_name\" and \"original_name\".",
         call. = FALSE)
  }
  namesVec <- stats::setNames(nameMap$r_name, nameMap$original_name)

  data <- readxl::read_excel(fullName, sheet = "Aquatic-Export")

  # Translate the headers; a header without a mapping is kept as it is
  # instead of becoming an NA column name.
  oldNames <- colnames(data)
  newNames <- unname(namesVec[oldNames])
  unmapped <- is.na(newNames)
  newNames[unmapped] <- oldNames[unmapped]
  colnames(data) <- newNames

  # Drop columns that contain no data at all.
  emptyCols <- vapply(data, function(column) all(is.na(column)), logical(1))
  data <- data[, !emptyCols, drop = FALSE]

  # Data has to have at least 2 filled columns on each row. The count is
  # taken over the NON-missing cells; counting the missing ones here would
  # discard the best-filled rows instead of the empty ones.
  filledCells <- rowSums(!is.na(data))
  data <- data[filledCells >= 2, , drop = FALSE]

  # Look up the substance name that belongs to this file.
  matched <- extraData$ecotox_name %in% fname
  ec_name <- unique(unlist(extraData[matched, "ec_priority_substance_name"],
                           use.names = FALSE))
  if (length(ec_name) == 0L) {
    warning("No `ecotox_name` in `extraData` matches '", fname,
            "'; `ec_priority_substance_name` is set to NA.", call. = FALSE)
    ec_name <- NA_character_
  } else if (length(ec_name) > 1L) {
    stop("`extraData` holds several different ",
         "`ec_priority_substance_name` values for '", fname, "'.",
         call. = FALSE)
  }
  # rep() keeps the assignment valid for a table without rows as well.
  data$ec_priority_substance_name <- rep(ec_name, length.out = nrow(data))

  data
}


#' Count the rows of a data frame per value of a column
#'
#' Missing values in `col` are relabelled as the string "NA" so that they are
#' counted as a group of their own; the column is converted to character for
#' this purpose (this also makes the relabelling work for factor columns).
#' The rows are then counted per distinct value of `col` and of any extra
#' grouping terms. Counting is done over the "cas_number" column, so `data`
#' must contain a column with that name. Rows in which "cas_number" or an
#' extra grouping column is missing are left out by `aggregate()`.
#'
#' A warning is raised when the result has no "NA" group.
#'
#' @param col The name of the column to get factor values for
#' @param data The data frame containing the column and a "cas_number" column
#' @param extracol An optional string that is appended to the right-hand side
#'   of the aggregation formula, e.g. " + other_column"
#'
#' @return A data frame with one row per group: the grouping column(s)
#'   followed by a "total_count" column with the number of rows in the group
#'
#' @examples
#' data <- data.frame(cas_number = c("50-00-0", "50-00-0", "71-43-2", "71-43-2"),
#'                    a = c(1, 2, 2, NA), b = c(3, 4, 4, 5))
#' getFactorValues("a", data)
#' getFactorValues("a", data, " + b")
getFactorValues <- function(col, data, extracol = "") {
  if (!(col %in% names(data))) {
    stop("`data` has no column named '", col, "'.", call. = FALSE)
  }

  # Work on a character copy: a factor cannot take the new "NA" label and a
  # tibble refuses to store a string in a numeric column.
  labels <- as.character(data[[col]])
  labels[is.na(labels)] <- "NA"
  data[[col]] <- labels

  form <- stats::as.formula(
    paste0("cbind(total_count=cas_number) ~ ", col, extracol)
  )
  res <- stats::aggregate(form, data = data, FUN = length)

  # The lookup tables built from this result are expected to have an entry
  # for missing values, so point out when there is none.
  if (!("NA" %in% res[[col]])) {
    warning("Table: ", col, " - missing NA!\n")
  }

  res
}


#' Flag the rows of a data frame that are usable according to lookup sheets
#'
#' Every sheet of the workbook is used as a lookup table for the column of
#' `toxData` that carries the same name as the sheet. Inside a sheet, the
#' column named after the sheet lists the values of that column (missing
#' values are written as the string "NA") and the third column holds the
#' value assigned to each of them. A row of `toxData` is usable when, for
#' every sheet, its value is listed in the sheet and the third column of the
#' matching sheet row is not missing. When a value is listed more than once,
#' its first entry is used.
#'
#' @param excelFname A string. The path to the Excel file.
#' @param toxData A data frame. The data frame to be processed. It must have
#'   a column for every sheet of the workbook.
#' @return A logical vector with one element per row of `toxData`; `TRUE`
#'   marks a usable row.
getUsableRows <- function(excelFname, toxData) {
  sheets <- readxl::excel_sheets(path = excelFname)

  usable <- rep(TRUE, nrow(toxData))
  for (s in sheets) {
    if (!(s %in% names(toxData))) {
      stop("`toxData` has no column for sheet '", s, "'.", call. = FALSE)
    }
    lookup <- readxl::read_xlsx(excelFname, sheet = s)
    if (!(s %in% names(lookup)) || ncol(lookup) < 3L) {
      stop("Sheet '", s, "' needs a column named '", s,
           "' and at least three columns.", call. = FALSE)
    }

    # Missing values are looked up under the literal key "NA".
    keys <- as.character(toxData[[s]])
    keys[is.na(keys)] <- "NA"

    # match() returns exactly one position (or NA) per row, so the result
    # stays aligned with `toxData` even for unlisted or duplicated keys.
    hit <- match(keys, as.character(lookup[[s]]))
    assigned <- lookup[[3L]][hit]

    usable <- usable & !is.na(assigned)
  }

  usable
}


#' Replace column values in a data frame with values from an Excel file
#'
#' The sheet of the workbook that is named `colName` is used as a lookup
#' table: its column `colName` lists the current values (missing values are
#' written as the string "NA") and its column `colNum` holds the replacement
#' for each of them. Values that are not listed in the sheet are replaced by
#' `NA`; when a value is listed more than once, its first entry is used.
#'
#' @param excelFname A string. The path to the Excel file.
#' @param toxData A data frame. The data frame to be processed.
#' @param colName A string. The name of the column to be replaced; also the
#'   name of the sheet and of the key column inside that sheet.
#' @param naOnly A logical. If TRUE, only NA values are replaced and all
#'   other values are kept (as character strings).
#' @param colNum An integer. The column number in the Excel file to use for
#'   replacement.
#' @return A vector with replaced column values, one element per row of
#'   `toxData`.
replaceColumnValues <- function(excelFname, toxData, colName,
                                naOnly = FALSE, colNum = 3) {
  if (!(colName %in% names(toxData))) {
    stop("`toxData` has no column named '", colName, "'.", call. = FALSE)
  }
  lookup <- readxl::read_xlsx(excelFname, sheet = colName)
  if (!(colName %in% names(lookup))) {
    stop("Sheet '", colName, "' has no column named '", colName, "'.",
         call. = FALSE)
  }

  # Missing values are looked up under the literal key "NA".
  naRows <- is.na(toxData[[colName]])
  keys <- as.character(toxData[[colName]])
  keys[naRows] <- "NA"

  # match() returns exactly one position (or NA) per row, which keeps the
  # result the same length as, and aligned with, `toxData`.
  hit <- match(keys, as.character(lookup[[colName]]))
  replaced <- lookup[[colNum]][hit]

  # With naOnly, rows that had a value keep it.
  if (naOnly) {
    replaced[!naRows] <- keys[!naRows]
  }

  unname(replaced)
}










