################################################################################
# This script provides the function to convert gene identifiers from PA numbers
# to gene names using a mapping file.
#
# Usage:
#       It should be imported by other scripts.
################################################################################

# Default location of the PA number / gene name mapping file
# (downloaded from http://pseudomonas.com/ on Feb. 16 2016).
IDFile <- "../data_collection/Pseudomonas_aeruginosa_PAO1_107.txt"

IDconverter <- function(IDFile, input.ID){
  # this function converts PAO1 gene identifiers from PA numbers to gene names
  #
  # Inputs:
  # IDFile: path to the tab-delimited mapping file. Its first two lines are
  #         skipped, the third line is used as the header, and after reading
  #         the table must contain the columns 'Locus.Tag' (PA numbers) and
  #         'Name' (gene names)
  # input.ID: a vector of gene identifiers in PA numbers
  #
  # Return:
  # input.name: a vector of converted gene names, one per element of
  #             input.ID. Genes that have no name in the mapping file, and
  #             PA numbers that are not found in it, are returned as the
  #             PA number itself.

  ID.df <- read.table(IDFile, header = TRUE, skip = 2, sep = "\t",
                      quote = "", stringsAsFactors = FALSE)
  ID.df <- ID.df[, c("Locus.Tag", "Name")]

  # a gene without a name falls back to its PA number; the name counts as
  # missing when it is empty or when read.table returned NA for it (e.g. a
  # Name column that is blank for every row is read as logical NA)
  unnamed <- is.na(ID.df$Name) | ID.df$Name == ""
  ID.df$Name.Final <- ifelse(unnamed, ID.df$Locus.Tag, ID.df$Name)

  # remove every non-breaking space from the names. Matching is done on raw
  # bytes with a fixed pattern, so the single-byte form (A0) does not have to
  # be a valid string in the current locale. The UTF-8 form (C2 A0) is
  # removed first so that no stray C2 byte is left behind.
  for (nbsp in c("\xc2\xa0", "\xa0")) {
    ID.df$Name.Final <- gsub(nbsp, "", ID.df$Name.Final,
                             fixed = TRUE, useBytes = TRUE)
  }

  # make the names syntactically valid and unique, so duplicated gene names
  # get a numeric suffix
  ID.df$Name.Final <- make.names(ID.df$Name.Final, unique = TRUE)

  # convert PA number to gene name
  input.name <- ID.df$Name.Final[match(input.ID, ID.df$Locus.Tag)]
  # if an input PA number is not found in the mapping file,
  # keep the original PA number in the output
  not.found <- is.na(input.name)
  input.name[not.found] <- input.ID[not.found]

  return(input.name)
}

