##########
# Functions designed to parse MaxQuant tables
##########

extractLocalisationScore_MQ <- function(PSMtable = PSMtable, PhosphoSTYProba = NULL, numberOfPhospho = NULL) {
  # Extracts the phosphorylation localisation probabilities of each PSM.
  #
  # Arguments:
  #   PSMtable        data frame with one row per PSM.
  #   PhosphoSTYProba name of the column holding the sequence annotated with
  #                   localisation probabilities in parentheses,
  #                   e.g. "AS(0.9)T(0.1)K".
  #   numberOfPhospho name of the column holding the number of
  #                   phosphorylations carried by the PSM.
  #
  # Returns:
  #   A list with one element per row of PSMtable (same order). For a row with
  #   n > 0 phosphorylations the element is a numeric vector of length n
  #   holding the n highest probabilities in decreasing order (padded with NA
  #   if fewer than n probabilities are present). Rows with no phosphorylation
  #   (or a missing count) are NULL.
  ## Input error messages:
  #####
  if (is.null(PhosphoSTYProba) || is.null(numberOfPhospho)) {
    stop("Input the name for the field of localisation probabilities and the field for the number of phosphorylation in the PSM")
  }
  missingFields <- setdiff(c(PhosphoSTYProba, numberOfPhospho), names(PSMtable))
  if (length(missingFields) > 0) {
    stop("Field(s) not found in PSMtable: ", paste(missingFields, collapse = ", "))
  }
  #####
  # as.character() so that factor columns can be split as well.
  vecproba <- as.character(PSMtable[[PhosphoSTYProba]])
  vecnum <- PSMtable[[numberOfPhospho]]
  phosphorylated <- which(vecnum > 0)

  # lapply()/vapply() are used on purpose: sapply() would collapse the
  # per-row results into a matrix whenever all rows contain the same number
  # of probabilities, which breaks the per-row indexing.
  proba <- lapply(phosphorylated, function(i) {
    # Splitting on "(" leaves each probability at the start of a piece that
    # still contains its closing ")".
    pieces <- strsplit(vecproba[i], "(", fixed = TRUE)[[1]]
    pieces <- pieces[grepl(")", pieces, fixed = TRUE)]
    values <- vapply(
      strsplit(pieces, ")", fixed = TRUE),
      function(p) p[1],
      character(1)
    )
    # Keep the best `vecnum[i]` probabilities.
    sort(as.numeric(values), decreasing = TRUE)[seq_len(vecnum[i])]
  })

  res <- vector(mode = "list", length = nrow(PSMtable))
  res[phosphorylated] <- proba
  return(res)
}


extractLocalisation_MQ <- function(PSMtable = PSMtable, ModifiedSequence = NULL) {
  # Returns the positions of the phosphorylated residues within each peptide.
  # (PHOSPHORYLATION POSITION INDICATED AS "(ph)" IN THE FIELD "Modified.sequence")
  #
  # Arguments:
  #   PSMtable         data frame with one row per PSM (MaxQuant evidence table).
  #   ModifiedSequence name of the column holding the modified sequence,
  #                    e.g. "_AS(ph)DT(ph)K_".
  #
  # Returns:
  #   A character vector with one element per row of PSMtable (same order):
  #   the 1-based positions of the phosphorylated S/T/Y residues in the
  #   unmodified sequence, joined with "&" (e.g. "2&4"), or "" when the peptide
  #   carries no phosphorylation. The vector is named with the intermediate
  #   sequences in which each phosphosite is replaced by "0".
  ## Input error messages:
  #####
  if (is.null(ModifiedSequence)) {
    stop("Input the name for the field of modified sequence from the MaxQuant evidence table.")
  }
  #####
  if (ModifiedSequence %in% names(PSMtable)) {
    pep <- PSMtable[[ModifiedSequence]]
  } else {
    # Earlier versions ignored `ModifiedSequence` and always read
    # "Modified.sequence"; keep that as a fallback for existing callers.
    warning("Field '", ModifiedSequence, "' not found in PSMtable; using 'Modified.sequence' instead.")
    pep <- PSMtable$Modified.sequence
  }
  pep <- gsub("_", "", pep, fixed = TRUE)
  # Mark every phosphorylated residue with a single "0" character.
  pos <- gsub("S(ph)", "0", pep, fixed = TRUE)
  pos <- gsub("T(ph)", "0", pos, fixed = TRUE)
  pos <- gsub("Y(ph)", "0", pos, fixed = TRUE)
  # Strip the remaining two-letter modification tags such as "(ox)": the
  # pattern is a regular expression (the parentheses are a capture group), so
  # it removes the two lowercase letters and the emptied "()" is removed next.
  pos <- gsub("([a-z]{2})", "", pos)
  pos <- gsub("()", "", pos, fixed = TRUE)
  # The index of each "0" is now the residue position of a phosphosite.
  # vapply() guarantees one string per peptide; sapply() would build a matrix
  # when all peptides carry the same number of sites.
  hits <- gregexpr("0", pos, fixed = TRUE)
  res <- vapply(hits, function(h) {
    paste(as.character(as.numeric(h)), collapse = "&")
  }, character(1))
  names(res) <- pos
  # gregexpr() reports -1 when there is no match, i.e. no phosphosite.
  res[res == "-1"] <- ""
  return(res)
}

# Alternative for tables in which the localisation is not indicated consistently in the field "Modified.sequence":
extractLocalisation_MQ_2 <- function(PSMtable = PSMtable, ModifiedSequence = NULL) {
  # Returns the positions of the phosphorylated residues within each peptide.
  # (PHOSPHORYLATION POSITION INDICATED AS "(ph)" after OR "p" before IN THE FIELD "Modified.sequence")
  #
  # Arguments:
  #   PSMtable         data frame with one row per PSM (MaxQuant evidence table).
  #   ModifiedSequence name of the column holding the modified sequence,
  #                    e.g. "_AS(ph)DT(ph)K_" or "_ApSDpTK_".
  #
  # Returns:
  #   A character vector with one element per row of PSMtable (same order):
  #   the 1-based positions of the phosphorylated S/T/Y residues in the
  #   unmodified sequence, joined with "&" (e.g. "2&4"), or "" when the peptide
  #   carries no phosphorylation. The vector is named with the intermediate
  #   sequences in which each phosphosite is replaced by "0".
  ## Input error messages:
  #####
  if (is.null(ModifiedSequence)) {
    stop("Input the name for the field of modified sequence from the MaxQuant evidence table.")
  }
  #####
  if (ModifiedSequence %in% names(PSMtable)) {
    pep <- PSMtable[[ModifiedSequence]]
  } else {
    # Earlier versions ignored `ModifiedSequence` and always read
    # "Modified.sequence"; keep that as a fallback for existing callers.
    warning("Field '", ModifiedSequence, "' not found in PSMtable; using 'Modified.sequence' instead.")
    pep <- PSMtable$Modified.sequence
  }
  pep <- gsub("_", "", pep, fixed = TRUE)
  # Normalise the "p"-prefix notation to the "(ph)"-suffix notation.
  pep <- gsub("pS", "S(ph)", pep, fixed = TRUE)
  pep <- gsub("pT", "T(ph)", pep, fixed = TRUE)
  pep <- gsub("pY", "Y(ph)", pep, fixed = TRUE)
  # Mark every phosphorylated residue with a single "0" character.
  pos <- gsub("S(ph)", "0", pep, fixed = TRUE)
  pos <- gsub("T(ph)", "0", pos, fixed = TRUE)
  pos <- gsub("Y(ph)", "0", pos, fixed = TRUE)
  # Strip the remaining two-letter modification tags such as "(ox)": the
  # pattern is a regular expression (the parentheses are a capture group), so
  # it removes the two lowercase letters and the emptied "()" is removed next.
  pos <- gsub("([a-z]{2})", "", pos)
  pos <- gsub("()", "", pos, fixed = TRUE)
  # The index of each "0" is now the residue position of a phosphosite.
  # vapply() guarantees one string per peptide; sapply() would build a matrix
  # when all peptides carry the same number of sites.
  hits <- gregexpr("0", pos, fixed = TRUE)
  res <- vapply(hits, function(h) {
    paste(as.character(as.numeric(h)), collapse = "&")
  }, character(1))
  names(res) <- pos
  # gregexpr() reports -1 when there is no match, i.e. no phosphosite.
  res[res == "-1"] <- ""
  return(res)
}

