## Pathway_scoring.R

# Map gene symbols to stable gene IDs, keeping only unambiguous matches.
#
# Only annotation rows whose stable ID is in `available_ids` are considered.
# A symbol is mapped when exactly one of those rows carries it as Gene.name;
# unknown, ambiguous, and NA symbols are dropped. Input order is preserved.
.symbols_to_gene_ids <- function(symbols, annotation, available_ids) {
  symbols <- as.character(symbols)
  ann_ids <- as.character(annotation[["Gene.stable.ID"]])
  ann_names <- as.character(annotation[["Gene.name"]])

  measured <- ann_ids %in% available_ids
  ann_ids <- ann_ids[measured]
  ann_names <- ann_names[measured]

  # A name that occurs more than once cannot be resolved to a single ID.
  ambiguous <- duplicated(ann_names) | duplicated(ann_names, fromLast = TRUE)
  usable <- !ambiguous & !is.na(ann_names)

  ids <- ann_ids[usable][match(symbols, ann_names[usable])]
  ids[!is.na(ids)]
}

# Bin label (1..n_bins) for each of `n` genes already ranked by expression.
.expression_bin_labels <- function(n, n_bins = 25L) {
  # Bin sizes that the original implementation hard-coded for its 20283-gene
  # matrix. They are kept for that exact case so that seeded results obtained
  # on that data set do not change.
  legacy_sizes <- c(
    812, 811, 811, 812, 811, 811, 812, 811, 811, 812,
    811, 811, 812, 811, 811, 812, 811, 811, 812, 811,
    811, 811, 811, 811, 812
  )
  if (n_bins == length(legacy_sizes) && n == sum(legacy_sizes)) {
    return(rep(seq_along(legacy_sizes), times = legacy_sizes))
  }
  # Otherwise split the ranking into `n_bins` near-equal consecutive bins.
  ((seq_len(n) - 1) * n_bins) %/% n + 1
}

#' Score a pathway per sample against expression-matched control genes
#'
#' For every sample (column of `expr`) the score is the mean expression of the
#' pathway genes minus the mean expression of a control gene set. Genes are
#' ranked by their summed expression across samples and split into bins; for
#' each pathway gene, up to `n_control` genes are drawn without replacement
#' from that gene's bin, and the union of all draws forms the control set.
#'
#' @param pathway Either a single name/index into `gene_sets`, or a vector of
#'   gene symbols (length other than one).
#' @param expr Expression matrix or data frame, genes in rows (row names are
#'   stable gene IDs) and samples in columns. Defaults to the global
#'   `logTPM_filtered`.
#' @param annotation Data frame with columns `Gene.stable.ID` and `Gene.name`.
#'   Defaults to the global `genepc`.
#' @param gene_sets Named list of gene-symbol vectors, used only when
#'   `pathway` has length one. Defaults to the global `gmt.file`.
#' @param n_bins Number of expression bins.
#' @param n_control Maximum number of control genes drawn per pathway gene.
#' @param seed Seed passed to `set.seed()` before sampling; `NULL` leaves the
#'   RNG state untouched. Note that seeding changes the caller's RNG state.
#' @return A numeric matrix with one row per sample (row names are the column
#'   names of `expr`) and a single unnamed column holding the scores.
Pathway_scoring <- function(pathway,
                            expr = logTPM_filtered,
                            annotation = genepc,
                            gene_sets = gmt.file,
                            n_bins = 25L,
                            n_control = 100L,
                            seed = 150001) {
  symbols <- if (length(pathway) == 1) gene_sets[[pathway]] else pathway

  expr <- as.matrix(expr)
  gene_ids <- rownames(expr)
  if (is.null(gene_ids)) {
    stop("the expression matrix must have gene IDs as row names",
         call. = FALSE)
  }

  geneset <- .symbols_to_gene_ids(symbols, annotation, gene_ids)
  if (length(geneset) == 0) {
    stop("no pathway gene could be mapped to a row of the expression matrix",
         call. = FALSE)
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Rank genes by total expression and assign each rank to a bin.
  ranked_genes <- gene_ids[order(rowSums(expr))]
  bin_labels <- .expression_bin_labels(length(ranked_genes), n_bins)
  gene_bins <- bin_labels[match(geneset, ranked_genes)]

  # One draw per pathway gene, in pathway order, from that gene's bin. The
  # draw size is capped so that bins smaller than `n_control` do not error.
  control_draws <- lapply(gene_bins, function(bin) {
    members <- ranked_genes[bin_labels == bin]
    picked <- sample.int(
      length(members),
      min(n_control, length(members)),
      replace = FALSE
    )
    members[picked]
  })
  control_genes <- unique(unlist(control_draws, use.names = FALSE))

  pathway_expr <- expr[geneset, , drop = FALSE]
  control_expr <- expr[control_genes, , drop = FALSE]

  # Per-sample mean written as sum / count, as in the original loop.
  pathway_mean <- colSums(pathway_expr) / nrow(pathway_expr)
  control_mean <- colSums(control_expr) / nrow(control_expr)

  matrix(
    pathway_mean - control_mean,
    ncol = 1L,
    dimnames = list(colnames(expr), NULL)
  )
}