## The correctLowConfCO.R method would result in the loss of true COs;
## this is the script that should be run instead.
## author: Ruqian Lyu
## Date: 2020-10-09
## For each sample this generates *_postvi-filtered.tsv, which contains the
## SNPs and states remaining after filtering, and
## *_postvi-filtered-co-only.tsv, which contains the COs called from the
## filtered SNPs.




suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(doParallel)
  library(foreach)
})

# Parse the command line. Every trailing argument is evaluated as an R
# expression, so the script expects assignments of the form name=value that
# define the variables listed in `required_args` below.
# NOTE(review): eval(parse()) runs arbitrary R code taken from the command
# line; only invoke this script with trusted arguments.
args <- commandArgs(trailingOnly = TRUE)
for (arg in args) {
  eval(parse(text = arg))
}

# Fail early with a readable message instead of "object 'xyz' not found".
required_args <- c("chr", "segCOratio", "minSNP", "minRatio", "filePath",
                   "bcfResult")
missing_args <- setdiff(required_args, ls())
if (length(missing_args) > 0) {
  stop("Missing required argument(s): ",
       paste(missing_args, collapse = ", "), call. = FALSE)
}

# Echo the run configuration to the log.
print(chr)
print(segCOratio)
print(minSNP)
print(minRatio)
print(filePath)
print(bcfResult)

# The thresholds may have been passed as quoted strings; compare as numbers.
minSNP <- as.numeric(minSNP)
minRatio <- as.numeric(minRatio)
if (length(minSNP) != 1 || length(minRatio) != 1 ||
    anyNA(c(minSNP, minRatio))) {
  # An NA threshold would turn the segment filter into NA rows further down.
  stop("minSNP and minRatio must each be a single numeric value",
       call. = FALSE)
}


# Load the per-segment table. The script reads the columns Sid, SNP_count,
# logllRatio, seg_len, seg_start and seg_end from it.
seg_ratio <- read.table(file = segCOratio,
                        header = TRUE,
                        stringsAsFactors = FALSE)
head(seg_ratio)

# Segments supported by too few SNPs or by a too-small log-likelihood ratio
# are treated as low confidence; the SNPs inside them are removed later.
toCorrect <- seg_ratio[seg_ratio$SNP_count < minSNP |
                         seg_ratio$logllRatio < minRatio, ]

# Diagnostic scatter plot of segment length against log-likelihood ratio.
# Points are coloured with the same criterion that defines `toCorrect`
# (the colour previously tested logllRatio > minRatio, which did not match
# the filter above).
seg_plot <- ggplot(data = seg_ratio) +
  geom_point(mapping = aes(x = seg_len,
                           y = logllRatio,
                           color = (SNP_count < minSNP |
                                      logllRatio < minRatio)))

# The file name prefix depends on whether "BGI" occurs in the data path.
plot_prefix <- if (grepl("BGI", filePath)) "tmp/bgi-" else "tmp/agrf-"
if (!dir.exists("tmp")) {
  dir.create("tmp", showWarnings = FALSE)
}
# Pass the plot explicitly rather than relying on ggplot2's last_plot().
ggsave(filename = paste0(plot_prefix, chr, bcfResult, "-logllRatio.png"),
       plot = seg_plot,
       width = 12)

# Log how many segments are flagged out of the total.
dim(toCorrect)
head(toCorrect)
dim(seg_ratio)

# For every low-confidence segment, drop the SNPs that fall inside it from the
# owning sample's table, then recompute the state switches (COs) from what is
# left. seq_len() makes the loop a no-op when there is nothing to correct
# (1:nrow() would iterate over c(1, 0) and fail on a non-existent file).
for (segID in seq_len(nrow(toCorrect))) {
  fromSid <- toCorrect[segID, "Sid"]

  # All three files of a sample share the same path prefix.
  sample_prefix <- paste0(filePath, fromSid, "/", fromSid, "_", chr,
                          bcfResult, "_dp2_postvi")
  vi_tsv_file <- paste0(sample_prefix, ".tsv")
  outfilteredTSV <- paste0(sample_prefix, "-filtered.tsv")
  outfilteredCOTSV <- paste0(sample_prefix, "-filtered-co-only.tsv")

  stopifnot(file.exists(vi_tsv_file))

  # Start from the already-filtered table when one exists, so that several
  # segments belonging to the same sample are removed cumulatively.
  # NOTE(review): a filtered file left over from an earlier run is reused
  # as well; delete stale outputs before re-running with other thresholds.
  input_file <- if (file.exists(outfilteredTSV)) {
    outfilteredTSV
  } else {
    vi_tsv_file
  }
  vi_df <- read.table(file = input_file, header = TRUE,
                      stringsAsFactors = FALSE)

  # Remove every SNP positioned within the segment (bounds inclusive).
  # Comparing against the bounds avoids materialising seg_start:seg_end,
  # which can span millions of positions. range() keeps the original
  # tolerance for seg_start > seg_end.
  seg_bounds <- range(toCorrect[segID, "seg_start"],
                      toCorrect[segID, "seg_end"])
  stopifnot(!anyNA(seg_bounds))
  in_segment <- !is.na(vi_df$Pos) &
    vi_df$Pos >= seg_bounds[1] &
    vi_df$Pos <= seg_bounds[2]
  vi_df <- vi_df[!in_segment, ]

  # A CO is a row whose state differs from the previous row's state; Prev
  # records the previous SNP position. The first row compares against NA and
  # is dropped by filter().
  vionly_tsv <- vi_df %>%
    mutate(CO = (lag(state) != state),
           Prev = lag(Pos)) %>%
    filter(CO)

  write.table(vionly_tsv, file = outfilteredCOTSV,
              col.names = TRUE, row.names = FALSE, quote = FALSE)
  write.table(vi_df, file = outfilteredTSV,
              col.names = TRUE, row.names = FALSE, quote = FALSE)
}
