# Put the project-specific R 4.1 package library first on the search path.
.libPaths("/mnt/beegfs/mccarthy/backed_up/general/rlyu/Software/Rlibs/4.1/")

# ---- Analysis settings ----
# Filter thresholds for reading the haplotype states; they share their names
# with the readHapState() arguments (see the comapr documentation for the
# exact meaning of each threshold).
minSNP        <- 10
minlogllRatio <- 10
bpDist        <- 1e5
maxRawCO      <- 5
minCellSNP    <- 500
biasTol       <- 0.25

# Number of CPU cores intended for parallel work.
cores <- 3

suppressPackageStartupMessages({
  library(comapr)
  library(ggplot2)
  # library(dplyr)
  library(Gviz)
  library(BiocParallel)
  library(SummarizedExperiment)
  library(gridExtra)
  library(grid)
  
})

# Register the default BiocParallel back-end. The worker count comes from the
# `cores` setting defined at the top of the script rather than a repeated
# literal, so changing that one setting changes the parallelism.
BiocParallel::register(BiocParallel::MulticoreParam(workers = cores))
#' Read and combine the haplotype states of the first-batch samples
#'
#' Reads samples WC_522 and WC_526 (chromosomes chr1 to chr19) from
#' `./output/firstBatch_march2022_50k/<sample>/` with `readHapState()`, sets
#' `sampleGroup` on each result to the sample name and merges the two objects
#' with `combineHapState()`.
#'
#' @param minSNP,minlogllRatio,bpDist,maxRawCO,minCellSNP,biasTol Filter
#'   settings passed unchanged to the `readHapState()` arguments of the same
#'   name.
#' @return The result of `combineHapState()` on the WC_522 and WC_526 objects
#'   (in that order).
getCountsFirstBatch <- function(minSNP = 10,
                                minlogllRatio = 30,
                                bpDist = 1e3,
                                maxRawCO = 10,
                                minCellSNP = 200,
                                biasTol = 0.25) {
  batch_dir <- "./output/firstBatch_march2022_50k/"
  chroms <- paste0("chr", seq_len(19))

  # Read one sample from its own sub-directory and tag it with the sample name.
  # The barcode file lives in the sample directory as `<sample>_min50k.txt`.
  read_sample <- function(sample_id) {
    sample_dir <- paste0(batch_dir, sample_id, "/")
    rse_state <- readHapState(
      sample_id,
      chroms = chroms,
      path = sample_dir,
      barcodeFile = paste0(sample_dir, sample_id, "_min50k.txt"),
      minSNP = minSNP,
      minlogllRatio = minlogllRatio,
      bpDist = bpDist,
      maxRawCO = maxRawCO,
      minCellSNP = minCellSNP,
      biasTol = biasTol
    )
    rse_state$sampleGroup <- sample_id
    rse_state
  }

  wc522_rse_state <- read_sample("WC_522")
  wc526_rse_state <- read_sample("WC_526")

  combineHapState(wc522_rse_state, wc526_rse_state)
}

#' Read and combine the haplotype states of the second-batch samples
#'
#' Reads samples WC_CNV_53, WC_CNV_42, WC_CNV_43 and WC_CNV_44 (chromosomes
#' chr1 to chr19) from `output/secondBatch_mar2022_50k/<sample>/` with
#' `readHapState()`, sets `sampleGroup` on each result to the sample name and
#' merges the four objects with `combineHapState()`.
#'
#' @param minSNP,minlogllRatio,bpDist,maxRawCO,minCellSNP,biasTol Filter
#'   settings passed unchanged to the `readHapState()` arguments of the same
#'   name.
#' @return The result of combining (WC_CNV_53, WC_CNV_42) with
#'   (WC_CNV_43, WC_CNV_44) through `combineHapState()`.
getCountsSecBatch <- function(minSNP = 10,
                              minlogllRatio = 30,
                              bpDist = 1e3,
                              maxRawCO = 10,
                              minCellSNP = 200,
                              biasTol = 0.25) {
  batch_dir <- "output/secondBatch_mar2022_50k/"
  chroms <- paste0("chr", seq_len(19))

  # Read one sample from its own sub-directory and tag it with the sample name.
  # The barcode file path is derived from the sample directory
  # (`<sample dir>/<sample>_min50k.txt`) instead of being spelled out a second
  # time, so the directory and the barcode file cannot drift apart.
  read_sample <- function(sample_id) {
    sample_dir <- paste0(batch_dir, sample_id, "/")
    rse_state <- readHapState(
      sample_id,
      chroms = chroms,
      path = sample_dir,
      barcodeFile = paste0(sample_dir, sample_id, "_min50k.txt"),
      minSNP = minSNP,
      minlogllRatio = minlogllRatio,
      bpDist = bpDist,
      maxRawCO = maxRawCO,
      minCellSNP = minCellSNP,
      biasTol = biasTol
    )
    rse_state$sampleGroup <- sample_id
    rse_state
  }

  # Samples are read in the same order as before: 53, 42, 43, 44.
  cnv53_rse_state <- read_sample("WC_CNV_53")
  cnv42_rse_state <- read_sample("WC_CNV_42")
  cnv43_rse_state <- read_sample("WC_CNV_43")
  cnv44_rse_state <- read_sample("WC_CNV_44")

  # combineHapState() is applied pairwise, so merge two pairs and then the
  # two intermediate results.
  combineHapState(
    combineHapState(cnv53_rse_state, cnv42_rse_state),
    combineHapState(cnv43_rse_state, cnv44_rse_state)
  )
}
# Read both batches with the filter settings defined at the top of the script.
# Every setting, including biasTol, is forwarded from its variable so that
# editing the settings block is enough to change the analysis.
# NOTE(review): as.numeric() presumably guards against settings supplied as
# character strings; it is a no-op for the numeric literals used here.
firstBatchSamples <- getCountsFirstBatch(
  minSNP = as.numeric(minSNP),
  minlogllRatio = as.numeric(minlogllRatio),
  bpDist = as.numeric(bpDist),
  maxRawCO = as.numeric(maxRawCO),
  minCellSNP = as.numeric(minCellSNP),
  biasTol = as.numeric(biasTol)
)
secBatchSamples <- getCountsSecBatch(
  minSNP = as.numeric(minSNP),
  minlogllRatio = as.numeric(minlogllRatio),
  bpDist = as.numeric(bpDist),
  maxRawCO = as.numeric(maxRawCO),
  minCellSNP = as.numeric(minCellSNP),
  biasTol = as.numeric(biasTol)
)

# Merge the two batches, report the number of entries per sample group and
# the dimensions of the merged object, then count crossovers.
allSamples <- combineHapState(firstBatchSamples, secBatchSamples)
table(allSamples$sampleGroup)
dim(allSamples)
countsAll_50k <- countCOs(allSamples)

# 
# minSNP <- 10 
# minlogllRatio <- 20
# bpDist <- 1e5
# maxRawCO <- 5 
# minCellSNP <- 500
# cores <- 3
# biasTol <- 0.45
# setting 4.3 
# WC_522 WC_526 re-called
#  --cmPmb 0.0001 
# saveRDS(countsAll_50k,file = "output/outputR/analysisRDS/countsAll-settings4.3-scCNV-CO-counts_07-mar-2022.rds")

# 
# minSNP <- 10 
# minlogllRatio <- 10
# bpDist <- 1e5
# maxRawCO <- 5 
# minCellSNP <- 500
# cores <- 3
# biasTol <- 0.25
# setting 4.3 
# WC_522 WC_526 re-called
#  --cmPmb 0.0001 
# saveRDS(countsAll_50k,file = "output/outputR/analysisRDS/countsAll-settings4.4-scCNV-CO-counts_07-mar-2022.rds")



# Per-cell, per-chromosome QC for sample WC_522 (chr1 to chr19); show its plot.
path_loc <- "output/firstBatch_march2022_50k//WC_522/"
WC_522_cellqc <- perCellChrQC(
  "WC_522",
  chroms = paste0("chr", seq_len(19)),
  path = path_loc,
  barcodeFile = paste0(path_loc, "WC_522_min50k.txt")
)
WC_522_cellqc$plot

# Plot the crossover counts grouped by sample (not split by chromosome) and
# hide the colour legend.
plotCount(countsAll_50k, by_chr = FALSE, group_by = "sampleGroup") +
  guides(color = "none")

# Map every sample to its genotype label (Fancm_KO / Fancm_WT). `from` and
# `to` are matched by position.
countsAll_50k$sampleType <- plyr::mapvalues(
  countsAll_50k$sampleGroup,
  from = c("WC_522", "WC_526",
           "WC_CNV_53", "WC_CNV_42",
           "WC_CNV_43", "WC_CNV_44"),
  to = c("Fancm_KO", "Fancm_KO", "Fancm_WT",
         "Fancm_WT", "Fancm_KO",
         "Fancm_WT")
)

# Genetic distances with bin_size = 1e7, once per sample and once per
# genotype label.
countsAll_dist_bin <- calGeneticDist(countsAll_50k,
                                     bin_size = 1e7,
                                     group_by = "sampleGroup")
countsAll_dist_type_bin <- calGeneticDist(countsAll_50k,
                                          bin_size = 1e7,
                                          group_by = "sampleType")

# Column totals of the per-bin values stored in the metadata columns.
colSums(as.matrix(mcols(countsAll_dist_bin)))
colSums(as.matrix(mcols(countsAll_dist_type_bin)))
