## get allele counts from bcf results

suppressPackageStartupMessages({library(ggplot2)
  library(tidyr)
  library(plyr)
  library(dplyr)})

# Parse the command-line arguments.
#
# Every trailing argument is evaluated verbatim as R code, so each one is
# expected to be an assignment (for example `chr='chr1'`) that defines one of
# the variables listed in `required_args` below.
# NOTE(review): eval(parse()) runs arbitrary code; the arguments must come
# from a trusted caller. Kept as-is because it defines the calling interface.
args <- commandArgs(trailingOnly = TRUE)

for (i in seq_along(args)) {
  eval(parse(text = args[[i]]))
}

# Fail early, with a readable message, when an argument was not supplied
# (otherwise the first use below dies with "object '...' not found").
required_args <- c("input_txt", "chr", "sample_name", "out_snp_png",
                   "out_bin_png", "out_bin_tsv", "co_out", "bin_size")
script_env <- environment()
missing_args <- required_args[
  !vapply(required_args, exists, logical(1),
          envir = script_env, inherits = FALSE)
]
if (length(missing_args) > 0) {
  stop("Missing required argument(s): ",
       paste(missing_args, collapse = ", "), call. = FALSE)
}

# Echo the received values so they show up in the job log.
for (arg_name in required_args) {
  print(get(arg_name, envir = script_env))
}

# bin_size is the number of SNPs per bin; it may arrive as a string.
bin_size <- suppressWarnings(as.numeric(bin_size))
if (length(bin_size) != 1 || !is.finite(bin_size) || bin_size < 1) {
  stop("bin_size must be a single finite number >= 1", call. = FALSE)
}

# Load the tab-separated per-site table. The file has no header row, so the
# column names are supplied here.
# NOTE(review): presumably Pos = position, DP = depth, AD = allelic depths,
# GT = genotype call, as in bcftools output -- confirm against the producer.
gt_col_names <- c("Pos", "DP", "AD", "GT")
chr_gt <- readr::read_tsv(input_txt, col_names = gt_col_names)

# Preview the first rows of the loaded table.
head(chr_gt)

# Per-SNP genotype plot: one point per site whose genotype is "0/1" or "1/1",
# position on the x axis and genotype on the y axis ("1/1" is the first
# factor level, "0/1" the second).
snp_plot <- chr_gt %>%
  filter(GT %in% c("0/1", "1/1")) %>%
  mutate(GT = factor(GT, levels = c("1/1", "0/1"))) %>%
  ggplot() +
  geom_point(mapping = aes(x = Pos, y = GT, color = GT), size = 0.1) +
  theme_bw() +
  xlab(chr) +
  ylab(sample_name)

# Pass the plot explicitly instead of relying on last_plot(); binding it to a
# name also avoids the implicit top-level print, which would open the default
# graphics device when the script runs non-interactively.
ggsave(filename = out_snp_png, plot = snp_plot,
       width = 14, height = 4, dpi = 72)


# Assign the "0/1" and "1/1" sites to consecutive bins of `bin_size` sites
# each (the last bin may be shorter) and annotate every site with the
# genotype counts and heterozygous ratio of its bin.
count_clean_cell_gt_tidy <- chr_gt %>%
  filter(GT %in% c("0/1", "1/1")) %>%
  # Row i goes to bin floor((i - 1) / bin_size) + 1; kept as an integer so the
  # ids are never written in scientific notation.
  mutate(binID = as.integer((seq_along(Pos) - 1L) %/% bin_size) + 1L) %>%
  group_by(binID) %>%
  # count_ref is always 0 here because "0/0" sites were filtered out above;
  # the column is kept so the table layout stays unchanged.
  mutate(count_ref = sum(GT == "0/0"),
         count_het = sum(GT == "0/1"),
         count_alt = sum(GT == "1/1"),
         het_ratio = count_het / (count_ref + count_alt + count_het)) %>%
  ungroup()

## count HET bins
# One row per bin: the bin id and its heterozygous ratio.
bin_df <- count_clean_cell_gt_tidy %>%
  distinct(binID, het_ratio) %>%
  as.data.frame()
  
# Per-bin plot: heterozygous ratio of every bin, y axis fixed to [0, 1].
bin_plot <- ggplot(data = bin_df) +
  geom_point(mapping = aes(x = binID, y = het_ratio)) +
  theme_bw() +
  xlab(chr) +
  ylab(paste0(sample_name, " HET ratio in bin")) +
  ylim(0, 1)

# Pass the plot explicitly instead of relying on last_plot(); binding it to a
# name also avoids the implicit top-level print.
ggsave(filename = out_bin_png, plot = bin_plot,
       width = 14, height = 4, dpi = 72)

# Write the per-bin table with a header row and without row names or quotes.
# NOTE(review): write.table's default separator is a space, although the
# target variable is named *_tsv -- left unchanged; confirm what downstream
# readers expect.
write.table(x = bin_df, file = out_bin_tsv,
            quote = FALSE, row.names = FALSE, col.names = TRUE)
# Keep only the sites whose genotype is "0/1" or "1/1".
chr_gt_f <- chr_gt %>% filter(GT %in% c("0/1", "1/1"))

# Genotype switch points: rows whose genotype differs from the previous kept
# site. `Prev` holds the position of that previous site. The first row has no
# predecessor (CO is NA) and is dropped by filter().
# dplyr::lag is spelled out so the call cannot resolve to stats::lag.
out_co <- chr_gt_f %>%
  mutate(CO = (dplyr::lag(GT) != GT), Prev = dplyr::lag(Pos)) %>%
  filter(CO)

# Numeric columns such as Pos may be stored as doubles; raise scipen while
# writing so round values are written in full (3000000, not 3e+06), then
# restore the previous option.
old_opts <- options(scipen = 999)
write.table(x = out_co, file = co_out,
            quote = FALSE, row.names = FALSE, col.names = TRUE)
options(old_opts)
# write.table(x = all_cell_het_ratio_wc_sp,file = "output/outputR/WC-Sp_noisy-cells.txt",
#             quote = F,row.names = F,col.names = T)
