######
# Aims: compute inversion frequencies in each parental strain
######

# ----- global variables, functions and packages ----- #
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/global_variables.R")
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/functions.R")

# ---- Inversion positions
# Load the inversion marker table (one row per diagnostic marker) and give its
# four columns explicit names: inversion, CHROM, POS, allele.
inv <- read.table(
  paste0(path_recomb, "/inversions_markers_dmel_Kapun_2014.csv"),
  header = TRUE, sep = ";"
)
colnames(inv) <- c("inversion", "CHROM", "POS", "allele")

# inversions are reported on version 5.18 of the ref genome and we switched to 6
# Export the markers as "CHROM:POS" strings, one per line, as input for the
# coordinate conversion.
write.table(
  paste0(inv$CHROM, ":", inv$POS),
  paste0(path_recomb, "/input_inversions_markers_dmel_Kapun_2014_from5_to_6.txt"),
  quote = FALSE, col.names = FALSE, row.names = FALSE
)

# Read the converted coordinates back in.
# NOTE(review): this file is not produced by this script; presumably it is the
# output of an external converter run on the list written above -- confirm.
# The "/" separator was missing here (unlike the two paths above), so the file
# was only found when path_recomb happened to end with a slash.
coord_translated <- read.table(
  paste0(path_recomb, "/coordinates_flybase_inv_020720.tsv"),
  stringsAsFactors = FALSE, header = FALSE, sep = "\t"
)

# The new positions are assigned row by row, so both tables must have the same
# number of rows; without this check a shorter table could be recycled silently.
# NOTE(review): this also assumes the converted file keeps the input row
# order -- confirm.
stopifnot(nrow(coord_translated) == nrow(inv))

# Column V2 is split on ":" and the second field is taken as the new position.
inv$POS <- as.numeric(vapply(
  strsplit(coord_translated$V2, ":", fixed = TRUE),
  function(fields) fields[2],
  character(1)
))

# ---- Inversion frequencies

# Frequency of the marker allele in one colon-separated count string.
#   count_string: one value of the sync count column (V4), or NA when the
#                 marker position is not present in the sync file.
#   allele:       the marker allele; looked up in bp_sync to find which count
#                 slot to use.
# Returns NA when the marker is missing from the sync file or the allele is not
# listed in bp_sync, so the result is always a single number.
# NOTE(review): bp_sync is defined outside this file; it is assumed to list the
# allele labels in the same order as the count slots -- confirm.
marker_allele_freq <- function(count_string, allele) {
  if (is.na(count_string)) {
    return(NA_real_)
  }
  ct <- as.numeric(strsplit(count_string, ":", fixed = TRUE)[[1]])
  slot <- match(as.character(allele), bp_sync)
  if (is.na(slot)) {
    return(NA_real_)
  }
  ct[slot] / sum(ct)
}

chroms <- as.character(unique(inv$CHROM))
inv_freq_parts <- vector("list", length(chroms))

for (k in seq_along(chroms)) {
  chr <- chroms[k]
  tmp_sam <- fread(paste0(path_sync, "/", chr, "_S_000_R00_X_RG.sync"),
                   sep = "\t", header = FALSE) # SAM parent
  tmp_or <- fread(paste0(path_sync, "/", chr, "_O_000_R00_X_RG.sync"),
                  sep = "\t", header = FALSE) # OR parent

  # Markers of this chromosome, sorted by position.
  tmp_inv <- inv[which(inv$CHROM == chr), ]
  tmp_inv <- tmp_inv[order(tmp_inv$POS), ]

  # Align each marker with its own sync row by position (column V2). Indexing
  # the filtered sync table by marker row number shifts every later marker to
  # the wrong row as soon as one marker is absent from the sync file.
  row_sam <- match(tmp_inv$POS, tmp_sam$V2)
  row_or <- match(tmp_inv$POS, tmp_or$V2)
  n_missing <- sum(is.na(row_sam) | is.na(row_or))
  if (n_missing > 0) {
    warning(n_missing, " inversion marker(s) on ", chr,
            " not found in at least one sync file; frequency set to NA",
            call. = FALSE)
  }

  marker_idx <- seq_len(nrow(tmp_inv))
  fr_sam <- vapply(
    marker_idx,
    function(i) marker_allele_freq(tmp_sam$V4[row_sam[i]], tmp_inv$allele[i]),
    numeric(1)
  )
  fr_or <- vapply(
    marker_idx,
    function(i) marker_allele_freq(tmp_or$V4[row_or[i]], tmp_inv$allele[i]),
    numeric(1)
  )

  # Keep the names fr_sam / fr_or: cbind uses them as output column names.
  inv_freq_parts[[k]] <- cbind(tmp_inv, fr_sam, fr_or)
}

# Stack the per-chromosome tables (NULL when there are no markers at all).
inv_freq <- do.call(rbind, inv_freq_parts)
saveRDS(inv_freq, paste0(path_parental_files, "/inversion_freq_sam_or.rds"))