######
# Aim: compute O frequency per marker per sample at 29°C and convert positions to cM
######

# ----- Global variables, helper functions and packages ----- #
# Both files are sourced for their side effects: the rest of this script
# relies on objects they are expected to define (paths, sample prefixes,
# helper functions such as fill.in.snps).
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/global_variables.R")
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/functions.R")

# ----- Load the recombination map (tab-separated, with a header line)
recomb_map <- fread(
  paste0(path_recomb, "/recomb_map_reformat.txt"),
  sep = "\t",
  header = TRUE
)
   
# ---- Build the per-marker frequency / coverage table, chromosome by chromosome
#
# For every chromosome arm and every sample in `prefix_samples`:
#   * read the sample's .sync file and keep only the parental SNP positions,
#   * split the colon-separated count string into one column per entry of
#     colnames(code),
#   * store the total count and the fraction of that total found in the
#     column designated by the parental table's `or` field.
# Arms of one chromosome are then stacked, positions are shifted onto a
# whole-chromosome coordinate (POS_ALL) and converted to cumulative cM using
# `recomb_map`. The result for all chromosomes is saved as an RDS file.
#
# Objects used but not defined here (expected from the sourced files —
# TODO confirm): path_list, path_sync, path_traj, prefix_samples, code,
# fill.in.snps(), and `length`, an object indexed as length[1, "2L"]
# (presumably arm lengths in bp). Calls such as length(x) still reach the base
# function because R skips non-function objects when resolving a call.
mat_freq_count <- NULL
key_cols <- c("CHROM", "POS", "REF")

for (chr_type in c("2", "3", "4", "X")) {
  # Arms that make up this chromosome.
  chrom_Ne <- switch(chr_type,
    "2" = c("2L", "2R"),
    "3" = c("3L", "3R"),
    "4" = "4",
    "X" = "X"
  )

  chr_tab <- NULL
  for (chr in chrom_Ne) {
    # Parental SNPs of this arm.
    parent <- fread(paste0(path_list, "chr", chr, "_parental.txt"),
                    sep = "\t", header = TRUE)
    fr_cov <- NULL

    for (sample_id in prefix_samples) {
      print(sample_id)
      snp <- fread(paste0(path_sync, chr, "_", sample_id, "_RG.sync"),
                   header = FALSE)
      colnames(snp) <- c("CHROM", "POS", "REF", sample_id)
      # Inner join: keep only parental SNP positions.
      snp <- merge(snp, parent, by = key_cols)

      # One row per SNP, one column per count field. vapply() enforces that
      # every count string has exactly as many fields as `code` has columns.
      n_fields <- length(colnames(code))
      counts <- t(vapply(
        strsplit(snp[[sample_id]], ":", fixed = TRUE),
        as.numeric,
        numeric(n_fields)
      ))
      colnames(counts) <- colnames(code)
      tot <- rowSums(counts)

      # Column of `counts` to read for each SNP, as given by `or`.
      # NOTE(review): `or` may hold column names or column indices; both are
      # handled, names being translated to indices first.
      or_col <- snp[["or"]]
      if (is.character(or_col)) {
        or_col <- match(or_col, colnames(counts))
        if (anyNA(or_col)) {
          stop("column 'or' holds values that are not column names of 'code'",
               call. = FALSE)
        }
      }
      freq <- counts[cbind(seq_len(nrow(counts)), or_col)] / tot

      # Sample names are split on "_": field 2 gives the generation, field 3
      # ("R<number>") the replicate.
      parts <- strsplit(sample_id, "_", fixed = TRUE)[[1]]
      g <- as.numeric(parts[2])
      r <- as.numeric(strsplit(parts[3], "R")[[1]][2])
      freq_col <- paste0("F", g, ".R", r, ".freq.29")
      count_col <- paste0("F", g, ".R", r, ".count.29")
      snp[[freq_col]] <- freq
      snp[[count_col]] <- tot

      if (sample_id == prefix_samples[1]) {
        # First sample: also carry the parental annotation columns.
        keep_cols <- c(key_cols, "ALT", "QUAL", "sam_GT", "or_GT",
                       "sam_AO", "sam_AO_RO", "or_AO", "or_AO_RO",
                       "sam", "or", freq_col, count_col)
        fr_cov <- subset(snp, select = keep_cols)
      } else {
        # Later samples: append only their two columns (inner join, so a SNP
        # must be present in every sample to be retained).
        keep_cols <- c(key_cols, freq_col, count_col)
        fr_cov <- merge(fr_cov, subset(snp, select = keep_cols), by = key_cols)
      }
    }

    # ----- Convert arm positions to whole-chromosome positions
    fr_cov$CHROM_ALL <- chr_type
    fr_cov$POS_ALL <- fr_cov$POS
    # Right arms are shifted by the value stored for the matching left arm.
    if (chr == "2R") { fr_cov$POS_ALL <- fr_cov$POS + length[1, "2L"] }
    if (chr == "3R") { fr_cov$POS_ALL <- fr_cov$POS + length[1, "3L"] }
    idx <- order(fr_cov$POS_ALL)
    fr_cov <- fr_cov[idx, ]
    chr_tab <- rbind(chr_tab, fr_cov)
  }

  # --- Convert positions to recombination units
  nb <- nrow(chr_tab)
  chr_tab$cM <- 0
  chr_tab$cM_fit <- 0
  chr_tab$cM_Mb <- 0
  if (chr_type != "4") {
    for (chr in chrom_Ne) {
      # The row selection is computed OUTSIDE `[`: inside a data.table
      # subset, `chr` would resolve to a column named `chr` rather than to
      # the loop variable, making the test always TRUE and keeping the
      # windows of every chromosome.
      arm_windows <- which(recomb_map$chr == chr)
      tmp_recomb <- recomb_map[arm_windows, ]
      on_arm <- chr_tab$CHROM == chr

      for (i in seq_len(nrow(tmp_recomb))) {
        win <- tmp_recomb[i, ]
        idx <- which(win$start < chr_tab$POS & chr_tab$POS <= win$end & on_arm)
        if (length(idx) > 0) {
          # Distance of each marker to the previous one in the window (the
          # first marker is measured from the window start), scaled by the
          # window rate per 1e6 bp. Any constant arm offset would cancel in
          # the difference, so arm-local positions are used directly.
          # NOTE(review): the stretch between the last marker of a window and
          # the window end, and windows without any marker, add nothing to
          # the cumulative map — confirm this is intended.
          rate <- win$r
          chr_tab$cM[idx] <- diff(c(win$start, chr_tab$POS[idx])) * rate / 1000000
          chr_tab$cM_Mb[idx] <- rate
        }
      }
    }
    # Per-marker increments -> cumulative position along the chromosome.
    chr_tab$cM <- cumsum(chr_tab$cM)
    map_input <- data.frame(
      ID = as.character(seq_len(nb)),
      CHROM = rep(1, nb),
      POS = chr_tab$POS_ALL / 1000000,
      cM = chr_tab$cM
    )
    map_fit <- fill.in.snps(map_input)
    chr_tab$cM_fit <- map_fit$cM
  }
  # Chromosome 4 keeps cM, cM_fit and cM_Mb at 0.
  mat_freq_count <- rbind(mat_freq_count, chr_tab)
}
saveRDS(mat_freq_count, paste0(path_traj, "freq_cov_F20_29.RDS"))