######
# Aim: convert recombination map from Dmel v5 to Dmel v6
######

# ----- global variables, functions and packages ----- #
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/global_variables.R")
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/functions.R")

# Chromosome-arm lengths for the two genome releases, one named column per arm
# (single-row matrices so that a length can be looked up as x[1, "2L"]).
# Reference: https://flybase.org/convert/coordinates
length_arm_v5 <- array(
  c(23011544, 21146708, 24543557, 27905053, 22422827),
  dim = c(1L, 5L),
  dimnames = list(NULL, c("2L", "2R", "3L", "3R", "X"))
)
length_arm_v6 <- array(
  c(23513712, 25286936, 28110227, 32079331, 23542271),
  dim = c(1L, 5L),
  dimnames = list(NULL, c("2L", "2R", "3L", "3R", "X"))
)

# ---- load the Dmel v5 recombination map
# The file is read without a header and its first line is skipped. Column V1
# holds each window as "<arm>:<start>..<end>"; column V2 holds the value that is
# later labelled "cM.Mb".
init_recomb_v5 <- fread(
  paste0(path_recomb, "/02_recombination_map_moving-median_Dmel_501k.txt"),
  header = FALSE, skip = 1
)
r <- init_recomb_v5$V2

# Split every window label into arm, start and end. vapply() (rather than
# sapply()) guarantees a character vector even when the table is empty.
# Note that `start` and `end` are kept as character strings, as parsed.
chr <- vapply(
  init_recomb_v5$V1,
  function(x) strsplit(x, ":", fixed = TRUE)[[1]][1],
  character(1)
)
start <- vapply(
  init_recomb_v5$V1,
  function(x) {
    before_dots <- strsplit(x, "..", fixed = TRUE)[[1]][1]
    strsplit(before_dots, ":", fixed = TRUE)[[1]][2]
  },
  character(1)
)
end <- vapply(
  init_recomb_v5$V1,
  function(x) strsplit(x, "..", fixed = TRUE)[[1]][2],
  character(1)
)

# ---- load correspondences between v5 and v6 from Flybase
# Columns as used below: V1 = v5 window, V2 = v6 window, V3 = a third field that
# is carried along as "diag".
coord <- fread(
  paste0(path_recomb, "coordinates_flybase_16062020.tsv"),
  header = FALSE, sep = "\t"
)
chr2 <- vapply(
  coord$V1,
  function(x) strsplit(x, ":", fixed = TRUE)[[1]][1],
  character(1)
)

# Start coordinate (as a string) of a window written "<arm>:<start>..<end>".
interval_start <- function(x) {
  before_dots <- strsplit(x, "..", fixed = TRUE)[[1]][1]
  strsplit(before_dots, ":", fixed = TRUE)[[1]][2]
}

# End coordinate (as a string) of a window written "<arm>:<start>..<end>".
interval_end <- function(x) {
  strsplit(x, "..", fixed = TRUE)[[1]][2]
}

arms <- c("2L", "2R", "3L", "3R", "X")
# One converted table per arm, bound together once after the loop instead of
# growing new_map with rbind() at every iteration.
maps_by_arm <- vector("list", length(arms))
names(maps_by_arm) <- arms

for (chromo in arms) {
  tmp <- coord[which(chr2 == chromo), ]

  # Keep only v6 windows without any "?" (and not empty). Counting the pieces
  # returned by strsplit(x, "?") is not reliable for this: strsplit() drops a
  # trailing empty piece, so a string ending in "?" would still yield one piece.
  is_mapped <- nzchar(tmp$V2) & !grepl("?", tmp$V2, fixed = TRUE)
  tmp <- tmp[which(is_mapped), ]

  # Attach the v5 value to every window present in both tables. Converting to a
  # plain data.frame makes the rbind() below use the data.frame method
  # regardless of how data.table objects are dispatched.
  temp <- as.data.frame(merge(tmp, init_recomb_v5, by = "V1"))
  if (nrow(temp) == 0) {
    stop(
      "No v5 window of arm ", chromo,
      " is present in both the recombination map and the coordinate table",
      call. = FALSE
    )
  }

  # Order the windows by their numeric v5 start position.
  tmp_start <- as.numeric(vapply(temp$V1, interval_start, character(1)))
  temp <- temp[order(tmp_start), ]
  colnames(temp) <- c("v5", "v6", "diag", "cM.Mb")

  # Rows added in front: a window from position 1 to the start of the first v6
  # window listed in `tmp`, followed by that first `tmp` window itself; both
  # take the value of the first merged window.
  # NOTE(review): `tmp` is in file order, not sorted, and if its first row is
  # also part of the merge that window appears twice in the output -- confirm
  # that this is intended.
  leading <- data.frame(
    v5 = c(NA, tmp$V1[1]),
    v6 = c(paste0(chromo, ":1..", interval_start(tmp$V2[1])), tmp$V2[1]),
    diag = c(NA, tmp$V3[1]),
    cM.Mb = rep(temp$cM.Mb[1], 2),
    stringsAsFactors = FALSE
  )
  # Row added at the end: from the end of the last merged v6 window up to the
  # v6 arm length, carrying the value of the last merged window.
  trailing <- data.frame(
    v5 = NA,
    v6 = paste0(
      chromo, ":", interval_end(tail(temp$v6, 1)), "..",
      length_arm_v6[1, chromo]
    ),
    diag = NA,
    cM.Mb = tail(temp$cM.Mb, 1),
    stringsAsFactors = FALSE
  )
  temp <- rbind(leading, temp, trailing)

  # When a degenerate "<arm>:1..1" window is present (the first v6 window
  # already starts at 1), drop the first row.
  if (any(temp$v6 == paste0(chromo, ":1..1"), na.rm = TRUE)) {
    temp <- temp[-1, ]
  }

  maps_by_arm[[chromo]] <- temp[, c("v6", "cM.Mb")]
}

new_map <- do.call(rbind, unname(maps_by_arm))
write.table(
  new_map,
  paste0(path_recomb, "02_recombination_map_moving-median_Dmel_501k_version6.txt"),
  col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t"
)