######
# Aim: generate the selected positions (targets of selection) on chromosome X, each with a given selection coefficient s
######

# ----- global variables, functions and packages ----- #
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/global_variables.R")
source("/Volumes/cluster/Claire/SO_PaperI/manuscript/scripts/functions.R")

# ----- Script parameters ----- #
chr <- "X"  # chromosome whose parental table is read below
h <- 0.5    # value stored in the `h` column of every target row
            # NOTE(review): presumably the dominance coefficient -- confirm

# --- generate hapmat file only for X
# Parental table of `chr`; the columns CHROM, POS, or and sam are used below.
parent <- fread(paste0(path_list, "chr", chr, "_parental.txt"), sep = "\t", header = TRUE)
nb <- nrow(parent)

# Number of homozygous genotypes written per position for each parental
# allele: 450 + 1050 = 1500 genotypes in total, i.e. a 30% / 70% split.
n_geno_or <- 450
n_geno_sam <- 1050

# One genotype string per position: the "or" homozygote repeated n_geno_or
# times followed by the "sam" homozygote repeated n_geno_sam times, separated
# by single spaces. seq_len() + vapply() keep this well-defined (an empty
# character vector) when the parental table has no rows, where 1:nb would
# iterate over c(1, 0).
geno <- vapply(seq_len(nb), function(i) {
  paste(
    c(
      rep(paste0(parent$or[i], parent$or[i]), n_geno_or),
      rep(paste0(parent$sam[i], parent$sam[i]), n_geno_sam)
    ),
    collapse = " "
  )
}, character(1))

all <- data.frame(
  chr = parent$CHROM,
  pos = parent$POS,
  or = parent$or,
  or_sam = paste0(parent$or, "/", parent$sam),
  geno = geno
)
write.table(all, paste0(path_simu_29_F20, "hap_30_70_census.hapmat"),
            col.names = FALSE, row.names = FALSE, sep = "\t", quote = FALSE)

# Prepend the header file (read from path_neutral_simu_29_F20) to the census
# hapmat to obtain the final hapmat file.
system(paste0("cat ", path_neutral_simu_29_F20, "header_sex.txt ", path_simu_29_F20,
              "hap_30_70_census.hapmat > ", path_simu_29_F20, "hap_30_70_noinit.hapmat"))

# --- Pick s estimates
# Load the per-position estimates and keep only the rows of chromosome X.
s_estimates <- readRDS(paste0(path_estimates_29_F20, "s_estimates_SO_F20.RDS"))
s_estimates_X <- s_estimates[s_estimates$CHROM == "X", ]

# Row-wise mean over the ten columns s_0_20.R1 ... s_0_20.R10.
s_estimates_X$mean_s <- apply(
  subset(s_estimates_X, select = paste0("s_0_20.R", 1:10)),
  1,
  mean
)

# ---- Smooth s estimates
# `wsmooth` selects the rows of the window table and is part of the output
# file name.
wsmooth <- 250
pos_w_all <- read.table(paste0(path_list, "pos_window_GW_parents_nooverlap.txt"),
                        sep = "\t", header = TRUE)

# Windows of chromosome X for this `wsmooth`, once in cM and once in bp.
is_X_window <- pos_w_all$chr == "X" & pos_w_all$wsmooth == wsmooth
cM_w <- pos_w_all[which(pos_w_all$unit == "cM" & is_X_window), ]
pos_w <- pos_w_all[which(pos_w_all$unit == "bp" & is_X_window), ]

# Estimates sorted by position.
index <- order(s_estimates_X$POS)
mat_data <- s_estimates_X[index, ]
x <- mat_data$POS

# Window boundaries (bp) and window midpoints (bp and cM).
tmp_pos_left <- pos_w$pos_left
tmp_pos_right <- pos_w$pos_right
tmp_pos_mid <- pos_w$pos_mean
tmp_cM_mid <- cM_w$pos_mean

# One smoothed column per replicate column, appended in replicate order;
# the columns are renamed after the loop.
sm <- data.frame(pos = tmp_pos_mid, cM = tmp_cM_mid)
replicate_cols <- paste0("s_0_20.R", 1:10)
for (rep_col in replicate_cols) {
  smoothed <- do_smooth(tmp_pos_left, tmp_pos_right, mat_data[[rep_col]], x)
  sm <- cbind(sm, smoothed)
}
sm$chr <- "X"
colnames(sm) <- c("pos", "cM", replicate_cols, "chr")

# Row-wise mean of the ten smoothed replicate columns.
sm$mean_s <- apply(subset(sm, select = paste0("s_0_20.R", 1:10)), 1, mean)
saveRDS(sm, paste0(path_traj, "/smoothed_w=", wsmooth, "_s_estimates_SO_F20_X.RDS"))

# Reload the smoothed estimates from their RDS file.
sm <- readRDS(paste0(path_traj, "/smoothed_w=", wsmooth, "_s_estimates_SO_F20_X.RDS"))

# Peaks of the mean smoothed s (extrp) and of its negation (extrn), i.e. the
# local maxima and local minima reported by find_peaks().
extrp <- ggpmisc:::find_peaks(sm$mean_s, ignore_threshold = 0, span = 25,
                              strict = TRUE, na.rm = FALSE)
extrn <- ggpmisc:::find_peaks(-sm$mean_s, ignore_threshold = 0, span = 25,
                              strict = TRUE, na.rm = FALSE)

# Selected windows: every extremum plus the window 7 places before the last.
index <- c(which(extrn | extrp), length(extrp) - 7)

# Two stacked panels: raw per-position mean s (top) and smoothed mean s with
# the selected windows marked in red (bottom).
png(paste0(path_estimates_29_F20, "s_estimates_SO_F20_X.png"), width = 800, height = 800)
par(mfrow = c(2, 1))
plot(s_estimates_X$POS / 1000000, s_estimates_X$mean_s,
     ylab = "Mean selection coefficient", xlab = "Position (Mb)")
plot(sm$pos / 1000000, sm$mean_s,
     ylab = "Mean smoothed selection coefficient", xlab = "Position (Mb)")
points(sm$pos[index] / 1000000, sm$mean_s[index], col = "red")
dev.off()


# --- Create target file
# For every selected window in `index`: draw one SNP position inside the
# window, take the sign of the window's average mean_s to choose the code
# orientation, and use an extreme quantile of the window's mean_s as the
# strength (stored as an absolute value).

# Draw one element of `values` with the fixed seed 1991.
# Indexing with sample.int() gives the same draw as sample(values, 1) for
# length(values) > 1, but avoids sample()'s special case where a single
# numeric value v is treated as the range 1:v.
draw_one_seeded <- function(values) {
  if (length(values) == 0) {
    stop("no candidate position to draw from in this window", call. = FALSE)
  }
  set.seed(1991)
  values[sample.int(length(values), 1)]
}

# Rows are collected in a list and bound once, rather than growing the data
# frame inside the loop.
target_rows <- vector("list", length(index))
for (k in seq_along(index)) {
  ind <- index[k]
  # Window whose midpoint equals the position of this smoothed point.
  bound <- which(tmp_pos_mid == sm$pos[ind])
  # Positions falling in [pos_left, pos_right) of that window.
  idxw <- which(s_estimates_X$POS >= tmp_pos_left[bound] &
                  s_estimates_X$POS < tmp_pos_right[bound])
  snp <- draw_one_seeded(s_estimates_X$POS[idxw])

  sval <- s_estimates_X$mean_s[idxw]
  ss <- mean(sval)
  # Positive window average: use the 99% quantile; otherwise the 1% quantile.
  benef_or <- ss > 0
  s <- if (benef_or) quantile(sval, 0.99) else quantile(sval, 0.01)

  # NOTE(review): `parents_X` is not defined in this file (only `parent` is);
  # presumably it comes from a sourced script -- confirm.
  id <- parents_X[which(parents_X$POS == snp), ]
  # Code is "or/sam" by default and "sam/or" when the window average is positive.
  code <- if (benef_or) {
    paste0(id$sam, "/", id$or)
  } else {
    paste0(id$or, "/", id$sam)
  }
  target_rows[[k]] <- data.frame(chr = "X", pos = snp, code = code, s = abs(s), h = h)
}
df <- do.call(rbind, target_rows)
rownames(df) <- index

# special case of extrema 100 and 107: 100 will be a beneficial Samarkand
# The same seed re-draws the same SNP as in the loop; its code is forced to
# the "or/sam" orientation regardless of the sign of the window average.
ind <- 100
# Guard: assigning to a missing row name would silently append an NA row.
stopifnot(as.character(ind) %in% rownames(df))
bound <- which(tmp_pos_mid == sm$pos[ind])
idxw <- which(s_estimates_X$POS >= tmp_pos_left[bound] &
                s_estimates_X$POS < tmp_pos_right[bound])
snp <- draw_one_seeded(s_estimates_X$POS[idxw])
id <- parents_X[which(parents_X$POS == snp), ]
code <- paste0(id$or, "/", id$sam)
df["100", "code"] <- code

# `dft` was used here without being defined anywhere in this file. Build it
# from `df` in the six-column layout the rest of the script expects:
# V1 = row name of `df` (the window index), V2..V6 = chr, pos, code, s, h.
# NOTE(review): layout inferred from the later read.table()/V2:V6 usage --
# confirm it matches the table that was originally used.
dft <- data.frame(
  V1 = rownames(df),
  V2 = df$chr,
  V3 = df$pos,
  V4 = df$code,
  V5 = df$s,
  V6 = df$h,
  stringsAsFactors = FALSE
)

# Hand-set values of the s column (V5) for six targets, keyed by V1.
s_override <- c(
  "80" = 0.4,
  "100" = 0.25,
  "107" = 0.25,
  "169" = 0.2,
  "214" = 0.2,
  "249" = 0.25
)
for (target_id in names(s_override)) {
  dft[dft$V1 %in% target_id, "V5"] <- s_override[[target_id]]
}
write.table(dft, paste0(path_simu_29_F20, "targets_v2.txt"),
            sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)


# --- 1, 2, 3, 4 and 6 targets of selection
# The full target table is read once; each scenario keeps the rows whose V1
# is in the scenario's id set, writes columns V2..V6 to tmp.txt and prepends
# header.txt to produce "<n>target_v2.txt".
dft <- read.table(paste0(path_simu_29_F20, "targets_v2.txt"), sep = "\t")

# Write the rows of `targets` whose V1 is in `ids` (columns V2..V6, no header)
# to tmp.txt, then concatenate header.txt and tmp.txt into `out_name`.
# Returns the exit status of the `cat` command invisibly.
write_target_subset <- function(targets, ids, out_name) {
  tmp_file <- paste0(path_simu_29_F20, "tmp.txt")
  write.table(targets[targets$V1 %in% ids, paste0("V", 2:6)], tmp_file,
              sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
  status <- system(paste0("cat ", path_simu_29_F20, "header.txt ", tmp_file,
                          " > ", path_simu_29_F20, out_name))
  invisible(status)
}

# Output file name -> ids (V1) of the targets included in that scenario.
target_sets <- list(
  "1target_v2.txt" = "80",
  "2target_v2.txt" = c("80", "249"),
  "3target_v2.txt" = c("80", "214", "249"),
  "4target_v2.txt" = c("80", "169", "214", "249"),
  "6target_v2.txt" = c("80", "100", "107", "169", "214", "249")
)
for (out_name in names(target_sets)) {
  write_target_subset(dft, target_sets[[out_name]], out_name)
}

