#####################################################################################################################################################################
######################################################## Find the interference parameter for each chromosome ########################################################
#####################################################################################################################################################################
# This script simulates nSims meioses
# For each chromosomes it gets the distribution of recombination events for differing interference parameters.
# For each interference parameter it calculates the sums of squares (SSQ) in comparison to the input linkage map ('ZF324.SIM100kb.map.txt') and produces a plot
# The zebra finch has a quite distinct recombination landscape as shown both by its linkage map (Backstrom et al. 2010) and by MLH1 focus mapping (Calderon & Pigozzi 2006)
# Then the optimal interference parameter (the one where the SSQ is minimal) for each chromosome is selected.

rm(list=ls())
# ------------------------------------------------------------------------------------------------------------------
# (0) inputs -----------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
# define path to files here
path = "C:\\Users\\Data\\"
# name outfile
outfile <- "SSQ100kb_6.txt"
# n simulations (*2, since there is a female and a male meiosis simulated and later summed up)
nSims <- 5000
# spacing of interference parameters to be tested
sp.inf <- 100000
# ---
options(warn=2)
# files
map <- read.table(paste(path,"data_ZF324.SIM100kb.map.txt",sep=""), sep="\t", header=TRUE)
splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos)))
tgus <- read.table(paste(path, "data_ZF324.chromosomes.txt", sep=""), sep="\t", header=TRUE)
tgus <- subset(tgus,Tgu %in% c("chr1","chr10","chr11","chr12","chr13","chr14","chr15","chr17","chr18","chr19","chr1A","chr2","chr20","chr23","chr3","chr4","chr4A","chr5","chr6","chr7","chr8","chr9"))
add <- data.frame("33","chr1.1","13900000","1034353419","1027403419","1.1","51.811140","0")
colnames(add) <- colnames(tgus)
tgus <- rbind(tgus,add)
tgus$length <- as.numeric(tgus$length)


# ------------------------------------------------------------------------------------------------------------------
# (1) create recombination probabilities for each chromosomes from linkage map -----------------------------------------
# ------------------------------------------------------------------------------------------------------------------
prob <- data.frame(matrix(,ncol=5))
colnames(prob) <- c("Pos.Genet.Start","Pos.Genet.End","Pos.Phys.Start","Pos.Phys.End","Rec.Prob")
prob <- prob[-1,]
for(j in 1:length(unique(map$Chromosome))) {
	chr <- subset(map, Chromosome==unique(map$Chromosome)[j])
	m1 <- chr$Marker[-nrow(chr)]
	m2 <- chr$Marker[-1]
	chr <- matrix(c(chr$Pos.Genet[-nrow(chr)],chr$Pos.Genet[-1],chr$Pos.Phys[-nrow(chr)],chr$Pos.Phys[-1]),ncol=4)
	dstnc <- chr[,2]-chr[,1]
	chr <- data.frame(chr)
	chr$Prob <- ((exp(4*((dstnc)/100))-1)/(exp(4*((dstnc)/100))+1))
	chr <- cbind(unique(map$Chromosome)[j],m1,m2,chr)
	colnames(chr) <- c("Chromosome","Marker1","Marker2","Pos.Genet.Start","Pos.Genet.End","Pos.Phys.Start","Pos.Phys.End","Rec.Prob")
	chr$Pos.Phys.End <- chr$Pos.Phys.End-1
	prob <- rbind(prob,chr)
}


# ------------------------------------------------------------------------------------------------------------------
# (2) simulation of recombinations -------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
# create data frame to store the output for each chromosome and interference parameter
out.mat <- data.frame(matrix(c(NA,NA,NA),ncol=3))
colnames(out.mat) <- c("chromosome","interference.range","SSQ")
# loop over all chromosomes
#for(mm in 1:nrow(tgus)) {
for(mm in 3:4) {		#23
	tgu <- tgus[mm,]
	chromo <- as.character(tgu$Tgu)
	# create sequence of interference ranges to be tested
	interference.range <- c(seq(0,tgu$length,by=sp.inf))
	if(length(interference.range)>=500) {interference.range <- interference.range[1:500]}
	# create pdf file to store figures
	pdf(file=paste(path,chromo,"_",outfile,".pdf",sep=""), height=7, width=7)
	# loop over all interference parameter
	for(kk in 1:length(interference.range)) {
		interf.range <- interference.range[kk]
		xxMID <- rep(0,nrow(subset(prob, Chromosome==chromo)))
		xxFID <- rep(0,nrow(subset(prob, Chromosome==chromo)))
		for(nn in 1:nSims) {
			# select specified chromosome
			chr.map <- subset(map, Chromosome==chromo)
			chr.prob <- subset(prob, Chromosome==chromo)
			# get at least one crossover per chromosome
			xFID.1 <- 0
			xMID.1 <- 0
			# find the position where the crossover occurs in both female and male
			while(sum(xFID.1)==0) {
				xFID.1 <- rbinom(n=nrow(chr.prob), 1, p=chr.prob$Rec.Prob)
			}
			while(sum(xMID.1)==0) {
				xMID.1 <- rbinom(n=nrow(chr.prob), 1, p=chr.prob$Rec.Prob)
			}
			# introduce crossover interference if required
			if(interf.range>0) {
				# introduce interference - FID
				inter.F <- which(xFID.1==1)
				l <- 1
				while(l <= length(inter.F)) {
					sct <- xFID.1[max(inter.F[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.F[l]+interf.range/prob$Pos.Phys.End[1],length(xFID.1))]
				while(sum(sct)>1) {
					sct[sample(c(which(sct==1)),1,prob=c(1-chr.prob$Rec.Prob[which(sct==1)]))] <- 0
				}
				xFID.1[max(inter.F[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.F[l]+interf.range/prob$Pos.Phys.End[1],length(xFID.1))] <- sct
				inter.F <- which(xFID.1==1)
				l <- l+1
				}
				# introduce interference - MID
				inter.M <- which(xMID.1==1)
				l <- 1
				while(l <= length(inter.M)) {
					sct <- xMID.1[max(inter.M[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.M[l]+interf.range/prob$Pos.Phys.End[1],length(xMID.1))]
				while(sum(sct)>1) {
					sct[sample(c(which(sct==1)),1,prob=c(1-chr.prob$Rec.Prob[which(sct==1)]))] <- 0
				}
				xMID.1[max(inter.M[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.M[l]+interf.range/prob$Pos.Phys.End[1],length(xMID.1))] <- sct
				inter.M <- which(xMID.1==1)
				l <- l+1
				}
			}
			# create the second chromatid in both females and males (after interference is introduced because interference is acting for both sisterchromatids (by splitting up the recombination events on both chromatids)
			xFID.2 <- ifelse((sample(c(0,1),length(xFID.1),replace=TRUE)==1),xFID.1,0)
			xFID.1[which(xFID.2==1)] <- 0
			xMID.2 <- ifelse((sample(c(0,1),length(xMID.1),replace=TRUE)==1),xMID.1,0)
			xMID.1[which(xMID.2==1)] <- 0		
			# save the number of recombination events in one of the haplotypes each
			xxFID <- xxFID + xFID.1
			xxMID <- xxMID + xMID.1
		}
		# check linkage map vs simulation
		# get all recombination events from mother and father
		xx <- xxMID+xxFID
		# get the chromosome that is simulated and get the genetic position between each window
		chr <- subset(map, Chromosome==chromo)
		chr <- matrix(c(chr$Pos.Genet[-nrow(chr)],chr$Pos.Genet[-1],chr$Pos.Phys[-nrow(chr)],chr$Pos.Phys[-1]),ncol=4)
		chr <- data.frame(chr)
		for(i in 1:nrow(chr)) {
			chr$dstnc[i] <- (chr[i,2]+chr[i,1])/2
		}
		# scale the recombination events from the simulation to the average recombination events per meiosis
		xy <- (cumsum(xx)/max(cumsum(xx)))*sum(xx)/(nSims)*50
		xy <- cbind(xy,chr$dstnc,chr$X3)
		colnames(xy) <- c("SIM","LM","POS")
		xy <- data.frame(xy)
		# calculate sums of square in comparison to the input linkage map
		SSQ <- sum((xy$LM-xy$SIM)^2)
		out.mat[1,1] <- as.character(chromo)
		out.mat[1,2] <- interf.range
		out.mat[1,3] <- SSQ
		# save data and create plots
		if(mm==1 & kk==1) {
			write.table(out.mat,paste(path, outfile, sep=""), append=FALSE, row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE, eol="\n")
		} else {
			write.table(out.mat,paste(path, outfile, sep=""), append=TRUE, row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE, eol="\n")
		}
		plot(xy$POS,xy$SIM, main=paste(chromo," - ",interf.range," - ",SSQ,sep=""))
		points(xy$POS,xy$LM,col="red")
		flush.console()
		print(paste(chromo, "- interference range ",kk," out of ",length(interference.range)))
	}
	dev.off()
}



# ------------------------------------------------------------------------------------------------------------------
# (3) plot output and find optimal value -------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
rm(list=ls())
# define path to files here
path = "C:\\Users\\Data\\"
SSQ <- read.table(paste(path,"SSQ100kb.txt",sep=""), sep="\t", header=TRUE)
All.chr <- unique(SSQ$chromosome)
pdf(file=paste(path,"SSQ_Minimum.pdf",sep=""), height=7, width=7)
for(i in 1:length(All.chr)) {
	SSQ.chr <- subset(SSQ, chromosome==as.character(All.chr)[i])
	plot(SSQ.chr$interference.range,SSQ.chr$SSQ, main=as.character(All.chr)[i])
	SSQ.chr[which(SSQ.chr$SSQ==min(SSQ.chr$SSQ)),]
	}
dev.off()

