#####################################################################################################################################################################
###################################### This is a gene dropping simulation in arbitrary pedigrees to estimate the variance in F ######################################
#####################################################################################################################################################################
# Written 09/01/2014
# The simulation takes as input (1) a linkage map, (2) a pedigree file.
## The linkage map file should contain the following columns: "Chromosome","Marker","Pos.Phys","Pos.Genet"
## The pedigree file should contain the following columns: "IndID","FID","MID","Sex"
# The following input parameters must be provided:
## (1) the number of simulations to run ('nSims'),
## (2) whether you want to save the number of crossovers per meiosis in males and females ('save.recombs'),
## (3) the range of crossover interference in bp,
## (4) whether or not you want to remove the sex chromosome (TguZ).
## ------------------------------------------------------------------------------------------------------------------

# The simulation works as follows:
# (1) It creates a new look-up table that contains the recombination probabilities between adjacent markers (if requested, the sex chromosome is removed). 
## To calculate the recombination probability it uses the Kosambi map function, since our marker positions are given in Kosambi cM.
## Thus, r=0.5*((e^(4*d)-1)/(e^(4*d)+1)), in which r is the recombination probability and d is the distance in Morgan.
## Then it calculates the total physical genome length in bp from the map file.
# (2) It checks the pedigree file for inconsistencies in sex and whether it is ordered in that way that parents are placed before their offspring.
## It produces an error message if the pedigree is not correct. If you do not stop the program, it will proceed though and encounter errors later. 
# (3) It creates the output file on the harddrive (empirical F values (F.Emp) for each individual and each simulation run are stored).
## It creates a list in which all individuals are stored (see below for details).
# (4) It generates the founders + their haplotypes (for each chromosome, i.e. for the zebra finch 2*40=80 chromosomes).
# (5) The simulation loop starts:
## (5.1) The first individual that is not a founder is choosen.
## (5.2) Its parents and their haplotypes are selected.
## (5.3) Loop over all chromosomes and introduce cross-overs. The simulation makes sure that at least one cross-over event per chromosome occurs.
### If specified, it takes crossover interference into account (in the zebra finches, Calderon & Pigozzi (2006) counted 45.7 and 45.2 MLH1 foci per oocyte and spermatocyte, respectively).
### The crossover interference works like that: if there are two or more recombination events inside the interference range, then only one is kept randomly.
## (5.4) For each chromosome the individual gets one (recombined) chromosome from its mother and its father.
## (5.5) For each chromosome the length of autozygous stretches (homozygosity for the founder haplotypes) is calculated by taking into account
### (A) that if the stretch starts at the first or last marker it cannot be as long as in the centre
### (B) that crossovers occur randomly between the two adjacent markers.
## (5.6) For each individual the total length of autozygous stretches is calculated and divided by the total genome length --> F.Emp.
## (5.7) F.Emp is stored on the hard drive.
## (5.8) The parents of the individual are removed from the list of individuals if they are (A) not founders and (B) not needed later on as parents again.
## (5.9) Only if the individual is needed later on in the simulation as a parent its haplotypes get stored in the list of individuals.
### Steps (5.8) and (5.9) should speed up the simulation if there is an extended pedigree to simulate.
## (5.10) Proceed at step (5.1) with the next individual.

# ToDo:
# Create plot full-sib mating linkage map 100kb, 10kb, 1kb, 1st generation, 5th generation (mean + SD)

rm(list=ls())
# ------------------------------------------------------------------------------------------------------------------
# (0) inputs -----------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
# define path to files here
path = "C:\\Users\\Data\\"
# n simulations
nSims <- 5000
# remove sex chromosome
rem.sex <- TRUE
# name outfile
outfile <- "+++SIM100kb_Relatedness_n5000_1.txt"
# create summary
smmry <- FALSE
# ---

options(warn=2)
# files
#ped <- read.table(paste(path,"data_SIM.ped3404.txt",sep=""), sep="\t", header=TRUE)
ped <- read.table(paste(path,"data_SIM.ped.txt",sep=""), sep="\t", header=TRUE)
map <- read.table(paste(path,"data_ZF324.SIM100kb.map.txt",sep=""), sep="\t", header=TRUE)
intrfrnc <- read.table(paste(path,"data_ZF324.chromosomes.txt",sep=""), sep="\t", header=TRUE)
intrfrnc <- intrfrnc[,c("Tgu","inter_range")]
addChr <- data.frame(unique(map$Chromosome)[!(unique(map$Chromosome) %in% intrfrnc$Tgu)])
addChr <- cbind(addChr, intrfrnc$inter_range[which(intrfrnc$Tgu=="chr1.1")])
colnames(addChr) <- c("Tgu","inter_range")
intrfrnc <- rbind(intrfrnc,addChr)
splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos)))


# ------------------------------------------------------------------------------------------------------------------
# (1) create recombination probabilities for each chromatid strand from linkage map ------------------------------------
# ------------------------------------------------------------------------------------------------------------------
if(rem.sex==TRUE) {
	map <- subset(map, Chromosome!="chrZ")
}
prob <- data.frame(matrix(,ncol=5))
colnames(prob) <- c("Pos.Genet.Start","Pos.Genet.End","Pos.Phys.Start","Pos.Phys.End","Rec.Prob")
prob <- prob[-1,]
for(j in 1:length(unique(map$Chromosome))) {
	chr <- subset(map, Chromosome==unique(map$Chromosome)[j])
	m1 <- chr$Marker[-nrow(chr)]
	m2 <- chr$Marker[-1]
	chr <- matrix(c(chr$Pos.Genet[-nrow(chr)],chr$Pos.Genet[-1],chr$Pos.Phys[-nrow(chr)],chr$Pos.Phys[-1]),ncol=4)
	dstnc <- chr[,2]-chr[,1]
	chr <- data.frame(chr)
	chr$Prob <- ((exp(4*((dstnc)/100))-1)/(exp(4*((dstnc)/100))+1))
	chr <- cbind(unique(map$Chromosome)[j],m1,m2,chr)
	colnames(chr) <- c("Chromosome","Marker1","Marker2","Pos.Genet.Start","Pos.Genet.End","Pos.Phys.Start","Pos.Phys.End","Rec.Prob")
	chr$Pos.Phys.End <- chr$Pos.Phys.End-1
	prob <- rbind(prob,chr)
}

# calculate the total physical genome length from the map file
genome.length <- 0
for(aa in 1:length(unique(prob$Chromosome))) {
	ToDo <- subset(prob,Chromosome==unique(prob$Chromosome)[aa])
	ToDo <- max(ToDo$Pos.Phys.End)
	genome.length <- genome.length+ToDo
}


# ------------------------------------------------------------------------------------------------------------------
# (2) check pedigree ---------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
for(i in 1:nrow(ped)) {
	ind <- ped[i, ]
	if(ind$Sex==1) {
		x <- which(ped$FID==ind$IndID)
		y <- which(ped$MID==ind$IndID)
		if(length(x)>0) {
			stop("males and females are mixed up!")
		}
		if(any(y<i)) {
			stop("pedigree is not ordered correctly - offspring preceed parents")
		}
	}
	if(ind$Sex==0) {
		x <- which(ped$FID==ind$IndID)
		y <- which(ped$MID==ind$IndID)
		if(length(y)>0) {
			stop("males and females are mixed up!")
		}
		if(any(y<i)) {
			stop("pedigree is not ordered correctly - offspring preceed parents")
		}
	}
}


# ------------------------------------------------------------------------------------------------------------------
# (3) create a list in which all sublists get stored. Also create textfile for simulation output -----------------------
# ------------------------------------------------------------------------------------------------------------------
INDS <- list()
# create header for output file
out <- data.frame("Sim.run","Combination","IndID1","IndID2","Relatedness.R")
write.table(out,paste(path, outfile, sep=""), append=FALSE, row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE, eol="\n")


# ------------------------------------------------------------------------------------------------------------------
# (4) generate founder haplotypes --------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------------
founders <- subset(ped,FID==0 | MID==0)
k <- 1
for(i in 1:nrow(founders)) {
	F <- NULL
	F <- list()
	F[[1]] <- founders$IndID[i]
	for(j in 1:length(unique(map$Chromosome))) {
		chr <- subset(map, Chromosome==unique(map$Chromosome)[j])
		# generate first haplotype, which is a repetition of a unique number (for each chromosome in an individual it is the same number, but different from the second haplotype)
		Haplo1 <- rep((2*k-1),nrow(chr))
		# generate second haplotype, which is a repetition of a unique number (for each chromosome in an individual it is the same number, but different from the first haplotype)
		Haplo2 <- rep((2*k),nrow(chr))
		F[[2*j]] <- Haplo1
		F[[2*j+1]] <- Haplo2
	}
	INDS[[k]] <- F
	flush.console()
	print(k)
	k <- k+1
}
# Save k as kk in order to restore it in each simulation
kk <- k


# ------------------------------------------------------------------------------------------------------------------
# (5) loop over the whole pedigree (only here the loop for nSims has to start) -----------------------------------------
# ------------------------------------------------------------------------------------------------------------------
for(nn in 1:nSims) {
timeStart = Sys.time()
# restore k
k <- kk
gnrtns <- subset(ped,FID!=0 & MID!=0)
for(i in 1:nrow(gnrtns)) {
	F <- NULL 
	F <- list()
	# save individual ID
	F[[1]] <- gnrtns$IndID[i]
	# get parents + their haplotypes of the individual
	FID <- INDS[[which(lapply(INDS, "[[", 1) == as.character(gnrtns$FID[i]))]]
	MID <- INDS[[which(lapply(INDS, "[[", 1) == as.character(gnrtns$MID[i]))]]
	# loop over all chromosomes
	for(j in 1:length(unique(map$Chromosome))) {
		chr.map <- subset(map, Chromosome==as.character(unique(map$Chromosome)[j]))
		chr.prob <- subset(prob, Chromosome==as.character(unique(map$Chromosome)[j]))
		interf.range <- subset(intrfrnc, Tgu==as.character(unique(map$Chromosome))[j])$inter_range
		# get at least one crossover per chromosome
		xFID.1 <- 0
		xMID.1 <- 0
		# find the position where the crossover occurs in both female and male
		while(sum(xFID.1)==0) {
		xFID.1 <- rbinom(n=nrow(chr.prob), 1, p=chr.prob$Rec.Prob)
		}
		while(sum(xMID.1)==0) {
		xMID.1 <- rbinom(n=nrow(chr.prob), 1, p=chr.prob$Rec.Prob)
		}
		# introduce crossover interference if required
		if(interf.range>0) {
			# introduce interference - FID
			inter.F <- which(xFID.1==1)
			l <- 1
			while(l <= length(inter.F)) {
				sct <- xFID.1[max(inter.F[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.F[l]+interf.range/prob$Pos.Phys.End[1],length(xFID.1))]
				while(sum(sct)>1) {
					sct[sample(c(which(sct==1)),1,prob=c(1-chr.prob$Rec.Prob[which(sct==1)]))] <- 0  
				}
			xFID.1[max(inter.F[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.F[l]+interf.range/prob$Pos.Phys.End[1],length(xFID.1))] <- sct
			inter.F <- which(xFID.1==1)
			l <- l+1
			}
			# introduce interference - MID
			inter.M <- which(xMID.1==1)
			l <- 1
			while(l <= length(inter.M)) {
				sct <- xMID.1[max(inter.M[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.M[l]+interf.range/prob$Pos.Phys.End[1],length(xMID.1))]
				while(sum(sct)>1) {
					sct[sample(c(which(sct==1)),1,prob=c(1-chr.prob$Rec.Prob[which(sct==1)]))] <- 0
				}
			xMID.1[max(inter.M[l]-interf.range/prob$Pos.Phys.End[1],1):min(inter.M[l]+interf.range/prob$Pos.Phys.End[1],length(xMID.1))] <- sct
			inter.M <- which(xMID.1==1)
			l <- l+1
			}			
		}

		# choose left (which is equivalent to either maternal or paternal) haplotype randomly - FID
		FID.hapL <- sample(c(0,1),1)
		# split haplotypes at choosen recombination spots for both chromatids - FID
		FID.1.strt <- FID[[2*j+(FID.hapL%%2)]]
		FID.2.strt <- FID[[2*j+(FID.hapL%%2)]]
		FID.1.end <- FID[[2*j+((FID.hapL+1)%%2)]]
		FID.2.end <- FID[[2*j+((FID.hapL+1)%%2)]]
		# split all chromatids
		FID.1.strt <- splitAt(FID.1.strt,(which(xFID.1==1)+1))
		FID.2.strt <- splitAt(FID.2.strt,(which(xFID.1==1)+1))
		FID.1.end <- splitAt(FID.1.end,(which(xFID.1==1)+1))
		FID.2.end <- splitAt(FID.2.end,(which(xFID.1==1)+1))
		
		FID.Haplo1 <- c()
		FID.Haplo2 <- c()
		FID.Haplo3 <- c()
		FID.Haplo4 <- c()
		m <- 1
		haplos <- c(1,2,3,4)
		while(m<length(FID.1.strt)) {
			element1 <- sample(haplos,1)
			if(element1 %in% c(1,2)) {
				element2 <- sample(c(3,4),1)
				rest1 <- c(1,2)[!c(1,2) == element1]
				rest2 <- c(3,4)[!c(3,4) == element2]
			} else {
				element2 <- sample(c(1,2),1)
				rest1 <- c(3,4)[!c(3,4) == element1] 
				rest2 <- c(1,2)[!c(1,2) == element2]}
			if(m==1) {
				haplo1.start <- c(FID.1.strt[2*m-1],FID.2.strt[2*m-1],FID.1.end[2*m-1],FID.2.end[2*m-1])[element1]
				haplo2.start <- c(FID.1.strt[2*m-1],FID.2.strt[2*m-1],FID.1.end[2*m-1],FID.2.end[2*m-1])[element2]
				haplo1.end <- c(FID.1.strt[2*m],FID.2.strt[2*m],FID.1.end[2*m],FID.2.end[2*m])[element2]
				haplo2.end <- c(FID.1.strt[2*m],FID.2.strt[2*m],FID.1.end[2*m],FID.2.end[2*m])[element1]
				haplo3.start <- c(FID.1.strt[2*m-1],FID.2.strt[2*m-1],FID.1.end[2*m-1],FID.2.end[2*m-1])[rest1]
				haplo3.end <- c(FID.1.strt[2*m],FID.2.strt[2*m],FID.1.end[2*m],FID.2.end[2*m])[rest1]
				haplo4.start <- c(FID.1.strt[2*m-1],FID.2.strt[2*m-1],FID.1.end[2*m-1],FID.2.end[2*m-1])[rest2]
				haplo4.end <- c(FID.1.strt[2*m],FID.2.strt[2*m],FID.1.end[2*m],FID.2.end[2*m])[rest2]
				FID.Haplo1.1 <- unlist(c(haplo1.start,haplo1.end))
				FID.Haplo2.1 <- unlist(c(haplo2.start,haplo2.end))
				FID.Haplo3.1 <- unlist(c(haplo3.start,haplo3.end))
				FID.Haplo4.1 <- unlist(c(haplo4.start,haplo4.end))

				if(tail(FID.Haplo1.1,1)<=tail(FID.Haplo2.1,1)) {
					FID.Haplo1 <- FID.Haplo1.1
					FID.Haplo3 <- FID.Haplo2.1}
				if(tail(FID.Haplo1.1,1)>tail(FID.Haplo2.1,1)) {
					FID.Haplo3 <- FID.Haplo1.1
					FID.Haplo1 <- FID.Haplo2.1}				
				if(tail(FID.Haplo3.1,1)<=tail(FID.Haplo4.1,1)) {
					FID.Haplo2 <- FID.Haplo3.1
					FID.Haplo4 <- FID.Haplo4.1}				
				if(tail(FID.Haplo3.1,1)>tail(FID.Haplo4.1,1)) {
					FID.Haplo4 <- FID.Haplo3.1
					FID.Haplo2 <- FID.Haplo4.1}				
				
			} else {
				haplo1.start <- c(FID.1.strt[m+1],FID.2.strt[m+1],FID.1.end[m+1],FID.2.end[m+1])[element1]
				haplo2.start <- c(FID.1.strt[m+1],FID.2.strt[m+1],FID.1.end[m+1],FID.2.end[m+1])[element2]
				haplo3.start <- c(FID.1.strt[m+1],FID.2.strt[m+1],FID.1.end[m+1],FID.2.end[m+1])[rest1]
				haplo4.start <- c(FID.1.strt[m+1],FID.2.strt[m+1],FID.1.end[m+1],FID.2.end[m+1])[rest2]
				HH <- sample(list(FID.Haplo1,FID.Haplo2),2,replace=FALSE)
				FID.Haplo1.1 <- unlist(c(HH[1],haplo1.start))
				FID.Haplo2.1 <- unlist(c(HH[2],haplo4.start))
				HH <- sample(list(FID.Haplo3,FID.Haplo4),2,replace=FALSE)
				FID.Haplo3.1 <- unlist(c(HH[1],haplo2.start))
				FID.Haplo4.1 <- unlist(c(HH[2],haplo3.start))
				
				if(tail(FID.Haplo1.1,1)<=tail(FID.Haplo2.1,1)) {
					FID.Haplo1 <- FID.Haplo1.1
					FID.Haplo3 <- FID.Haplo2.1}
				if(tail(FID.Haplo1.1,1)>tail(FID.Haplo2.1,1)) {
					FID.Haplo3 <- FID.Haplo1.1
					FID.Haplo1 <- FID.Haplo2.1}				
				if(tail(FID.Haplo3.1,1)<=tail(FID.Haplo4.1,1)) {
					FID.Haplo2 <- FID.Haplo3.1
					FID.Haplo4 <- FID.Haplo4.1}				
				if(tail(FID.Haplo3.1,1)>tail(FID.Haplo4.1,1)) {
					FID.Haplo4 <- FID.Haplo3.1
					FID.Haplo2 <- FID.Haplo4.1}
			}
			m <- m+1
		}
		
		# choose left (which is equivalent to either maternal or paternal) haplotype randomly - MID
		MID.hapL <- sample(c(0,1),1)
		# split haplotypes at choosen recombination spots - MID
		MID.1.strt <- MID[[2*j+(MID.hapL%%2)]]
		MID.2.strt <- MID[[2*j+(MID.hapL%%2)]]
		MID.1.end <- MID[[2*j+((MID.hapL+1)%%2)]]
		MID.2.end <- MID[[2*j+((MID.hapL+1)%%2)]]
		# split all chromatids
		MID.1.strt <- splitAt(MID.1.strt,(which(xMID.1==1)+1))
		MID.2.strt <- splitAt(MID.2.strt,(which(xMID.1==1)+1))
		MID.1.end <- splitAt(MID.1.end,(which(xMID.1==1)+1))
		MID.2.end <- splitAt(MID.2.end,(which(xMID.1==1)+1))
		
		MID.Haplo1 <- c()
		MID.Haplo2 <- c()
		MID.Haplo3 <- c()
		MID.Haplo4 <- c()
		m <- 1
		haplos <- c(1,2,3,4)
		while(m<length(MID.1.strt)) {
			element1 <- sample(haplos,1)
			if(element1 %in% c(1,2)) {
				element2 <- sample(c(3,4),1)
				rest1 <- c(1,2)[!c(1,2) == element1]
				rest2 <- c(3,4)[!c(3,4) == element2]
			} else {
				element2 <- sample(c(1,2),1)
				rest1 <- c(3,4)[!c(3,4) == element1] 
				rest2 <- c(1,2)[!c(1,2) == element2]}
			if(m==1) {
				haplo1.start <- c(MID.1.strt[2*m-1],MID.2.strt[2*m-1],MID.1.end[2*m-1],MID.2.end[2*m-1])[element1]
				haplo2.start <- c(MID.1.strt[2*m-1],MID.2.strt[2*m-1],MID.1.end[2*m-1],MID.2.end[2*m-1])[element2]
				haplo1.end <- c(MID.1.strt[2*m],MID.2.strt[2*m],MID.1.end[2*m],MID.2.end[2*m])[element2]
				haplo2.end <- c(MID.1.strt[2*m],MID.2.strt[2*m],MID.1.end[2*m],MID.2.end[2*m])[element1]
				haplo3.start <- c(MID.1.strt[2*m-1],MID.2.strt[2*m-1],MID.1.end[2*m-1],MID.2.end[2*m-1])[rest1]
				haplo3.end <- c(MID.1.strt[2*m],MID.2.strt[2*m],MID.1.end[2*m],MID.2.end[2*m])[rest1]
				haplo4.start <- c(MID.1.strt[2*m-1],MID.2.strt[2*m-1],MID.1.end[2*m-1],MID.2.end[2*m-1])[rest2]
				haplo4.end <- c(MID.1.strt[2*m],MID.2.strt[2*m],MID.1.end[2*m],MID.2.end[2*m])[rest2]
				MID.Haplo1.1 <- unlist(c(haplo1.start,haplo1.end))
				MID.Haplo2.1 <- unlist(c(haplo2.start,haplo2.end))
				MID.Haplo3.1 <- unlist(c(haplo3.start,haplo3.end))
				MID.Haplo4.1 <- unlist(c(haplo4.start,haplo4.end))

				if(tail(MID.Haplo1.1,1)<=tail(MID.Haplo2.1,1)) {
					MID.Haplo1 <- MID.Haplo1.1
					MID.Haplo3 <- MID.Haplo2.1}
				if(tail(MID.Haplo1.1,1)>tail(MID.Haplo2.1,1)) {
					MID.Haplo3 <- MID.Haplo1.1
					MID.Haplo1 <- MID.Haplo2.1}				
				if(tail(MID.Haplo3.1,1)<=tail(MID.Haplo4.1,1)) {
					MID.Haplo2 <- MID.Haplo3.1
					MID.Haplo4 <- MID.Haplo4.1}				
				if(tail(MID.Haplo3.1,1)>tail(MID.Haplo4.1,1)) {
					MID.Haplo4 <- MID.Haplo3.1
					MID.Haplo2 <- MID.Haplo4.1}				
				
			} else {
				haplo1.start <- c(MID.1.strt[m+1],MID.2.strt[m+1],MID.1.end[m+1],MID.2.end[m+1])[element1]
				haplo2.start <- c(MID.1.strt[m+1],MID.2.strt[m+1],MID.1.end[m+1],MID.2.end[m+1])[element2]
				haplo3.start <- c(MID.1.strt[m+1],MID.2.strt[m+1],MID.1.end[m+1],MID.2.end[m+1])[rest1]
				haplo4.start <- c(MID.1.strt[m+1],MID.2.strt[m+1],MID.1.end[m+1],MID.2.end[m+1])[rest2]
				HH <- sample(list(MID.Haplo1,MID.Haplo2),2,replace=FALSE)
				MID.Haplo1.1 <- unlist(c(HH[1],haplo1.start))
				MID.Haplo2.1 <- unlist(c(HH[2],haplo4.start))
				HH <- sample(list(MID.Haplo3,MID.Haplo4),2,replace=FALSE)
				MID.Haplo3.1 <- unlist(c(HH[1],haplo2.start))
				MID.Haplo4.1 <- unlist(c(HH[2],haplo3.start))
				
				if(tail(MID.Haplo1.1,1)<=tail(MID.Haplo2.1,1)) {
					MID.Haplo1 <- MID.Haplo1.1
					MID.Haplo3 <- MID.Haplo2.1}
				if(tail(MID.Haplo1.1,1)>tail(MID.Haplo2.1,1)) {
					MID.Haplo3 <- MID.Haplo1.1
					MID.Haplo1 <- MID.Haplo2.1}				
				if(tail(MID.Haplo3.1,1)<=tail(MID.Haplo4.1,1)) {
					MID.Haplo2 <- MID.Haplo3.1
					MID.Haplo4 <- MID.Haplo4.1}				
				if(tail(MID.Haplo3.1,1)>tail(MID.Haplo4.1,1)) {
					MID.Haplo4 <- MID.Haplo3.1
					MID.Haplo2 <- MID.Haplo4.1}
			}
			m <- m+1
		}
		
		# Select one of the four female haplotypes and one of the four male haplotypes for the individual
		Haplo.Ind.1 <- sample(c(1,2,3,4),1, replace=FALSE)
		Haplo.Ind.2 <- sample(c(1,2,3,4),1, replace=FALSE)
		F[[2*j]] <- if(Haplo.Ind.1==1) {FID.Haplo1} else if(Haplo.Ind.1==2) {FID.Haplo2} else if(Haplo.Ind.1==3) {FID.Haplo3} else if(Haplo.Ind.1==4) {FID.Haplo4}
		F[[2*j+1]] <- if(Haplo.Ind.2==1) {MID.Haplo1} else if(Haplo.Ind.2==2) {MID.Haplo2} else if(Haplo.Ind.2==3) {MID.Haplo3} else if(Haplo.Ind.2==4) {MID.Haplo4}	
	}
	INDS[[k]] <- F
	k <- k+1
}

flush.console()	
print(paste("Created all individuals for simulation number ",nn," out of ",nSims,sep=""))

# calculate the relatedness between all individuals in the pedigree
for(ii in 1:length(INDS)) {
	for(jj in ii:length(INDS)) {
		Ind1 <- INDS[[ii]]
		Ind2 <- INDS[[jj]]
		complete.genome <- 0
		related.genome <- 0
		for(ll in 2:((length(Ind1)-1)/2)) {
			xa <- ifelse(Ind1[[2*ll-2]]==Ind2[[2*ll-2]],1,0)
			xb <- ifelse(Ind1[[2*ll-1]]==Ind2[[2*ll-2]],1,0)
			xc <- ifelse(Ind1[[2*ll-2]]==Ind2[[2*ll-1]],1,0)
			xd <- ifelse(Ind1[[2*ll-1]]==Ind2[[2*ll-1]],1,0)
			complete.chromo <- (sum(length(xa)+length(xb)+length(xc)+length(xd)))/2
			related.chromo <- sum(sum(xa)+sum(xb)+sum(xc)+sum(xd))
			complete.genome <- complete.genome + complete.chromo
			related.genome <- related.genome + related.chromo
		}
		kinship <- related.genome/complete.genome
		write.table(as.data.frame(list(nn,paste(as.character(Ind1[[1]]),"_",as.character(Ind2[[1]]),sep=""),as.character(Ind1[[1]]),as.character(Ind2[[1]]),kinship)), paste(path, outfile, sep=""), append=TRUE, row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE, eol="\n")
	}
}

flush.console()	
print(paste("Taking ",round(Sys.time()-timeStart,digit=2)," minutes for simulation number ",nn," out of ",nSims,sep=""))
}




#####################################################################################################################################################################
### SUMMARY #########################################################################################################################################################
#####################################################################################################################################################################
if(smmry==TRUE) {
rm(list=ls())
require(pedantics)
# define path to files here
path = "C:\\Users\\Data\\"
ped <- read.table(paste(path,"data_SIM.ped.txt",sep=""), sep="\t", header=TRUE)
ped$MID[which(ped$MID==0)] <- NA
ped$FID[which(ped$FID==0)] <- NA

# get inbreeding coefficient and relatedness of individuals in our pedigree
pedToDo <- ped[,1:3]
colnames(pedToDo) <- c('id','dam','sire')
Relatedness <- pedigreeStats(pedToDo, includeA=TRUE, graphicalReport="n")
FPed <- Relatedness$inbreedingCoefficients
RelA <- Relatedness$Amatrix
id.row1 <- matrix(rep(1:nrow(RelA),nrow(RelA)), nrow=nrow(RelA))
id.col1 <- matrix(rep(1:nrow(RelA),rep(nrow(RelA),nrow(RelA))), ncol=nrow(RelA))
id.row.ex1<-(id.row1[upper.tri(id.row1, diag=TRUE)])
id.col.ex1<-id.col1[upper.tri(id.col1, diag=TRUE)]
val1 <- RelA[upper.tri(RelA, diag=TRUE)]
RelA <- data.frame(id.row.ex1, id.col.ex1, val1)
names(RelA)<-c("IndID1","IndID2","Relatedness")
RelA <- RelA[order(RelA$IndID1),]
RelA$Combination <- paste(RelA$IndID1,"_",RelA$IndID2,sep="")

# summarize simulation
file_list <- list.files(paste(path,"Relatedness\\",sep=""))
for (ll in 1:length(file_list)) {
	# if the merged dataset doesn't exist, create it
	if (!exists("out.sim")) {
		out.sim <- read.table(paste(path,"Relatedness\\",file_list[ll],sep=""), header=TRUE, sep="\t")
	}
	# if the merged dataset does exist, append to it
	else if (exists("out.sim")) {
		temp_out <- read.table(paste(path,"Relatedness\\",file_list[ll],sep=""), header=TRUE, sep="\t")
		out.sim <- rbind(out.sim, temp_out)
		rm(temp_out)
	}
}

# Create new Sim.run variable that is unique for each simulation run
L <- sum(out.sim$Sim.run==1)
for(i in 1:(nrow(out.sim)/L)) {
	out.sim[(i*L-L+1):(i*L),1] <- rep(i,L)
	flush.console()
	if(i %% 100 == 0) {print(i)}
}

# Save all simulation data on hard drive
write.table(out.sim,paste(path, "+++AllData.Relatedness.sim100kb.txt", sep=""), append=FALSE, row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE, eol="\n")

# Select only the first 10000 simulation runs for the summary, you can also sample from all simulation runs
out.sim <- subset(out.sim,Sim.run<=10000)
#out.sim <- out.sim[sample(c(1:max(out.sim$Sim.run)),n=10000, replace=FALSE), ]

# Create summary containing the mean, SD, etc. of F for each individual
summary.out <- data.frame(matrix(c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),nrow=1))
colnames(summary.out) <- c("Combination","IndID1","IndID2","Relatedness.Ped","Mean.R","SD.R","SD.R.low","SD.R.up","Min.R","Max.R","95CILow.R","95CIUp.R")
for(mm in 1:length(unique(out.sim$Combination))) {
	comb <- subset(out.sim,Combination==unique(out.sim$Combination)[mm])
	summary.out[mm,"Combination"] <- as.character(comb$Combination[1])
	summary.out[mm,"IndID1"] <- comb$IndID1[1]
	summary.out[mm,"IndID2"] <- comb$IndID2[1]
	summary.out[mm,"Relatedness.Ped"] <- RelA[which(RelA$Combination==comb$Combination[1]),"Relatedness"]
	summary.out[mm,"Mean.R"] <- mean(comb$Relatedness.R)
	summary.out[mm,"SD.R"] <- sd(comb$Relatedness.R)
	summary.out[mm,"SD.R.low"] <- quantile(comb$Relatedness.R,c(pnorm(-1),pnorm(1)))[1]
	summary.out[mm,"SD.R.up"] <- quantile(comb$Relatedness.R,c(pnorm(-1),pnorm(1)))[2]
	summary.out[mm,"Min.R"] <- min(comb$Relatedness.R)
	summary.out[mm,"Max.R"] <- max(comb$Relatedness.R)
	summary.out[mm,"95CILow.R"] <- quantile(comb$Relatedness.R,c(0.025,0.975))[1]
	summary.out[mm,"95CIUp.R"] <- quantile(comb$Relatedness.R,c(0.025,0.975))[2]
	flush.console()
	print(mm)
}
write.table(summary.out,paste(path, "+++summary.n10000.Relatedness.sim100kb.txt", sep=""), append=FALSE, row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE, eol="\n")
nrow(out.sim)/nrow(summary.out)
plot(summary.out$Relatedness.Ped,summary.out$Mean.R)
abline(lm(summary.out$Mean.R~summary.out$Relatedness.Ped),col="red")
}
#####################################################################################################################################################################
### END #############################################################################################################################################################
#####################################################################################################################################################################