rm(list=ls())

# library to do mark-recapture statistics

library(marked)
library(coin)
library(smatr)

# write out the path to your directory

myPath <- '/path/to/files/'

# this is the main simulation

# parameters are: 1) population size (whole number), 2) difference in risk between susceptible and resistant genotypes (proportion 0 to 1), 3) identity of resistant genotype (choose 'allele' or 'het') 4) mean number offspring for a pair (whole number), 5) slope of offspring reduction per infection level (between 0 and 1), 6) heritability of infection risk phenotype (between 0 and 1), 7) slope of change in capture rate based on infection levels (between 0 and 1), 8) number of sampling events (whole number), 9) sample size (whole number)



mark_recap_sim <- function(Nind, alleleDiff, bestGeno, mean.offspring,  inf.penalty, allelePredict, infecBias, no.Samp, sampSize){

	# set the resistance phenotype value, drawn from a uniform distribution from 0 to 1. The value represents the probability of infection given that the individual is exposed to the pathogen
	
	inds <- cbind(rbinom(Nind, 1, 0.5), rbinom(Nind, 1, 0.5))
	
	rownames(inds) <- paste0('parent_', 1:Nind)
	
	# decide if individuals get infected. Use the resistance value as the probability of infection (a '1' or positive outcome to the trial) in a random binomial trial
	
	# collapse genotypes into a single vector
	
	geno <- rowSums(inds)
	
	# assign probability of infection to each genotype depending on chosen advantaged genotype
	
	if(bestGeno == 'allele'){
		
		geno[geno == 0] <- alleleDiff
		
		geno[geno %in% c(1, 2)] <- 1- alleleDiff
		
	}else if (bestGeno == 'het'){
		
		geno[geno == 1] <- 1- alleleDiff
		
		geno[geno %in% c(0, 2)] <- alleleDiff			
		
	}
	
	# add randomness to decrease heritability
	
	geno2 <- geno*(allelePredict) + (1-allelePredict)*rnorm(length(geno), 0.5, 0.2) 
	
	# handle boundary conditions
	
	geno2[geno2 < 0] <- 0
	
	geno2[geno2 > 1] <- 1
	
	# choose which individuals are infected based on genotype risk
	
	infected <- unlist(lapply(geno2, function(x){return(rbinom(1, 1, x))}))
	
	# make a 3 column matrix of genotype value and infection state
	
	parentPop <- cbind(inds, infected)

	# divide randomly into mothers and fathers
	
	index <- sample(1:Nind, Nind/2)
	
	index <- index[order(index)]
	
	# distribute genotypes into males and females
	
	mothers <- parentPop[(1:Nind) %in% index,]
	
	fathers <- parentPop[!(1:Nind) %in% index,]
	
	# randomly sample mothers and fathers with replacement (to get half sibs), generate offspring
	
	offspring <- list()
	
	for(i in 1:nrow(mothers)){

		# get phenotype value and infection status
		motherTmp <- mothers[i,]
		
		# find a father (random sample to allow for half-sibs)
		fatherSamp <- sample(1:nrow(fathers), 1)
		
		# get phenotype value and infection status
		fatherTmp <- fathers[fatherSamp,]
		
		# calculate how many offspring they lose by their infection state
		penaltyTmp <- sum(motherTmp[3], fatherTmp[3])/2
		
		# fix this version
		
		meanTmp <- mean.offspring*(1- penaltyTmp)*inf.penalty + mean.offspring*(1 - inf.penalty)
		
		# randomly sample their # offspring from a normal distribution
		noOffspring <- round(rpois(1, lambda = meanTmp))
		
		# generate offspring if they have them, generate placeholder if not
		
		if(noOffspring < 1){print('skip')}else{
		
			# generate offspring genotypes
			offspringAlleles  <- t(replicate(noOffspring, c(sample(motherTmp[1:2], 1), sample(fatherTmp[1:2], 1))))
		
			# save the offspring values
			offspring[[i]] <- cbind(rep(rownames(mothers)[i], times= noOffspring), rep(rownames(fathers)[fatherSamp], times= noOffspring), offspringAlleles)
		}
		
	}
	
	# make a matrix
	offMat <- data.frame(do.call(rbind, offspring))
	
	rownames(offMat) <- paste0('offspring_', 1:nrow(offMat))
	
	# infect the offspring
	
	expPool <- rowSums(cbind(as.numeric(offMat$V3), as.numeric(offMat$V4)))
	
	names(expPool) <- rownames(offMat)

	if(bestGeno == 'allele'){
		
		expPool[expPool == 0] <- alleleDiff
		
		expPool[expPool %in% c(1, 2)] <- 1- alleleDiff
		
	}else if (bestGeno == 'het'){
		
		expPool[expPool == 1] <- 1- alleleDiff
		
		expPool[expPool %in% c(0, 2)] <- alleleDiff			
		
	}
	
	expPool2 <- expPool*(allelePredict) + (1-allelePredict)*rnorm(length(expPool), 0.5, 0.2)
	
	expPool2[expPool2 < 0] <- 0
	
	expPool2[expPool2 > 1] <- 1
	
	infectedOff <- unlist(lapply(expPool2, function(x){return(rbinom(1, 1, x))}))	
	
	names(infectedOff) <- 	rownames(offMat)
	
	# put together the full population
	
	totPop <- c(infected, infectedOff)
	
	# perform mark-recapture simulation
	
	capHistFull <- list()
	
	capHistUnbiased <- list()
	
	# loop over the number of sampling events, doing simultaneous samples with no difference in capture rates and differences in capture rates depending on infection state
	
	for(i in 1:no.Samp){
		
		inf <- totPop[totPop == 1]
		
		nonInf <- totPop[totPop == 0]
		
		prop <- length(inf)/length(totPop)
		
		infProp <- prop*(infecBias)/(prop*(infecBias) + (1-prop))
		
		infSize <- round(sampSize*infProp)
		
		infSamp <- sample(inf, infSize)
		
		noInfSamp <- sample(nonInf, sampSize - infSize)
		
		capHist <- c(infSamp, noInfSamp)
		
		chFull <- rep(0, length=length(totPop))
		
		names(chFull) <- names(totPop)
		
		chFull[names(chFull) %in% names(capHist)] <- 1
		
		capHistFull[[i]] <- chFull	
		
		capUnbiased <- totPop[sample(1:length(totPop), sampSize)]
		
		chUnb <- rep(0, length=length(totPop))
		
		names(chUnb) <- names(totPop)
		
		chUnb[names(chUnb) %in% names(capUnbiased)] <- 1
				
		capHistUnbiased[[i]] <- chUnb
	
	}
	
	# formulate capture history inputs for the 'marked' library
	
	chTmp <- do.call(cbind, capHistFull)
	
	chTmp2 <- apply(chTmp, 1, function(x){paste0(x, collapse='')})
	
	# add infection state to the capture histories
	
	chTot <- data.frame(chTmp2, totPop)
	
	# remove unsampled individuals, which would not be visible in an empirical study
	
	ch <- chTot[!chTot$chTmp2 == '00000',]
	
	names(ch) <- c('ch', 'infected')
	
	# start 'marked' analysis
	
	ch.proc <- process.data(ch, model='cjs', begin.time=1)
		
	ch.ddl <- make.design.data(ch.proc)
		
	mod.Phi.inf.pdot <- crm(ch.proc, ch.ddl, groups='infected', model.parameters=list(Phi=list(formula=~1), p=list(formula=~infected)))
	
	# format output 
	
	parentFormat <- data.frame(rep(NA, length=nrow(parentPop)), rep(NA, length=nrow(parentPop)), parentPop)
	
	rownames(parentFormat) <- rownames(parentPop)
	
	colnames(parentFormat) <- c('FATHER', 'MOTHER', 'allele1', 'allele2','trait1')
	
	offFormat <- data.frame(offMat, infectedOff)
	
	colnames(offFormat) <- c('FATHER', 'MOTHER', 'allele1', 'allele2','trait1')
	
	popMat <- data.frame(rbind(parentFormat, offFormat))

	# repeat for the control samples 
	
	chTmpUnb <- do.call(cbind, capHistUnbiased)
	
	chTmp2Unb <- apply(chTmpUnb, 1, function(x){paste0(x, collapse='')})
	
	chTotUnb <- data.frame(chTmp2Unb, totPop)
	
	chUnb <- chTotUnb[! chTotUnb $chTmp2Unb == '00000',]
	
	names(chUnb) <- c('ch', 'infected')
	
	ch.proc.unb <- process.data(chUnb, model='cjs', begin.time=1)
		
	ch.ddl.unb <- make.design.data(ch.proc.unb)
		
	mod.Phi.unb <- crm(ch.proc.unb, ch.ddl.unb, groups='infected', model.parameters=list(Phi=list(formula=~1), p=list(formula=~infected)))
	
	return(list(mod.Phi.inf.pdot, mod.Phi.unb, popMat, ch, chUnb))
	
}


###############################################
# quick running example analysis ##############
###############################################

# run the simulation over a range of values for parameter that controls how much genotype predicts infection risk (relative to a random element)

# the loop prints 'skip' when parental pairs have no offspring, and prints out a report from the mark-recapture function

mr_output <- list()

genoPredict <- seq(0, 1, by=0.05)


for(i in 1:length(genoPredict)){

	mr_output[[i]] <- mark_recap_sim(5000, 0.9, 'het', 10, 0.5, genoPredict[i], 0.3, 5, 500)

}


# take the output of each run of the model and calculate relative risk of infection for each genotype

# note - this loop is set up for the heterozygote case. For the allele case, 


rel_risk_total <- c()

rel_risk_sample <- c()

rel_risk_control <- c()

for(i in 1:length(mr_output)){
	
	tmpGeno <- mr_output[[i]][[3]]
	
	genotypes <- as.numeric(tmpGeno[,3]) + as.numeric(tmpGeno[,4])
	
	names(genotypes) <- rownames(tmpGeno)
	
	# get the relative risk for the full population
	
	geno0rate <- sum(as.numeric(tmpGeno[,5])[genotypes == 0])/length(as.numeric(tmpGeno[,5])[genotypes == 0])
	
	geno1rate <- sum(as.numeric(tmpGeno[,5])[genotypes == 1])/length(as.numeric(tmpGeno[,5])[genotypes == 1])
	
	geno2rate <- sum(as.numeric(tmpGeno[,5])[genotypes == 2])/length(as.numeric(tmpGeno[,5])[genotypes == 2])
	
	rel_risk_total[i] <- geno1rate/mean(geno0rate, geno2rate)
	
	# find the individuals captured in the biased and control events
	
	biased <- rownames(mr_output[[i]][[4]])
	
	biasedGeno <- tmpGeno[rownames(tmpGeno) %in% biased,]
	
	control <- rownames(mr_output[[i]][[5]])
	
	controlGeno <- tmpGeno[rownames(tmpGeno) %in% control,]
	
	# repeat relative risk calculation for each set
	
	genotypesCap <- as.numeric(biasedGeno[,3]) + as.numeric(biasedGeno[,4])
	
	geno0rateCap <- sum(as.numeric(biasedGeno[,5])[genotypesCap == 0])/length(as.numeric(biasedGeno[,5])[genotypesCap == 0])
	
	geno1rateCap <- sum(as.numeric(biasedGeno[,5])[genotypesCap == 1])/length(as.numeric(biasedGeno[,5])[genotypesCap == 1])
	
	geno2rateCap <- sum(as.numeric(biasedGeno[,5])[genotypesCap == 2])/length(as.numeric(biasedGeno[,5])[genotypesCap == 2])
	
	rel_risk_sample[i] <- geno1rateCap/mean(geno0rateCap, geno2rateCap)
	
	# control
	
	genotypesControl <- as.numeric(controlGeno[,3]) + as.numeric(controlGeno[,4])
	
	geno0rateCap <- sum(as.numeric(controlGeno[,5])[genotypesControl == 0])/length(as.numeric(controlGeno[,5])[genotypesControl == 0])
	
	geno1rateCap <- sum(as.numeric(controlGeno[,5])[genotypesControl == 1])/length(as.numeric(controlGeno[,5])[genotypesControl == 1])
	
	geno2rateCap <- sum(as.numeric(controlGeno[,5])[genotypesControl == 2])/length(as.numeric(controlGeno[,5])[genotypesControl == 2])
	
	rel_risk_control[i] <- geno1rateCap/mean(geno0rateCap, geno2rateCap)
	
	
}

# plot out the results, comparing the full population results to the sampled results


plot(rel_risk_total, rel_risk_control, xlab='true relative risk', ylab='measured relative risk', pch=21, bg='#ffffbf')

points(rel_risk_total, rel_risk_sample, pch=21, bg='#2c7bb6')

legend('topleft', legend=c('control', 'different rate'), fill=c('#ffffbf', '#2c7bb6'), bty='n')

# test 1: are the slopes the same?

# does the slope of the relative risk values from the control samples against the relative risk from the full dataset differ from one?

slope.test(rel_risk_total, rel_risk_control, 1)

# does the slope of the relative risk values from the biased samples against the relative risk from the full dataset differ from one?

slope.test(rel_risk_total, rel_risk_sample, 1)


# test 2: do the samples have higher variance than the full dataset?


fligner.test(list(rel_risk_total, rel_risk_control))

fligner.test(list(rel_risk_total, rel_risk_sample))


# test 3: do the mark-recapture statistics accurately separate control and biased runs?

sample_p <- c()

control_p <- c()

for(i in 1:length(mr_output)){
	
	tmpPsampInf <- mr_output[[i]][[1]]$results$reals$p[1, 3]	
	
	tmpPsampUninf <- mr_output[[i]][[1]]$results$reals$p[2, 3]	
	
	sample_p[i] <- tmpPsampUninf - tmpPsampInf

	tmpPcontrolInf <- mr_output[[i]][[2]]$results$reals$p[1, 3]	

	tmpPcontrolUninf <- mr_output[[i]][[2]]$results$reals$p[2, 3]	
	
	control_p[i] <- tmpPcontrolUninf - tmpPcontrolInf
	
}

# are the differences in capture probabilities signifcantly different between control and biased runs?

t.test(sample_p, control_p)

# do the differences in capture probabilities correlate with residuals from a linear model comparing full and sampled population values?

controlResid <- lm(rel_risk_control ~ rel_risk_total)$residuals

sampleResid <- lm(rel_risk_sample ~ rel_risk_total)$residuals


summary(lm(control_p ~ controlResid))

summary(lm(sample_p ~ sampleResid))



##############################################################
# full code used in paper - can run very long! ###############
##############################################################

# calculate impact of infection on reproductive success

# each loop is run four times: twice each with 'allele' and 'het' for the resistant genotype, each with infecBias <- runif(200, 1.1, 1.9), and infecBias <- runif(200, 0.1, 0.9)

# edit the write.table command at the end of the section to reflect genotype and bias 


reproSim <- list()

inf.penalty <- runif(200, 0, 1)
infecBias <- runif(200, 1.1, 1.9) # in this simulation, infection increases capture probability


for(i in 1:length(inf.penalty)){

	reproSim[[i]] <- mark_recap_sim(5000, 0.8, 'allele', 10, inf.penalty[i], 0.8, infecBias[i], 5, 500)

}


# process the outputs to find the loss of offspring

real_Off_uninf <- c()
real_Off_inf <- c()
cap_Off_uninf <- c()
cap_Off_inf <- c()
cap_p_uninf <- c()
cap_p_inf <- c()
cap_val_uninf_cont <- c()
cap_val_inf_cont <- c()
cap_p_uninf_cont <- c()
cap_p_inf_cont <- c()
	
for(j in 1:length(reproSim)){
		
	inf_rate_real <- reproSim[[j]][[3]]
	
	# get real offspring for infected vs. uninfected
	
	parents <- inf_rate_real[grepl('parent', rownames(inf_rate_real)),]
	
	offspring <- inf_rate_real[!grepl('parent', rownames(inf_rate_real)),]
	
	parentsInf <- parents[parents$trait1 == 1,]
	
	parentsUninf <- parents[parents$trait1 == 0,]
	
	offOfInf <- unlist(lapply(rownames(parentsInf), function(x){sum(offspring[,1] == x) +  sum(offspring[,2] == x)}))
	
	offOfUninf <- unlist(lapply(rownames(parentsUninf), function(x){sum(offspring[,1] == x) +  sum(offspring[,2] == x)}))
		
	real_Off_uninf[j] <- mean(offOfUninf)
	
	real_Off_inf[j] <- mean(offOfInf)

	# get measured penalty from biased sampling
		
	captured <- inf_rate_real[rownames(inf_rate_real) %in% rownames(reproSim[[j]][[4]]),]
	
	parentsCap <- captured[grepl('parent', rownames(captured)),]
	
	offspringCap <- captured[!grepl('parent', rownames(captured)),]
	
	parentsInfCap <- parentsCap[parentsCap$trait1 == 1,]
	
	parentsUninfCap <- parentsCap[parentsCap$trait1 == 0,]
	
	offOfInfCap <- unlist(lapply(rownames(parentsInfCap), function(x){sum(offspringCap[,1] == x) +  sum(offspringCap[,2] == x)}))
	
	offOfUninfCap <- unlist(lapply(rownames(parentsUninfCap), function(x){sum(offspringCap[,1] == x) +  sum(offspringCap[,2] == x)}))	
			
	cap_Off_uninf[j] <- mean(offOfUninfCap)
	
	cap_Off_inf[j] <- mean(offOfInfCap)
	
	cap_p_uninf[j] <- reproSim[[j]][[1]]$results$reals$p[1, 3]
		
	cap_p_inf[j] <- reproSim[[j]][[1]]$results$reals$p[2, 3]
	
	# get measured penalty from unbiased sampling
		
	capturedUnb <- inf_rate_real[rownames(inf_rate_real) %in% rownames(reproSim[[j]][[5]]),]
	
	parentsCapUnb <- capturedUnb[grepl('parent', rownames(capturedUnb)),]
	
	offspringCapUnb <- capturedUnb[!grepl('parent', rownames(capturedUnb)),]
	
	parentsInfCapUnb <- parentsCapUnb[parentsCapUnb$trait1 == 1,]
	
	parentsUninfCapUnb <- parentsCapUnb[parentsCapUnb$trait1 == 0,]
	
	offOfInfCapUnb <- unlist(lapply(rownames(parentsInfCapUnb), function(x){sum(offspringCapUnb[,1] == x) +  sum(offspringCapUnb[,2] == x)}))
	
	offOfUninfCapUnb <- unlist(lapply(rownames(parentsUninfCapUnb), function(x){sum(offspringCapUnb[,1] == x) +  sum(offspringCapUnb[,2] == x)}))
				
	cap_val_uninf_cont[j] <- mean(offOfUninfCapUnb) 
	
	cap_val_inf_cont[j] <- mean(offOfInfCapUnb)
		
	cap_p_uninf_cont[j] <- reproSim[[j]][[2]]$results$reals$p[1, 3]
		
	cap_p_inf_cont[j] <- reproSim[[j]][[2]]$results$reals$p[2, 3]
	
}


repro_mat <- cbind(real_Off_uninf, real_Off_inf, cap_Off_uninf, cap_Off_inf, cap_p_uninf, cap_p_inf, cap_val_uninf_cont, cap_val_inf_cont, cap_p_uninf_cont, cap_p_inf_cont)


colnames(repro_mat) <- c('full_uninf', 'full_inf', 'cap_uninf', 'cap_inf', 'cap_p_uninf', 'cap_p_inf', 'cont_uninf', 'cont_inf', 'cont_p_uninf', 'cont_p_inf')

rownames(repro_mat) <- paste0('sim_', 1:nrow(repro_mat))

write.table(repro_mat, paste0(myPath, 'RR_over_allele'), quote=F)

########################################
# calculate relative risk of genotypes #
########################################

relriskSim <- list()

allelePredict <- runif(200, 0, 1)
infecBias <- runif(200, 1.1, 1.9) # in this simulation, infection increases capture probability


for(i in 1:length(allelePredict)){

	relriskSim[[i]] <- mark_recap_sim(5000, allelePredict[i], 'allele', 10, 0.5, 0.8, infecBias[i], 5, 500)

}


# calculate relative risk

realGeno_RR <- c()
capGeno_biased_RR <- c()
capGeno_unb_RR <- c()
pval_uninf <- c()
pval_inf <- c()
pval_uninf_unb <- c()
pval_inf_unb <- c()


for(j in 1:length(relriskSim)){
		
	inf_rate_real <- relriskSim[[j]][[3]]
	
	genotype <- as.numeric(inf_rate_real[,3]) + as.numeric(inf_rate_real[,4])
	
	infRate <- as.numeric(inf_rate_real[,5])
	
	# get relative risk of each genotype
	
	rr_Suc <- sum(genotype==1 & infRate == 1 | genotype==2 & infRate == 1)/sum(genotype==1 | genotype==2)
	
	rr_Res <- sum(genotype==0 & infRate == 1)/sum(genotype== 0)
	
	realGeno_RR[j] <- rr_Suc/rr_Res
	
	#####################
		
	captured <- inf_rate_real[rownames(inf_rate_real) %in% rownames(relriskSim[[j]][[4]]),]
	
	capGeno <- as.numeric(captured[,3]) + as.numeric(captured[,4])
	
	infCap <- as.numeric(captured[,5])
	
	rr_Suc_biased <- sum(capGeno ==1 & infCap == 1 | capGeno ==2 & infCap == 1)/sum(capGeno ==1 | capGeno ==2)
	
	rr_Res_biased <- sum(capGeno ==0 & infCap == 1)/sum(capGeno == 0)
			
	capGeno_biased_RR[j] <- rr_Suc_biased/rr_Res_biased
	
	######################
						
	capturedCont <- inf_rate_real[rownames(inf_rate_real) %in% rownames(relriskSim[[j]][[5]]),]
	
	capGenoCont <- as.numeric(capturedCont[,3]) + as.numeric(capturedCont[,4])
	
	infCapCont <- as.numeric(capturedCont[,5])
	
	rr_Suc_unbiased <- sum(capGenoCont ==1 & infCapCont == 1 | capGenoCont ==2 & infCapCont == 1)/sum(capGenoCont ==1 | capGenoCont ==2)
	
	rr_Res_unbiased <- sum(capGenoCont ==0 & infCapCont == 1)/sum(capGenoCont == 0)
			
	capGeno_unb_RR[j] <- rr_Suc_unbiased/rr_Res_unbiased
	
	################
	
	pval_uninf[j] <- relriskSim[[j]][[1]]$results$reals$p[1, 3]
	
	pval_inf[j] <- relriskSim[[j]][[1]]$results$reals$p[2, 3]
	
	pval_uninf_unb[j] <- relriskSim[[j]][[2]]$results$reals$p[1, 3]
		
	pval_inf_unb[j] <- relriskSim[[j]][[2]]$results$reals$p[2, 3]
	
}


rr_mat <- cbind(realGeno_RR, capGeno_biased_RR, capGeno_unb_RR, pval_uninf, pval_inf, pval_uninf_unb, pval_inf_unb)

colnames(rr_mat) <- c('full_RR', 'cap_RR', 'cont_RR', 'cap_p_uninf', 'cap_p_inf',  'cont_p_uninf', 'cont_p_inf')

rownames(rr_mat) <- paste0('sim_', 1:nrow(rr_mat))

write.table(rr_mat, paste0(myPath, '/RR_over_allele'),  quote=F)

############################################################################
# impact of bias on detectability of change in allele frequency ############
############################################################################


alleleChangeSim <- list()

allelePredict <- runif(200, 0, 1)
infecBias <- runif(200, 1.1, 1.9) # in this simulation, infection increases capture probability

for(i in 1:length(allelePredict)){

	alleleChangeSim[[i]] <- mark_recap_sim(5000, allelePredict[i], 'het', 10, 0.5, 0.8, infecBias[i], 5, 500)


}


# get real and measured difference between number of offspring in infected and uninfected populations

realChange <- c()
capChange <- c()
pval0 <- c()
pval1 <- c()
capChangeCont <- c()
pval0Cont <- c()
pval1Cont <- c()
	
for(j in 1:length(alleleChangeSim)){
		
	inf_rate_real <- alleleChangeSim[[j]][[3]]
	
	# get real offspring for infected vs. uninfected
	
	parents <- inf_rate_real[grepl('parent', rownames(inf_rate_real)),]
	
	offspring <- inf_rate_real[!grepl('parent', rownames(inf_rate_real)),]
	
	meanParent <- mean(as.numeric(c(parents$allele1, parents$allele2)))
		
	meanOff <- mean(as.numeric(c(offspring $allele1, offspring $allele2)))
		
	realChange[j] <- meanParent - meanOff

	# get real penalty
		
	captured <- inf_rate_real[rownames(inf_rate_real) %in% rownames(alleleChangeSim[[j]][[4]]),]
	
	parentsCap <- captured[grepl('parent', rownames(captured)),]
	
	offspringCap <- captured[!grepl('parent', rownames(captured)),]
	
	meanParentCap <- mean(as.numeric(c(parentsCap $allele1, parentsCap $allele2)))
		
	meanOffCap <- mean(as.numeric(c(offspringCap $allele1, offspringCap $allele2)))	
			
	capChange[j] <- (meanParentCap - meanOffCap)
	
	pval0[j] <- alleleChangeSim[[j]][[1]]$results$reals$p[1, 3]
		
	pval1[j] <- alleleChangeSim[[j]][[1]]$results$reals$p[2, 3]
	
	# get unbiased penalty
		
	capturedUnb <- inf_rate_real[rownames(inf_rate_real) %in% rownames(alleleChangeSim[[j]][[5]]),]
	
	parentsCapUnb <- capturedUnb[grepl('parent', rownames(capturedUnb)),]
	
	offspringCapUnb <- capturedUnb[!grepl('parent', rownames(capturedUnb)),]
	
	meanParentCapUnb <- mean(as.numeric(c(parentsCapUnb $allele1, parentsCapUnb $allele2)))
		
	meanOffCapUnb <- mean(as.numeric(c(offspringCapUnb $allele1, offspringCapUnb $allele2)))	
			
	capChangeCont[j] <- (meanParentCapUnb - meanOffCapUnb)
	
	pval0Cont[j] <- alleleChangeSim[[j]][[2]]$results$reals$p[1, 3]
		
	pval1Cont[j] <- alleleChangeSim[[j]][[2]]$results$reals$p[2, 3]	
	
}

allele_change <- cbind(realChange, capChange, capChangeCont, pval0, pval1, pval0Cont, pval1Cont)


colnames(allele_change) <- c('full_change', 'cap_change', 'cont_change', 'cap_p_uninf', 'cap_p_inf',  'cont_p_uninf', 'cont_p_inf')

rownames(allele_change) <- paste0('sim_', 1:nrow(allele_change))

write.table(allele_change, paste0(myPath, 'allele_change_over_het'),  quote=F)


########################
# statistical analyses #
########################

# tests appear in the order they are reported in the results section

# reproductive success statistics

repro_over <- read.table(paste0(myPath, 'repro_mat_over'))

repro_under <- read.table(paste0(myPath, 'repro_mat_under'))

# raw success difference versus real difference

slope.test(repro_over$cont_uninf-repro_over$cont_inf, repro_over$full_uninf-repro_over$full_inf, 1)

# corrected success difference versus real difference

slope.test((repro_over$cont_uninf-repro_over$cont_inf)/repro_over$cont_uninf, (repro_over$full_uninf-repro_over$full_inf)/repro_over$full_uninf, 1)

# corrected control vs. reduced and increased rate values

slope.test((repro_over$cap_uninf-repro_over$cap_inf)/repro_over$cap_uninf, (repro_over$cont_uninf-repro_over$cont_inf)/repro_over$cont_uninf, 1)

slope.test((repro_under$cap_uninf-repro_under$cap_inf)/repro_under$cap_uninf, (repro_under$cont_uninf-repro_under$cont_inf)/repro_under$cont_uninf, 1)

# sampling variance of control, over, and under samples relative to real variance

fligner.test(list((repro_over$full_uninf-repro_over$full_inf)/repro_over$full_uninf, (repro_over$cont_uninf-repro_over$cont_inf)/repro_over$cont_uninf))

fligner.test(list((repro_over$full_uninf-repro_over$full_inf)/repro_over$full_uninf, (repro_over$cap_uninf-repro_over$cap_inf)/repro_over$cap_uninf))

fligner.test(list((repro_under$full_uninf-repro_under$full_inf)/repro_under$full_uninf, (repro_under$cap_uninf-repro_under$cap_inf)/repro_under$cap_uninf))

# test CJS capture probability values: test residuals versus difference in capture probability values

# prepare inputs 

resid_over <- resid(lm((repro_over$cap_uninf-repro_over$cap_inf) ~ (repro_over$full_uninf-repro_over$full_inf)))

resid_under <- resid(lm((repro_under$cap_uninf-repro_under$cap_inf) ~ (repro_under$full_uninf-repro_under$full_inf)))

resid_cont <- resid(lm((repro_under$cont_uninf-repro_under$cont_inf) ~ (repro_under$full_uninf-repro_under$full_inf)))

p_diff_over <- repro_over$cap_p_uninf - repro_over$cap_p_inf

p_diff_under <- repro_under$cap_p_uninf - repro_under$cap_p_inf

p_diff_cont <- repro_under$cont_p_uninf - repro_under$cont_p_inf

# run tests

t.test(p_diff_over, p_diff_cont)

t.test(p_diff_under, p_diff_cont)

summary(lm(resid_over ~ p_diff_over))

summary(lm(resid_under ~ p_diff_under))

summary(lm(resid_cont ~ p_diff_cont))


# relative risk statistics 

RR_over_het <- read.table(paste0(myPath, 'RR_over_het'))

RR_under_het <- read.table(paste0(myPath, 'RR_under_het'))

RR_over_allele <- read.table(paste0(myPath, 'RR_over_allele'))

RR_under_allele <- read.table(paste0(myPath, 'RR_under_allele'))


# raw success difference versus real difference

slope.test(RR_over_het$cont_RR, RR_over_het$full_RR, 1)

slope.test(RR_over_het$cap_RR, RR_over_het$full_RR, 1)

slope.test(RR_under_het$cap_RR, RR_under_het$full_RR, 1)


slope.test(RR_over_allele$cont_RR, RR_over_allele$full_RR, 1)

slope.test(RR_over_allele$cap_RR, RR_over_allele$full_RR, 1)

slope.test(RR_under_allele$cap_RR, RR_under_allele$full_RR, 1)


# tests for variance

fligner.test(list(RR_over_het$full_RR, RR_over_het$cap_RR))

fligner.test(list(RR_under_het$full_RR, RR_under_het$cap_RR))

fligner.test(list(RR_under_het$full_RR, RR_under_het$cont_RR))


fligner.test(list(RR_over_allele$full_RR, RR_over_allele $cap_RR))

fligner.test(list(RR_under_allele$full_RR, RR_under_allele $cap_RR))

fligner.test(list(RR_under_allele $full_RR, RR_under_allele $cont_RR))


# prepare inputs 

resid_over_het <- resid(lm(RR_over_het$cap_RR ~ RR_over_het$full_RR))

resid_under_het <- resid(lm(RR_under_het$cap_RR ~ RR_under_het$full_RR))

resid_cont_het <- resid(lm(RR_under_het$cont_RR ~ RR_under_het$full_RR))

p_diff_over_het <- RR_over_het $cap_p_uninf - RR_over_het $cap_p_inf

p_diff_under_het <- RR_under_het $cap_p_uninf - RR_under_het $cap_p_inf

p_diff_cont_het <- RR_under_het $cont_p_uninf - RR_under_het$cont_p_inf

####

resid_over_allele <- resid(lm(RR_over_allele$cap_RR ~ RR_over_allele $full_RR))

resid_under_allele <- resid(lm(RR_under_allele$cap_RR ~ RR_under_allele $full_RR))

resid_cont_allele <- resid(lm(RR_under_allele $cont_RR ~ RR_under_allele $full_RR))

p_diff_over_allele <- RR_over_allele $cap_p_uninf - RR_over_allele $cap_p_inf

p_diff_under_allele <- RR_under_allele $cap_p_uninf - RR_under_allele $cap_p_inf

p_diff_cont_allele <- RR_under_allele $cont_p_uninf - RR_under_allele $cont_p_inf

# run tests

t.test(p_diff_over_het, p_diff_cont_het)

t.test(p_diff_under_het, p_diff_cont_het)

t.test(p_diff_over_allele, p_diff_cont_allele)

t.test(p_diff_under_allele, p_diff_cont_allele)



summary(lm(resid_over_het ~ p_diff_over_het))

summary(lm(resid_under_het ~ p_diff_under_het))

summary(lm(resid_cont_het ~ p_diff_cont_het))


summary(lm(resid_over_allele ~ p_diff_over_allele))

summary(lm(resid_under_allele ~ p_diff_under_allele))

summary(lm(resid_cont_allele ~ p_diff_cont_allele))


# allele frequency change statistics 

change_over_het <- read.table(paste0(myPath, 'allele_change_over_het'))

change_under_het <- read.table(paste0(myPath, 'allele_change_under_het'))

change_over_allele <- read.table(paste0(myPath, 'allele_change_over_allele'))

change_under_allele <- read.table(paste0(myPath, 'allele_change_under_allele'))


# raw success difference versus real difference

slope.test(change_over_het $cont_change, change_over_het $full_change, 1)

slope.test(change_over_het $cap_change, change_over_het $full_change, 1)

slope.test(change_under_het $cap_change, change_under_het $full_change, 1)


slope.test(change_over_allele $cont_change, change_over_allele $full_change, 1)

slope.test(change_over_allele $cap_change, change_over_allele $full_change, 1)

slope.test(change_under_allele $cap_change, change_under_allele $full_change, 1)


# tests for variance

fligner.test(list(change_over_het $full_change, change_over_het $cap_change))

fligner.test(list(change_under_het $full_change, change_under_het $cap_change))

fligner.test(list(change_under_het $full_change, change_under_het $cont_change))


fligner.test(list(change_over_allele$full_change, change_over_allele $cap_change))

fligner.test(list(change_under_allele$full_change, change_under_allele $cap_change))

fligner.test(list(change_under_allele $full_change, change_under_allele $cont_change))


# prepare inputs 

resid_over_het <- resid(lm(change_over_het$cap_change ~ change_over_het$full_change))

resid_under_het <- resid(lm(change_under_het$cap_change ~ change_under_het$full_change))

resid_cont_het <- resid(lm(change_under_het$cont_change ~ change_under_het$full_change))

p_diff_over_het <- change_over_het $cap_p_uninf - change_over_het $cap_p_inf

p_diff_under_het <- change_under_het $cap_p_uninf - change_under_het $cap_p_inf

p_diff_cont_het <- change_under_het $cont_p_uninf - change_under_het$cont_p_inf

####

resid_over_allele <- resid(lm(change_over_allele $cap_change ~ change_over_allele $full_change))

resid_under_allele <- resid(lm(change_under_allele $cap_change ~ change_under_allele $full_change))

resid_cont_allele <- resid(lm(change_under_allele $cont_change ~ change_under_allele $full_change))

p_diff_over_allele <- change_over_allele $cap_p_uninf - change_over_allele $cap_p_inf

p_diff_under_allele <- change_under_allele $cap_p_uninf - change_under_allele $cap_p_inf

p_diff_cont_allele <- change_under_allele $cont_p_uninf - change_under_allele $cont_p_inf

# run tests

t.test(p_diff_over_het, p_diff_cont_het)

t.test(p_diff_under_het, p_diff_cont_het)

t.test(p_diff_over_allele, p_diff_cont_allele)

t.test(p_diff_under_allele, p_diff_cont_allele)



summary(lm(resid_over_het ~ p_diff_over_het))

summary(lm(resid_under_het ~ p_diff_under_het))

summary(lm(resid_cont_het ~ p_diff_cont_het))


summary(lm(resid_over_allele ~ p_diff_over_allele))

summary(lm(resid_under_allele ~ p_diff_under_allele))

summary(lm(resid_cont_allele ~ p_diff_cont_allele))

















