#setwd("C:/Users/kejima/Dropbox/Andrew_murine/JAX/type-I-error")
#setwd("/gpfs/home/k/e/kejima/Karst/thindrives/Andrew_murine/JAX/type-I-error")


# ---- Setup: command-line arguments, packages, parallel backend ----
# Usage: Rscript <this script> <strain id> <sample size> <sex>
args <- commandArgs(TRUE)
library(openxlsx)
library(coin)
library(doParallel)
source("boot_perm.R")

# Validate the arguments before any worker process is started, so a bad
# invocation fails immediately with a readable message.
if (length(args) < 3) {
  stop("Expected three arguments: <strain id> <sample size> <sex>",
       call. = FALSE)
}

# Mutant strain id. Control strain 7 is paired with 140, 141, 142, 144;
# control strain 35 is paired with 143, 162.
stid <- as.numeric(args[1])
if (stid %in% c(140, 141, 142, 144)) {
  cont <- 7
} else if (stid %in% c(143, 162)) {
  cont <- 35
} else {
  # Previously `cont` was silently left undefined for any other id.
  stop("Unsupported strain id '", args[1],
       "': expected one of 140, 141, 142, 144, 143, 162",
       call. = FALSE)
}

# Simulated sample size per group. The tests run downstream need at least
# two observations in each group.
n <- as.numeric(args[2])
if (is.na(n) || n < 2 || n != floor(n)) {
  stop("Sample size must be a whole number >= 2, got '", args[2], "'",
       call. = FALSE)
}

# Sex label; compared against the `sex` column of the data downstream.
s <- as.character(args[3])
if (is.na(s) || !nzchar(s)) {
  stop("Sex argument must be a non-empty string", call. = FALSE)
}

# Start the PSOCK workers only once the inputs are known to be usable.
cl <- makePSOCKcluster(4)
registerDoParallel(cl)

#### Read data ####
data <- read.xlsx("data.xlsx", sheet = 1)

# Rows are selected with standard indexing rather than subset(): subset()
# evaluates its condition inside the data frame, so a workbook column that
# happened to be called `s`, `cont` or `stid` would shadow the script
# variable. which() drops rows where the comparison is NA, as subset() does.
sex_match <- data[["sex"]] == s
wild <- data[which(data[["strainid"]] == cont & sex_match), ] # control group
mutant <- data[which(data[["strainid"]] == stid & sex_match), ] # mutant group

# Fail here with a clear message instead of letting an empty group surface
# later as an obscure resampling error.
if (nrow(wild) == 0) {
  stop("No rows found for control strain ", cont, " with sex '", s, "'",
       call. = FALSE)
}
if (nrow(mutant) == 0) {
  stop("No rows found for mutant strain ", stid, " with sex '", s, "'",
       call. = FALSE)
}

# Copy of the control group shifted by the difference in group means, so
# that its mean equals the mutant group's mean.
wild_plus <- wild
wild_plus$value <- wild$value + (mean(mutant$value) - mean(wild$value))


#### Simulation settings ####
n_w <- n # simulated sample size for wildtype
n_m <- n # simulated sample size for mutant
simu <- 15352 # number of simulations
sim_seed <- 07202018 # arbitrarily chosen seed (date of first analysis)

# set.seed() only seeds this (master) process; the random draws below happen
# on the PSOCK workers, which have their own RNG state. Give every worker its
# own seeded L'Ecuyer stream so the simulation can be repeated.
set.seed(sim_seed)
parallel::clusterSetRNGStream(cl, iseed = sim_seed)

# Pre-assign a fixed chunk of iterations to each worker. With the default
# load-balanced dispatch, which worker (and therefore which random stream)
# handles a given iteration can differ between runs.
snow_opts <- list(preschedule = TRUE)

# Each iteration returns five p-values, in this column order:
#   1. two-sample t-test assuming equal variances
#   2. Welch t-test
#   3. exact Wilcoxon test (coin)
#   4. Permutation()  (presumably defined in boot_perm.R)
#   5. Bootstrap()    (presumably defined in boot_perm.R)
# The cluster is shut down in `finally`, so workers are released even when a
# simulation or a file write fails.
tryCatch({
  #### p-value distribution: case 1 (shifting distribution) ####
  r <- foreach(icount(simu), .combine = "rbind", .packages = "coin",
               .options.snow = snow_opts) %dopar% {
    # Index-based draw with replacement. Unlike sample(x, ...), this never
    # reinterprets a length-one numeric x as the range 1:x; for longer
    # vectors it is exactly what sample() does internally.
    draw_with_replacement <- function(x, size) {
      x[sample.int(length(x), size, replace = TRUE)]
    }

    resample_w <- draw_with_replacement(wild_plus$value, n_w)
    resample_m <- draw_with_replacement(mutant$value, n_m)
    temp <- data.frame(
      val = c(resample_w, resample_m),
      grp = factor(rep(c(1, 2),
                       times = c(length(resample_w), length(resample_m))))
    )

    # The t-tests cannot be computed when both groups are constant, so
    # those cases are filled in by hand.
    w_constant <- all(resample_w == resample_w[1])
    m_constant <- all(resample_m == resample_m[1])

    if (w_constant && m_constant) {
      if (resample_w[1] == resample_m[1]) {
        # every observation in both groups is identical: p-value is one
        c(1, 1, 1, 1, 1)
      } else {
        # each group is constant but the groups differ: p-value is zero,
        # except for the Wilcoxon test, which is still computed
        c(0,
          0,
          pvalue(wilcox_test(val ~ grp, data = temp, distribution = "exact")),
          0,
          0)
      }
    } else {
      c(t.test(resample_w, resample_m, var.equal = TRUE)$p.value,
        t.test(resample_w, resample_m)$p.value,
        pvalue(wilcox_test(val ~ grp, data = temp, distribution = "exact")),
        Permutation(resample_w, resample_m),
        Bootstrap(resample_w, resample_m))
    }
  }
  write.table(r, file = paste0(stid, "-", n, "-", s, "-case-1.csv"),
              append = FALSE, sep = ",", row.names = FALSE, col.names = FALSE)

  #### p-value distribution: case 2 (combining distribution) ####
  r <- foreach(icount(simu), .combine = "rbind", .packages = "coin",
               .options.snow = snow_opts) %dopar% {
    # Index-based draw with replacement (see case 1); a size of zero
    # yields an empty vector.
    draw_with_replacement <- function(x, size) {
      x[sample.int(length(x), size, replace = TRUE)]
    }

    # Both simulated groups are drawn from the same 50/50 mixture: a
    # binomial count of observations comes from the wild type group and
    # the remainder from the mutant group.
    from_wild_w <- rbinom(1, n_w, 1 / 2)
    resample_w <- c(draw_with_replacement(wild$value, from_wild_w),
                    draw_with_replacement(mutant$value, n_w - from_wild_w))
    from_wild_m <- rbinom(1, n_m, 1 / 2)
    resample_m <- c(draw_with_replacement(wild$value, from_wild_m),
                    draw_with_replacement(mutant$value, n_m - from_wild_m))
    temp <- data.frame(
      val = c(resample_w, resample_m),
      grp = factor(rep(c(1, 2),
                       times = c(length(resample_w), length(resample_m))))
    )

    # The t-tests cannot be computed when both groups are constant, so
    # those cases are filled in by hand.
    w_constant <- all(resample_w == resample_w[1])
    m_constant <- all(resample_m == resample_m[1])

    if (w_constant && m_constant) {
      if (resample_w[1] == resample_m[1]) {
        # every observation in both groups is identical: p-value is one
        c(1, 1, 1, 1, 1)
      } else {
        # each group is constant but the groups differ: p-value is zero,
        # except for the Wilcoxon test, which is still computed
        c(0,
          0,
          pvalue(wilcox_test(val ~ grp, data = temp, distribution = "exact")),
          0,
          0)
      }
    } else {
      c(t.test(resample_w, resample_m, var.equal = TRUE)$p.value,
        t.test(resample_w, resample_m)$p.value,
        pvalue(wilcox_test(val ~ grp, data = temp, distribution = "exact")),
        Permutation(resample_w, resample_m),
        Bootstrap(resample_w, resample_m))
    }
  }
  write.table(r, file = paste0(stid, "-", n, "-", s, "-case-2.csv"),
              append = FALSE, sep = ",", row.names = FALSE, col.names = FALSE)
}, finally = stopCluster(cl))