# Internal path shortcuts.
# Everything lives below $(BASE)/Data. Each variable is built from the one
# above it, so overriding a parent (e.g. `make DATA=/scratch/run1`) moves
# the whole subtree.
BASE = .
DATA = $(BASE)/Data
INPUT = $(DATA)/Input
OUTPUT = $(DATA)/Output
# Reference VCF directory. Derived from $(INPUT) rather than spelling out
# $(DATA)/Input again: the `init` and `impute` targets address this same
# directory as $(INPUT)/ReferenceVCFs, and the two spellings must keep
# agreeing when INPUT is overridden.
REF = $(INPUT)/ReferenceVCFs
# Operational parameters.
# Each one is assigned with `?=`, so a value supplied through the
# environment or on the make command line takes precedence.
#
# THREADS       -> GNU parallel job count (-j)
# IMPUTETHREADS -> Beagle nthreads=
# VCFPATTERN    -> file name pattern given to `find -name`
# PFLAGS        -> extra GNU parallel flags used by `impute`
# NE            -> Beagle ne=
# TAG           -> suffix appended to the imputed output prefix; the default
#                  is a pair of quotes that the shell collapses to nothing
THREADS ?= 1
IMPUTETHREADS ?= 8
VCFPATTERN ?= *.vcf
PFLAGS ?= -v
NE ?= 1000000
TAG ?= ''
# Locations of locally installed tools and data.
# JARPATH holds the Beagle jar; FASTAPATH holds the FASTA used by `sort`.
JARPATH = ~/.local/src
FASTAPATH = /project/Data/Fasta/EquCab2_wChrun1_2

# Processed reference VCF; it is the only prerequisite of the default goal.
REFVCF = $(REF)/Processed/2M_data_wTB_10252015.phased.conformed.sorted.vcf

# `all` does not name a file, so it is declared phony.
.PHONY: all

all: $(REFVCF)

# Stage $(REF)/<stem>.vcf as
# $(REF)/Processed/<stem>.phased.conformed.sorted.vcf (the name REFVCF uses).
# The Processed directory is not created by `init`, so make it here first;
# otherwise cp fails on a fresh tree.
# $< (the single source) is used instead of $^ because cp to a file target
# only works with exactly one source.
# NOTE(review): this is a plain copy of the input VCF, while the `phase`
# target writes its phased/conformed/sorted outputs directly into $(REF).
# Confirm which file is really meant to end up in Processed.
$(REF)/Processed/%.phased.conformed.sorted.vcf : $(REF)/%.vcf
	@mkdir -p $(@D)
	cp $< $@

# Reference File Actions
# Create the input/output directory tree the other targets read and write.
# Every directory is made with `mkdir -p`, so re-running is harmless.
# Declared phony: `init` is a command, not a file, and must still run when
# something named `init` exists in the working directory.
.PHONY: init
init:
	mkdir -p $(DATA)
	# Make input dirs
	mkdir -p $(INPUT)/ReferenceVCFs
	mkdir -p $(INPUT)/VCFs
	# Make output dirs
	mkdir -p $(OUTPUT)/Conformed
	mkdir -p $(OUTPUT)/Imputed
	mkdir -p $(OUTPUT)/Sorted
	mkdir -p $(OUTPUT)/Filtered
	mkdir -p $(OUTPUT)/DuplicateIDs

# Prepare the reference panel in three chained steps:
#   1. Beagle phases    <name>.valid.vcf          -> <name>.phased.vcf.gz
#   2. conformVCF turns <name>.phased.vcf.gz      -> <name>.phased.conformed.vcf
#   3. sortVCF turns    <name>.phased.conformed.vcf
#                                            -> <name>.phased.conformed.sorted.vcf
# Step 3 reads the output of step 2. It previously re-read the unconformed
# .phased.vcf.gz, so the conform step was discarded and the file named
# "conformed.sorted" was never conformed.
# NOTE(review): the `sort` target passes --fasta to sortVCF and this call does
# not; confirm whether the reference sort needs it as well.
# Declared phony because no file called `phase` is produced.
.PHONY: phase
phase:
	# phase the reference genome
	java -jar $(JARPATH)/beagle.21Jan17.6cc.jar \
		gt=$(REF)/2M_data_wTB_10252015.valid.vcf \
		out=$(REF)/2M_data_wTB_10252015.phased \
		nthreads=$(IMPUTETHREADS)
	# Conform the phased reference
	ponytools conformVCF \
		--vcf $(REF)/2M_data_wTB_10252015.phased.vcf.gz \
		--out $(REF)/2M_data_wTB_10252015.phased.conformed.vcf
	# Sort the phased, conformed reference
	ponytools sortVCF \
		--vcf $(REF)/2M_data_wTB_10252015.phased.conformed.vcf \
		--out $(REF)/2M_data_wTB_10252015.phased.conformed.sorted.vcf

# Target File Options
# Run `ponytools conformVCF` over every file under $(INPUT)/VCFs matching
# $(VCFPATTERN), $(THREADS) jobs at a time. Each result is written to
# $(OUTPUT)/Conformed under the input's base name ({/}).
# `set -f` turns off globbing in the helper shell so the pattern reaches
# find unexpanded; without it a matching file in the current directory would
# replace the pattern. Values passed with their own quotes still work.
# Declared phony because no file called `conform` is produced.
.PHONY: conform
conform:
	parallel --gnu -j $(THREADS) \
		ponytools conformVCF \
		--vcf {} \
		--out $(OUTPUT)/Conformed/{/} \
	::: $(shell set -f; find $(INPUT)/VCFs -name $(VCFPATTERN))

# Run `ponytools sortVCF` over every conformed VCF matching $(VCFPATTERN),
# $(THREADS) jobs at a time, using the FASTA under $(FASTAPATH). Each result
# is written to $(OUTPUT)/Sorted under the input's base name ({/}).
# `set -f` turns off globbing in the helper shell so the pattern reaches
# find unexpanded instead of being matched against the current directory.
# Declared phony: `sort` is a command here, and must not be skipped because
# a file of that name exists.
.PHONY: sort
sort:
	parallel --gnu -j $(THREADS) \
		ponytools sortVCF \
		--vcf {} \
		--fasta $(FASTAPATH)/Equus_cab_nucl_wChrUn1_2.fasta \
		--out $(OUTPUT)/Sorted/{/} \
	::: $(shell set -f; find $(OUTPUT)/Sorted/../Conformed/ -name $(VCFPATTERN))

# For every sorted VCF matching $(VCFPATTERN), compare its individuals with
# the phased reference via `vcftools --diff-indv` and write the selected IDs
# to $(OUTPUT)/DuplicateIDs/<file name>.dups.
# The recipe used to reference the shell variable `i` without ever setting
# it, so by default it ran on the Sorted directory itself and wrote ".dups".
# The loop below supplies the per-file value.
# The grep keeps lines with a "B" right after whitespace; presumably that is
# how vcftools marks individuals present in both files (confirm against the
# vcftools documentation). `sort -R` shuffles the IDs, as before.
# `set -f` keeps the shell from expanding $(VCFPATTERN) before find sees it.
# Declared phony because no file called `find-duplicates` is produced.
.PHONY: find-duplicates
find-duplicates:
	set -f; \
	for vcf in $$(find $(OUTPUT)/Sorted/ -name $(VCFPATTERN)); do \
		i=$$(basename "$$vcf"); \
		vcftools --vcf "$$vcf" \
			--diff $(REF)/2M_data_wTB_10252015.phased.conformed.sorted.vcf \
			--diff-indv --stdout \
			| grep -P '\sB' | cut -f 1 | sort -R \
			> $(OUTPUT)/DuplicateIDs/$$i.dups || exit 1; \
	done

# Impute every sorted target VCF with Beagle. GNU parallel gets two input
# sources and runs each combination of sorted VCF ({1}) and reference
# VCF ({2}), $(THREADS) jobs at a time; each Beagle process uses
# $(IMPUTETHREADS) threads.
# `set -f` keeps the helper shell from expanding $(VCFPATTERN) before find
# sees it. The reference list deliberately relies on the shell glob.
# NOTE(review): the output prefix is built from {1} and $(TAG) only, so when
# more than one reference VCF matches, the jobs for one target write to the
# same prefix. Confirm that a single reference is intended.
# Declared phony because no file called `impute` is produced.
.PHONY: impute
impute:
	parallel --gnu -j $(THREADS) $(PFLAGS) \
		java -jar $(JARPATH)/beagle.21Jan17.6cc.jar \
		ne=$(NE) \
		nthreads=$(IMPUTETHREADS) \
		gt={1} \
		ref={2} \
		out=$(OUTPUT)/Imputed/{1/.}$(TAG) \
		::: $(shell set -f; find $(OUTPUT)/Sorted/ -name $(VCFPATTERN)) \
		::: $(shell find $(INPUT)/ReferenceVCFs/*.vcf)

# Delete everything under $(OUTPUT), then rebuild the empty directory tree.
# The guard aborts before anything runs if OUTPUT expands to nothing, since
# the rm below would otherwise become `rm -fr /*`.
# $(MAKE) replaces a literal `make` so the sub-make inherits the flags
# (-n, -j, command-line variables) of this invocation.
# Declared phony so a file called `clean` cannot disable the target.
.PHONY: clean
clean:
	$(if $(strip $(OUTPUT)),,$(error OUTPUT is empty; refusing to run rm -fr on /*))
	rm -fr $(OUTPUT)/*
	$(MAKE) init
