# Create the count and ref files
################################################################################

rule ParseMpileup_createCountFiles:
    """
    Run GBS-SNP-CROP-6_1.pl on a single sample's mpileup file (FASTQ/TRIMMED).

    The rule declares one count file and one ref file per sample as outputs;
    the script is started from the FASTQ/TRIMMED folder and receives the
    mpileup path (-b) and the step6 "p" setting (-p).
    """
    # Every path below is rooted at config["project-folder"].
    input:
        config["project-folder"] + "/FASTQ/TRIMMED/{samples}.mpileup"
    output:
        co=config["project-folder"] + "/FASTQ/TRIMMED/{samples}.count.txt",
        ref=config["project-folder"] + "/FASTQ/TRIMMED/{samples}.ref.txt"
    log:
        config["project-folder"] + "/logs/Perl/ParseMpileup_createCountFiles.{samples}.log"
    benchmark:
        config["project-folder"] + "/benchmark/ParseMpileup_createCountFiles.{samples}.tsv"
    threads: 1
    conda: "envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    params:
        p=config["params"]["step6"]["p"],
        wd=config["project-folder"] + "/FASTQ/TRIMMED",
        pipefolder=config["pipeline-folder"]
    # stdout and stderr of the perl script both end up in the rule log.
    shell:"""
        cd {params.wd}
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_1.pl -b {input} -p {params.p} &> {log}
  	"""

# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule ParseMpileup_createCountFiles_reference:
        """
        Run GBS-SNP-CROP-6_1.pl on a single sample's mpileup file from
        MPILEUP/mpileup_reference.

        Declares one count file and one ref file per sample as outputs; the
        script is started from the mpileup_reference folder.
        """
        input:
            config["project-folder"] + "/MPILEUP/mpileup_reference/{samples}.mpileup"
        output:
            co=config["project-folder"] + "/MPILEUP/mpileup_reference/{samples}.count.txt",
            ref=config["project-folder"] + "/MPILEUP/mpileup_reference/{samples}.ref.txt"
        log:
            config["project-folder"] + "/logs/Perl/ParseMpileup_createCountFiles_reference.{samples}.log"
        benchmark:
            config["project-folder"] + "/benchmark/ParseMpileup_createCountFiles_reference.{samples}.tsv"
        threads: 1
        conda: "envs/gbs.yaml"
        singularity: config["singularity"]["gbs"]
        params:
            p=config["params"]["step6"]["p"],
            wd=config["project-folder"] + "/MPILEUP/mpileup_reference",
            pipefolder=config["pipeline-folder"]
        # stdout and stderr of the perl script both end up in the rule log.
        shell:"""
            cd {params.wd}
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_1.pl -b {input} -p {params.p} &> {log}
      	"""
  	
# Create the verticalRef files
################################################################################

rule create_verticalRef:
    """
    Merge the per-sample ref files into VerticalRefPos.txt and write the
    list of count files.

    Steps:
      1. merge all ref files with "sort -u -m -k2n" into VerticalRefPos.tmp,
      2. sort that file again on field 2 and pass it through uniq to produce
         VerticalRefPos.txt,
      3. call getCountFileList.sh with the barcodes file and the count folder
         to produce CountFileList.txt.

    The count files are declared as inputs so that they exist before
    getCountFileList.sh runs, although they are not named on the command line.
    """
    input:
        co=expand("%s/FASTQ/TRIMMED/{samples}.count.txt" % (config["project-folder"]), samples=samples),
        ref=expand("%s/FASTQ/TRIMMED/{samples}.ref.txt" % (config["project-folder"]), samples=samples),
        barcodes=config["barcodes-file"]
    output:
        vref="%s/FASTQ/TRIMMED/VerticalRefPos.txt" % (config["project-folder"]),
        co="%s/FASTQ/TRIMMED/CountFileList.txt" % (config["project-folder"])
    log:
        "%s/logs/BASH/create_verticalRef.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/create_verticalRef.tsv" % (config["project-folder"])
    params:
        wd="%s/FASTQ/TRIMMED" % config["project-folder"],
        countfolder="%s/FASTQ/TRIMMED" % (config["project-folder"]),
        pipefolder=config["pipeline-folder"],
        tmpdir=config["local-scratch"]
    threads: 1
    # stderr of every command is collected in the declared log file (the
    # first command creates it, the following ones append); previously the
    # log was declared but never written.
    # The count file list is built by getCountFileList.sh from the barcodes
    # file; a plain "ls *count.txt" listing is deliberately not used.
    shell:"""
        export TMPDIR={params.tmpdir}
        sort -u -m -k2n {input.ref} -o {params.wd}/VerticalRefPos.tmp 2> {log}
        sort -k2n {params.wd}/VerticalRefPos.tmp 2>> {log} | uniq > {output.vref}

        {params.pipefolder}/scripts/getCountFileList.sh {input.barcodes} {params.countfolder} > {output.co} 2>> {log}
    """
    
# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule create_verticalRef_reference:
        """
        Merge the per-sample ref files of the reference-genome branch into
        VerticalRefPos.txt and write the list of count files.

        Steps:
          1. merge all ref files with "sort -u -m -k2n" into VerticalRefPos.tmp,
          2. sort that file again on field 2 and pass it through uniq to
             produce VerticalRefPos.txt,
          3. call getCountFileList.sh with the barcodes file and the count
             folder to produce CountFileList.txt.

        The count files are declared as inputs so that they exist before
        getCountFileList.sh runs, although they are not named on the command
        line.
        """
        input:
            co=expand("%s/MPILEUP/mpileup_reference/{samples}.count.txt" % (config["project-folder"]), samples=samples),
            ref=expand("%s/MPILEUP/mpileup_reference/{samples}.ref.txt" % (config["project-folder"]), samples=samples),
            barcodes=config["barcodes-file"]
        output:
            vref="%s/MPILEUP/mpileup_reference/VerticalRefPos.txt" % (config["project-folder"]),
            co="%s/MPILEUP/mpileup_reference/CountFileList.txt" % (config["project-folder"])
        log:
            "%s/logs/BASH/create_verticalRef_reference.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/create_verticalRef_reference.tsv" % (config["project-folder"])
        params:
            wd="%s/MPILEUP/mpileup_reference" % config["project-folder"],
            countfolder="%s/MPILEUP/mpileup_reference" % (config["project-folder"]),
            pipefolder=config["pipeline-folder"],
            tmpdir=config["local-scratch"]
        threads: 1
        # stderr of every command is collected in the declared log file (the
        # first command creates it, the following ones append); previously the
        # log was declared but never written.
        # Do NOT build the count file list with "ls *count.txt": that yields
        # the wrong order. getCountFileList.sh derives it from the barcodes
        # file instead.
        # NOTE(review): "sort -u -k2n" treats two lines as duplicates when
        # field 2 compares numerically equal. If the ref files carry more than
        # one sequence name in field 1 this could drop positions - confirm
        # against the ref.txt layout written by GBS-SNP-CROP-6_1.pl.
        shell:"""
            export TMPDIR={params.tmpdir}
            sort -u -m -k2n {input.ref} -o {params.wd}/VerticalRefPos.tmp 2> {log}
            sort -k2n {params.wd}/VerticalRefPos.tmp 2>> {log} | uniq > {output.vref}

            {params.pipefolder}/scripts/getCountFileList.sh {input.barcodes} {params.countfolder} > {output.co} 2>> {log}
        """
    
# Create the checkpoints to break out into parallel jobs
################################################################################
    
checkpoint cut_verticalRef:
    """
    Split FASTQ/TRIMMED/VerticalRefPos.txt into pieces of params.split lines.

    The pieces are written as VerticalRefPos.<numeric suffix> into the
    VerticalRefPos/ directory, which is the checkpoint output.
    """
    input:
        config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos.txt"
    output:
        directory(config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos/")
    params:
        # File name prefix handed to split; the suffix is appended to it.
        out=config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos/VerticalRefPos.",
        # Number of lines per piece.
        split=1000000
    shell:"""
        mkdir -p {output}
        split -l {params.split} --numeric-suffixes {input} {params.out}
    """

# This checkpoint only exists when a reference genome is configured.
if config["genome"] != "":
    checkpoint cut_verticalRef_reference:
        """
        Split MPILEUP/mpileup_reference/VerticalRefPos.txt into pieces of
        params.split lines.

        The pieces are written as VerticalRefPos.<numeric suffix> into the
        VerticalRefPos/ directory, which is the checkpoint output.
        """
        input:
            config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos.txt"
        output:
            directory(config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos/")
        params:
            # File name prefix handed to split; the suffix is appended to it.
            out=config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos/VerticalRefPos.",
            # Number of lines per piece.
            split=1000000
        shell:"""
            mkdir -p {output}
            split -l {params.split} --numeric-suffixes {input} {params.out}
        """
        
# Create parallel MasterMatrix
################################################################################
    
rule create_MasterMatrix_parallel:
    """
    Run GBS-SNP-CROP-6_2.pl on one piece ({i}) of the split VerticalRefPos
    file together with the count file list, producing GSC.MasterMatrix_{i}.tsv.
    """
    input:
        verRef=config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos/VerticalRefPos.{i}",
        counts=config["project-folder"] + "/FASTQ/TRIMMED/CountFileList.txt"
    output:
        config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos/GSC.MasterMatrix_{i}.tsv"
    params:
        p=config["params"]["step6"]["p"],
        wd=config["project-folder"] + "/FASTQ/TRIMMED/VerticalRefPos",
        pipefolder=config["pipeline-folder"]
    log:
        config["project-folder"] + "/logs/BASH/create_MasterMatrix_parallel_{i}.log"
    benchmark:
        config["project-folder"] + "/benchmark/create_MasterMatrix_parallel.{i}.tsv"
    conda: "envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    # The script is started from inside the VerticalRefPos directory; stdout
    # and stderr both go to the rule log.
    shell:"""
        cd {params.wd}
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_2.pl -in {input.verRef} -count {input.counts} -out {output} -p {params.p} &> {log}
     """

# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule create_MasterMatrix_parallel_reference:
        """
        Run GBS-SNP-CROP-6_2.pl on one piece ({i}) of the split
        reference-genome VerticalRefPos file together with the count file
        list, producing GSC.MasterMatrix_{i}.tsv.
        """
        input:
            verRef=config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos/VerticalRefPos.{i}",
            counts=config["project-folder"] + "/MPILEUP/mpileup_reference/CountFileList.txt"
        output:
            config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos/GSC.MasterMatrix_{i}.tsv"
        params:
            p=config["params"]["step6"]["p"],
            wd=config["project-folder"] + "/MPILEUP/mpileup_reference/VerticalRefPos",
            pipefolder=config["pipeline-folder"]
        log:
            config["project-folder"] + "/logs/BASH/create_MasterMatrix_parallel_reference_{i}.log"
        benchmark:
            config["project-folder"] + "/benchmark/create_MasterMatrix_parallel_reference.{i}.tsv"
        conda: "envs/gbs.yaml"
        singularity: config["singularity"]["gbs"]
        # The script is started from inside the VerticalRefPos directory;
        # stdout and stderr both go to the rule log.
        shell:"""
            cd {params.wd}
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_2.pl -in {input.verRef} -count {input.counts} -out {output} -p {params.p} &> {log}
         """
         
# Aggregate the parallel output
################################################################################

def aggregate_inputMasterMatrix(wildcards):
    """
    Input function listing the per-chunk MasterMatrix files to aggregate.

    Waits for the cut_verticalRef checkpoint, looks up the chunk suffixes of
    the VerticalRefPos.<i> files in the checkpoint's output directory and
    returns the matching GSC.MasterMatrix_<i>.tsv paths.

    The suffixes are sorted because glob_wildcards returns them in directory
    listing order, and the consuming rule concatenates its inputs in the
    order given here. Sorting keeps the chunks in the order split wrote them
    and makes the result reproducible.
    """
    checkpoint_outputMM = checkpoints.cut_verticalRef.get(**wildcards).output[0]
    chunk_ids = sorted(glob_wildcards(os.path.join(checkpoint_outputMM, "VerticalRefPos.{i}")).i)
    return expand("%s/FASTQ/TRIMMED/VerticalRefPos/GSC.MasterMatrix_{i}.tsv" % (config["project-folder"]),
                  i=chunk_ids)

# This input function only exists when a reference genome is configured.
if config["genome"] != "":
    def aggregate_inputMasterMatrix_reference(wildcards):
        """
        Input function listing the per-chunk MasterMatrix files of the
        reference-genome branch.

        Waits for the cut_verticalRef_reference checkpoint, looks up the chunk
        suffixes of the VerticalRefPos.<i> files in the checkpoint's output
        directory and returns the matching GSC.MasterMatrix_<i>.tsv paths.

        The suffixes are sorted because glob_wildcards returns them in
        directory listing order, and the consuming rule concatenates its
        inputs in the order given here. Sorting keeps the chunks in the order
        split wrote them and makes the result reproducible.
        """
        checkpoint_outputMM = checkpoints.cut_verticalRef_reference.get(**wildcards).output[0]
        chunk_ids = sorted(glob_wildcards(os.path.join(checkpoint_outputMM, "VerticalRefPos.{i}")).i)
        return expand("%s/MPILEUP/mpileup_reference/VerticalRefPos/GSC.MasterMatrix_{i}.tsv" % (config["project-folder"]),
                      i=chunk_ids)

rule aggregate_MasterMatrix:
    """
    Concatenate the per-chunk MasterMatrix files supplied by
    aggregate_inputMasterMatrix into FASTQ/TRIMMED/GSC.MasterMatrix.txt.
    """
    input:
        aggregate_inputMasterMatrix
    output:
        config["project-folder"] + "/FASTQ/TRIMMED/GSC.MasterMatrix.txt"
    benchmark:
        config["project-folder"] + "/benchmark/aggregate_MasterMatrix.tsv"
    # The files are joined in the order the input function returns them.
    shell:"""
        cat {input} > {output}
    """

# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule aggregate_MasterMatrix_reference:
        """
        Concatenate the per-chunk MasterMatrix files supplied by
        aggregate_inputMasterMatrix_reference into
        MPILEUP/mpileup_reference/GSC.MasterMatrix.txt.
        """
        input:
            aggregate_inputMasterMatrix_reference
        output:
            config["project-folder"] + "/MPILEUP/mpileup_reference/GSC.MasterMatrix.txt"
        benchmark:
            config["project-folder"] + "/benchmark/aggregate_MasterMatrix_reference.tsv"
        # The files are joined in the order the input function returns them.
        shell:"""
            cat {input} > {output}
        """
        
rule FilterVariants:
    """
    Filter variants and call genotypes.

    GBS-SNP-CROP-7.pl is run twice from the FASTQ/TRIMMED folder:
      1. with the thresholds from config["params"]["step7"], writing to
         params.out (the "filtered" matrix),
      2. with altStrength, mnAlleleRatio, mnCall and mnAvgDepth set to 0 and
         mxAvgDepth set to 1000000, writing to params.unfiltered.

    Both runs write to the same log file; the second run appends so that the
    output of the first run is not lost.

    NOTE(review): the script reads and writes the names given in
    config["params"]["step7"] (input/out/unfiltered) relative to the working
    directory; these are presumably the same files as the declared
    input/output paths - confirm against the configuration.
    """
    input:
        "%s/FASTQ/TRIMMED/GSC.MasterMatrix.txt" % (config["project-folder"])
    output:
        filtered="%s/FASTQ/TRIMMED/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
        unfiltered="%s/FASTQ/TRIMMED/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"])
    params:
        input=config["params"]["step7"]["input"],
        out=config["params"]["step7"]["out"],
        unfiltered=config["params"]["step7"]["unfiltered"],
        p=config["params"]["step7"]["p"],
        mnHoDepth0=config["params"]["step7"]["mnHoDepth0"],
        mnHoDepth1=config["params"]["step7"]["mnHoDepth1"],
        mnHetDepth=config["params"]["step7"]["mnHetDepth"],
        altStrength=config["params"]["step7"]["altStrength"],
        mnAlleleRatio=config["params"]["step7"]["mnAlleleRatio"],
        mnCall=config["params"]["step7"]["mnCall"],
        mnAvgDepth=config["params"]["step7"]["mnAvgDepth"],
        mxAvgDepth=config["params"]["step7"]["mxAvgDepth"],
        wd="%s/FASTQ/TRIMMED" % config["project-folder"],
        pipefolder=config["pipeline-folder"]
    log:
        "%s/logs/Perl/FilterVariants.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/FilterVariants.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    shell:"""
        #******PARAMETERS*****
        # -in 	Discovery master matrix input file. The output from last step 	File 	GSC.MasterMatrix.txt
        # -out 	Genotyping matrix for the population 	Output file 	GSC.GenoMatrix.txt
        # -p 	Identify SNPs only (snp) or SNPs + indels (indel) 	String 	--
        # -mnHoDepth0 	Minimum depth required for calling a homozygote when the alternative allele depth = 0 	Numeric 	5
        # -mnHoDepth1 	Minimum depth required for calling a homozygote when the alternative allele depth = 1 	Numeric 	20
        # -mnHetDepth 	Minimum depth required for each allele when calling a heterozygote 	Numeric 	3
        # -altStrength 	Across the population for a given putative bi-allelic variant, this alternate allele strength parameter is the minimum proportion of non-primary allele reads that are the secondary allele 	Numeric 	0.8
        # -mnAlleleRatio 	Minimum required ratio of less frequent allele depth to more frequent allele depth 	Numeric 	0.25
        # -mnCall 	Minimum acceptable proportion of genotyped individuals to retain a variant 	Numeric 	0.75
        # -mnAvgDepth 	Minimum average depth of an acceptable variant 	Numeric 	3
        # -mxAvgDepth 	Maximum average depth of an acceptable variant 	Numeric 	200

        cd {params.wd}
        # First run creates/truncates the log ...
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.out} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength {params.altStrength} -mnAlleleRatio {params.mnAlleleRatio} -mnCall {params.mnCall} -mnAvgDepth {params.mnAvgDepth} -mxAvgDepth {params.mxAvgDepth} &> {log}
        # ... second run appends to it.
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.unfiltered} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength 0 -mnAlleleRatio 0 -mnCall 0 -mnAvgDepth 0 -mxAvgDepth 1000000 &>> {log}
    """

# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule FilterVariants_reference:
        """
        Filter variants and call genotypes using the reference genome.

        GBS-SNP-CROP-7.pl is run twice from the MPILEUP/mpileup_reference
        folder:
          1. with the thresholds from config["params"]["step7"], writing to
             params.out (the "filtered" matrix),
          2. with altStrength, mnAlleleRatio, mnCall and mnAvgDepth set to 0
             and mxAvgDepth set to 1000000, writing to params.unfiltered.

        Both runs write to the same log file; the second run appends so that
        the output of the first run is not lost.

        NOTE(review): the script reads and writes the names given in
        config["params"]["step7"] (input/out/unfiltered) relative to the
        working directory; these are presumably the same files as the declared
        input/output paths - confirm against the configuration.
        """
        input:
            "%s/MPILEUP/mpileup_reference/GSC.MasterMatrix.txt" % (config["project-folder"])
        output:
            filtered="%s/MPILEUP/mpileup_reference/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
            unfiltered="%s/MPILEUP/mpileup_reference/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"])
        params:
            input=config["params"]["step7"]["input"],
            out=config["params"]["step7"]["out"],
            unfiltered=config["params"]["step7"]["unfiltered"],
            p=config["params"]["step7"]["p"],
            mnHoDepth0=config["params"]["step7"]["mnHoDepth0"],
            mnHoDepth1=config["params"]["step7"]["mnHoDepth1"],
            mnHetDepth=config["params"]["step7"]["mnHetDepth"],
            altStrength=config["params"]["step7"]["altStrength"],
            mnAlleleRatio=config["params"]["step7"]["mnAlleleRatio"],
            mnCall=config["params"]["step7"]["mnCall"],
            mnAvgDepth=config["params"]["step7"]["mnAvgDepth"],
            mxAvgDepth=config["params"]["step7"]["mxAvgDepth"],
            wd="%s/MPILEUP/mpileup_reference" % config["project-folder"],
            pipefolder=config["pipeline-folder"]
        log:
            "%s/logs/Perl/FilterVariants_reference.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/FilterVariants_reference.tsv" % (config["project-folder"])
        conda:"envs/gbs.yaml"
        singularity: config["singularity"]["gbs"]
        shell:"""
            #******PARAMETERS*****
            # -in 	Discovery master matrix input file. The output from last step 	File 	GSC.MasterMatrix.txt
            # -out 	Genotyping matrix for the population 	Output file 	GSC.GenoMatrix.txt
            # -p 	Identify SNPs only (snp) or SNPs + indels (indel) 	String 	--
            # -mnHoDepth0 	Minimum depth required for calling a homozygote when the alternative allele depth = 0 	Numeric 	5
            # -mnHoDepth1 	Minimum depth required for calling a homozygote when the alternative allele depth = 1 	Numeric 	20
            # -mnHetDepth 	Minimum depth required for each allele when calling a heterozygote 	Numeric 	3
            # -altStrength 	Across the population for a given putative bi-allelic variant, this alternate allele strength parameter is the minimum proportion of non-primary allele reads that are the secondary allele 	Numeric 	0.8
            # -mnAlleleRatio 	Minimum required ratio of less frequent allele depth to more frequent allele depth 	Numeric 	0.25
            # -mnCall 	Minimum acceptable proportion of genotyped individuals to retain a variant 	Numeric 	0.75
            # -mnAvgDepth 	Minimum average depth of an acceptable variant 	Numeric 	3
            # -mxAvgDepth 	Maximum average depth of an acceptable variant 	Numeric 	200

            cd {params.wd}
            # First run creates/truncates the log ...
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.out} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength {params.altStrength} -mnAlleleRatio {params.mnAlleleRatio} -mnCall {params.mnCall} -mnAvgDepth {params.mnAvgDepth} -mxAvgDepth {params.mxAvgDepth} &> {log}
            # ... second run appends to it.
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.unfiltered} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength 0 -mnAlleleRatio 0 -mnCall 0 -mnAvgDepth 0 -mxAvgDepth 1000000 &>> {log}
        """
        
    
# vim: set filetype=sh :

rule CreateVCF:
    """
    Create the VCF output of the pipeline.

    GBS-SNP-CROP-8.pl is run twice from the FASTQ/TRIMMED folder, once on the
    filtered and once on the unfiltered genotype matrix, with the output
    labels and formats taken from config["params"]["step8"].

    Both runs write to the same log file; the second run appends so that the
    output of the first run is not lost.

    NOTE(review): the declared output GSC.vcf is presumably what the first
    run writes for params.out - confirm against the step8 configuration.
    """
    input:
        filtered="%s/FASTQ/TRIMMED/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
        unfiltered="%s/FASTQ/TRIMMED/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"]),
        barcodes=config["barcodes-file"]
    output:
        "%s/FASTQ/TRIMMED/GSC.vcf" % (config["project-folder"])
    params:
        genoMatrix=config["params"]["step8"]["genoMatrix"],
        out=config["params"]["step8"]["out"],
        unfilteredOut=config["params"]["step8"]["unfiltered"],
        format=config["params"]["step8"]["format"],
        wd="%s/FASTQ/TRIMMED" % config["project-folder"],
        pipefolder=config["pipeline-folder"]
    log:
        "%s/logs/Perl/CreateVCF.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/CreateVCF.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    shell:"""
        #******PARAMETERS*****
        # -in 	Final genotyping matrix input file 	File 	GSC.GenoMatrix.txt
        # -out 	Output label name without extension 	String 	GSC
        # -b 	BarcodeID file name See Appendix A 	File 	--
        # -formats 	The name(s) (R, Tassel, Plink, vcf, H) for which the GBS-SNP-CROP final genotyping matrix should be converted. If more than one format is desired, the names should be separated by commas without any space, as in the examples shown below 	String 	R,T,P,V,H

        cd {params.wd}
        # First run creates/truncates the log ...
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.filtered} -out {params.out} -b {input.barcodes} -formats {params.format} &> {log}
        # ... second run appends to it.
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.unfiltered} -out {params.unfilteredOut} -b {input.barcodes} -formats {params.format} &>> {log}
    """

# This rule only exists when a reference genome is configured.
if config["genome"] != "":
    rule CreateVCF_reference:
        """
        Create the VCF output of the pipeline for the reference genome.

        GBS-SNP-CROP-8.pl is run twice from the MPILEUP/mpileup_reference
        folder, once on the filtered and once on the unfiltered genotype
        matrix, with the output labels and formats taken from
        config["params"]["step8"]. Afterwards params.pipeout (GSC.vcf in the
        working folder) is copied to the declared output in the VCF folder.

        Both runs write to the same log file; the second run appends so that
        the output of the first run is not lost.
        """
        input:
            filtered="%s/MPILEUP/mpileup_reference/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
            unfiltered="%s/MPILEUP/mpileup_reference/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"]),
            barcodes=config["barcodes-file"]
        output:
            "%s/VCF/FinalSetVariants_referenceGenome.vcf" % (config["project-folder"])
        params:
            genoMatrix=config["params"]["step8"]["genoMatrix"],
            out=config["params"]["step8"]["out"],
            unfilteredOut=config["params"]["step8"]["unfiltered"],
            format=config["params"]["step8"]["format"],
            wd="%s/MPILEUP/mpileup_reference" % config["project-folder"],
            outdir="%s/VCF" % config["project-folder"],
            pipeout="%s/MPILEUP/mpileup_reference/GSC.vcf" % (config["project-folder"]),
            pipefolder=config["pipeline-folder"]
        log:
            "%s/logs/Perl/CreateVCF_reference.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/CreateVCF_reference.tsv" % (config["project-folder"])
        conda:"envs/gbs.yaml"
        singularity: config["singularity"]["gbs"]
        shell:"""
            #******PARAMETERS*****
            # -in 	Final genotyping matrix input file 	File 	GSC.GenoMatrix.txt
            # -out 	Output label name without extension 	String 	GSC
            # -b 	BarcodeID file name See Appendix A 	File 	--
            # -formats 	The name(s) (R, Tassel, Plink, vcf, H) for which the GBS-SNP-CROP final genotyping matrix should be converted. If more than one format is desired, the names should be separated by commas without any space, as in the examples shown below 	String 	R,T,P,V,H

            # Make sure the copy target folder exists (same as in CreateVCF_finalMock).
            mkdir -p {params.outdir}
            cd {params.wd}
            # First run creates/truncates the log ...
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.filtered} -out {params.out} -b {input.barcodes} -formats {params.format} &> {log}
            # ... second run appends to it.
            perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.unfiltered} -out {params.unfilteredOut} -b {input.barcodes} -formats {params.format} &>> {log}
            cp {params.pipeout} {output}
        """

################################################################################
##
## Process the final mock

rule ParseMpileup_createCountFiles_finalMock:
    """
    Run GBS-SNP-CROP-6_1.pl on a single sample's mpileup file from
    MPILEUP/mpileup_finalMock.

    Declares one count file and one ref file per sample as outputs; the
    script is started from the mpileup_finalMock folder.
    """
    input:
        config["project-folder"] + "/MPILEUP/mpileup_finalMock/{samples}.mpileup"
    output:
        co=config["project-folder"] + "/MPILEUP/mpileup_finalMock/{samples}.count.txt",
        ref=config["project-folder"] + "/MPILEUP/mpileup_finalMock/{samples}.ref.txt"
    log:
        config["project-folder"] + "/logs/Perl/ParseMpileup_createCountFiles_finalMock.{samples}.log"
    benchmark:
        config["project-folder"] + "/benchmark/ParseMpileup_createCountFiles_finalMock.{samples}.tsv"
    threads: 1
    conda: "envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    params:
        p=config["params"]["step6"]["p"],
        wd=config["project-folder"] + "/MPILEUP/mpileup_finalMock",
        pipefolder=config["pipeline-folder"]
    # stdout and stderr of the perl script both end up in the rule log.
    shell:"""
        cd {params.wd}
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_1.pl -b {input} -p {params.p} &> {log}
  	"""
  	
rule create_verticalRef_finalMock:
    """
    Merge the per-sample ref files of the final mock into VerticalRefPos.txt
    and write the list of count files.

    Steps:
      1. merge all ref files with "sort -u -m -k2n" into VerticalRefPos.tmp,
      2. sort that file again on field 2 and pass it through uniq to produce
         VerticalRefPos.txt,
      3. call getCountFileList.sh with the barcodes file and the count folder
         to produce CountFileList.txt.

    The count files are declared as inputs so that they exist before
    getCountFileList.sh runs, although they are not named on the command line.
    """
    input:
        co=expand("%s/MPILEUP/mpileup_finalMock/{samples}.count.txt" % (config["project-folder"]), samples=samples),
        ref=expand("%s/MPILEUP/mpileup_finalMock/{samples}.ref.txt" % (config["project-folder"]), samples=samples),
        barcodes=config["barcodes-file"]
    output:
        vref="%s/MPILEUP/mpileup_finalMock/VerticalRefPos.txt" % (config["project-folder"]),
        co="%s/MPILEUP/mpileup_finalMock/CountFileList.txt" % (config["project-folder"])
    log:
        "%s/logs/BASH/create_verticalRef_finalMock.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/create_verticalRef_finalMock.tsv" % (config["project-folder"])
    params:
        wd="%s/MPILEUP/mpileup_finalMock" % config["project-folder"],
        countfolder="%s/MPILEUP/mpileup_finalMock" % (config["project-folder"]),
        pipefolder=config["pipeline-folder"],
        tmpdir=config["local-scratch"]
    threads: 1
    # stderr of every command is collected in the declared log file (the
    # first command creates it, the following ones append); previously the
    # log was declared but never written.
    # The count file list is built by getCountFileList.sh from the barcodes
    # file; a plain "ls *count.txt" listing is deliberately not used.
    shell:"""
        export TMPDIR={params.tmpdir}
        sort -u -m -k2n {input.ref} -o {params.wd}/VerticalRefPos.tmp 2> {log}
        sort -k2n {params.wd}/VerticalRefPos.tmp 2>> {log} | uniq > {output.vref}

        {params.pipefolder}/scripts/getCountFileList.sh {input.barcodes} {params.countfolder} > {output.co} 2>> {log}
    """

checkpoint cut_verticalRef_finalMock:
    """
    Split MPILEUP/mpileup_finalMock/VerticalRefPos.txt into pieces of
    params.split lines.

    The pieces are written as VerticalRefPos.<numeric suffix> into the
    VerticalRefPos/ directory, which is the checkpoint output.
    """
    input:
        config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos.txt"
    output:
        directory(config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos/")
    params:
        # File name prefix handed to split; the suffix is appended to it.
        out=config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos/VerticalRefPos.",
        # Number of lines per piece.
        split=1000000
    shell:"""
        mkdir -p {output}
        split -l {params.split} --numeric-suffixes {input} {params.out}
    """
    
rule create_MasterMatrix_parallel_finalMock:
    """
    Run GBS-SNP-CROP-6_2.pl on one piece ({i}) of the split final-mock
    VerticalRefPos file together with the count file list, producing
    GSC.MasterMatrix_{i}.tsv.
    """
    input:
        verRef=config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos/VerticalRefPos.{i}",
        counts=config["project-folder"] + "/MPILEUP/mpileup_finalMock/CountFileList.txt"
    output:
        config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos/GSC.MasterMatrix_{i}.tsv"
    params:
        p=config["params"]["step6"]["p"],
        wd=config["project-folder"] + "/MPILEUP/mpileup_finalMock/VerticalRefPos",
        pipefolder=config["pipeline-folder"]
    log:
        config["project-folder"] + "/logs/BASH/create_MasterMatrix_parallel_finalMock.{i}.log"
    benchmark:
        config["project-folder"] + "/benchmark/create_MasterMatrix_parallel_finalMock.{i}.tsv"
    conda: "envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    # The script is started from inside the VerticalRefPos directory; stdout
    # and stderr both go to the rule log.
    shell:"""
        cd {params.wd}
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-6_2.pl -in {input.verRef} -count {input.counts} -out {output} -p {params.p} &> {log}
     """
     
def aggregate_inputMasterMatrix_finalMock(wildcards):
    """
    Input function listing the per-chunk MasterMatrix files of the final mock.

    Waits for the cut_verticalRef_finalMock checkpoint, looks up the chunk
    suffixes of the VerticalRefPos.<i> files in the checkpoint's output
    directory and returns the matching GSC.MasterMatrix_<i>.tsv paths.

    The suffixes are sorted because glob_wildcards returns them in directory
    listing order, and the consuming rule concatenates its inputs in the
    order given here. Sorting keeps the chunks in the order split wrote them
    and makes the result reproducible.
    """
    checkpoint_outputMM = checkpoints.cut_verticalRef_finalMock.get(**wildcards).output[0]
    chunk_ids = sorted(glob_wildcards(os.path.join(checkpoint_outputMM, "VerticalRefPos.{i}")).i)
    return expand("%s/MPILEUP/mpileup_finalMock/VerticalRefPos/GSC.MasterMatrix_{i}.tsv" % (config["project-folder"]),
                  i=chunk_ids)
                  
rule aggregate_MasterMatrix_finalMock:
    """
    Concatenate the per-chunk MasterMatrix files supplied by
    aggregate_inputMasterMatrix_finalMock into
    MPILEUP/mpileup_finalMock/GSC.MasterMatrix.txt.
    """
    input:
        aggregate_inputMasterMatrix_finalMock
    output:
        config["project-folder"] + "/MPILEUP/mpileup_finalMock/GSC.MasterMatrix.txt"
    benchmark:
        config["project-folder"] + "/benchmark/aggregate_MasterMatrix_finalMock.tsv"
    # The files are joined in the order the input function returns them.
    shell:"""
        cat {input} > {output}
    """
    
rule FilterVariants_finalMock:
    """
    Filter variants and call genotypes using the final mock.

    GBS-SNP-CROP-7.pl is run twice from the MPILEUP/mpileup_finalMock folder:
      1. with the thresholds from config["params"]["step7"], writing to
         params.out (the "filtered" matrix),
      2. with altStrength, mnAlleleRatio, mnCall and mnAvgDepth set to 0 and
         mxAvgDepth set to 1000000, writing to params.unfiltered.

    Both runs write to the same log file; the second run appends so that the
    output of the first run is not lost.

    NOTE(review): the script reads and writes the names given in
    config["params"]["step7"] (input/out/unfiltered) relative to the working
    directory; these are presumably the same files as the declared
    input/output paths - confirm against the configuration.
    """
    input:
        "%s/MPILEUP/mpileup_finalMock/GSC.MasterMatrix.txt" % (config["project-folder"])
    output:
        filtered="%s/MPILEUP/mpileup_finalMock/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
        unfiltered="%s/MPILEUP/mpileup_finalMock/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"])
    params:
        input=config["params"]["step7"]["input"],
        out=config["params"]["step7"]["out"],
        unfiltered=config["params"]["step7"]["unfiltered"],
        p=config["params"]["step7"]["p"],
        mnHoDepth0=config["params"]["step7"]["mnHoDepth0"],
        mnHoDepth1=config["params"]["step7"]["mnHoDepth1"],
        mnHetDepth=config["params"]["step7"]["mnHetDepth"],
        altStrength=config["params"]["step7"]["altStrength"],
        mnAlleleRatio=config["params"]["step7"]["mnAlleleRatio"],
        mnCall=config["params"]["step7"]["mnCall"],
        mnAvgDepth=config["params"]["step7"]["mnAvgDepth"],
        mxAvgDepth=config["params"]["step7"]["mxAvgDepth"],
        wd="%s/MPILEUP/mpileup_finalMock" % config["project-folder"],
        pipefolder=config["pipeline-folder"]
    log:
        "%s/logs/Perl/FilterVariants_finalMock.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/FilterVariants_finalMock.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    shell:"""
        #******PARAMETERS*****
        # -in 	Discovery master matrix input file. The output from last step 	File 	GSC.MasterMatrix.txt
        # -out 	Genotyping matrix for the population 	Output file 	GSC.GenoMatrix.txt
        # -p 	Identify SNPs only (snp) or SNPs + indels (indel) 	String 	--
        # -mnHoDepth0 	Minimum depth required for calling a homozygote when the alternative allele depth = 0 	Numeric 	5
        # -mnHoDepth1 	Minimum depth required for calling a homozygote when the alternative allele depth = 1 	Numeric 	20
        # -mnHetDepth 	Minimum depth required for each allele when calling a heterozygote 	Numeric 	3
        # -altStrength 	Across the population for a given putative bi-allelic variant, this alternate allele strength parameter is the minimum proportion of non-primary allele reads that are the secondary allele 	Numeric 	0.8
        # -mnAlleleRatio 	Minimum required ratio of less frequent allele depth to more frequent allele depth 	Numeric 	0.25
        # -mnCall 	Minimum acceptable proportion of genotyped individuals to retain a variant 	Numeric 	0.75
        # -mnAvgDepth 	Minimum average depth of an acceptable variant 	Numeric 	3
        # -mxAvgDepth 	Maximum average depth of an acceptable variant 	Numeric 	200

        cd {params.wd}
        # First run creates/truncates the log ...
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.out} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength {params.altStrength} -mnAlleleRatio {params.mnAlleleRatio} -mnCall {params.mnCall} -mnAvgDepth {params.mnAvgDepth} -mxAvgDepth {params.mxAvgDepth} &> {log}
        # ... second run appends to it.
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-7.pl -in {params.input} -out {params.unfiltered} -p {params.p} -mnHoDepth0 {params.mnHoDepth0} -mnHoDepth1 {params.mnHoDepth1} -mnHetDepth {params.mnHetDepth} -altStrength 0 -mnAlleleRatio 0 -mnCall 0 -mnAvgDepth 0 -mxAvgDepth 1000000 &>> {log}
    """
    
rule CreateVCF_finalMock:
    """
    Create the VCF output of the pipeline for the final mock.

    GBS-SNP-CROP-8.pl is run twice from the MPILEUP/mpileup_finalMock folder,
    once on the filtered and once on the unfiltered genotype matrix, with the
    output labels and formats taken from config["params"]["step8"]. Afterwards
    params.pipeout (GSC.vcf in the working folder) is copied to the declared
    output in the VCF folder.

    Both runs write to the same log file; the second run appends so that the
    output of the first run is not lost.
    """
    input:
        filtered="%s/MPILEUP/mpileup_finalMock/variants/GSC.GenoMatrix.txt" % (config["project-folder"]),
        unfiltered="%s/MPILEUP/mpileup_finalMock/variants/GSC.GenoMatrix.unfiltered.txt" % (config["project-folder"]),
        barcodes=config["barcodes-file"]
    output:
        "%s/VCF/FinalSetVariants_finalMock.vcf" % (config["project-folder"]),
    params:
        genoMatrix=config["params"]["step8"]["genoMatrix"],
        out=config["params"]["step8"]["out"],
        unfilteredOut=config["params"]["step8"]["unfiltered"],
        format=config["params"]["step8"]["format"],
        wd="%s/MPILEUP/mpileup_finalMock" % config["project-folder"],
        outdir="%s/VCF" % config["project-folder"],
        pipeout="%s/MPILEUP/mpileup_finalMock/GSC.vcf" % (config["project-folder"]),
        pipefolder=config["pipeline-folder"]
    log:
        "%s/logs/Perl/CreateVCF_finalMock.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/CreateVCF_finalMock.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    shell:"""
        #******PARAMETERS*****
        # -in 	Final genotyping matrix input file 	File 	GSC.GenoMatrix.txt
        # -out 	Output label name without extension 	String 	GSC
        # -b 	BarcodeID file name See Appendix A 	File 	--
        # -formats 	The name(s) (R, Tassel, Plink, vcf, H) for which the GBS-SNP-CROP final genotyping matrix should be converted. If more than one format is desired, the names should be separated by commas without any space, as in the examples shown below 	String 	R,T,P,V,H

        mkdir -p {params.outdir}
        cd {params.wd}
        # First run creates/truncates the log ...
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.filtered} -out {params.out} -b {input.barcodes} -formats {params.format} &> {log}
        # ... second run appends to it.
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-8.pl -in {input.unfiltered} -out {params.unfilteredOut} -b {input.barcodes} -formats {params.format} &>> {log}

        cp {params.pipeout} {output}
    """
