rule CreateClusterAndMockReference:
    """
    Cluster the trimmed reads and assemble the Mock Reference.

    Changes into the TRIMMED fastq folder and runs scripts/GBS-SNP-CROP-4.pl
    from the pipeline folder. The trimmed R1 files are declared as inputs, but
    only the barcodes file is passed on the command line. Stdout and stderr of
    the Perl script are written to the rule log.
    """
    input:
        expand("%s/FASTQ/TRIMMED/{samples}.R1.fq.gz" % (config["project-folder"]), samples=samples),
        barcodes=config["barcodes-file"]
    output:
        Genome="%s/FASTQ/TRIMMED/GSC.MR.Genome.fa" % (config["project-folder"]),
        Clusters="%s/FASTQ/TRIMMED/GSC.MR.Clusters.fa" % (config["project-folder"])
    params:
        # NOTE(review): libname and db are configured here but are not passed
        # to the Perl script below - confirm whether that is intentional.
        libtype=config["libtype"],
        libname=config["libname"],
        pear=config["params"]["step4"]["pear"],
        vsearch=config["params"]["step4"]["vsearch"],
        cl=config["params"]["step4"]["cl"],
        rl=config["params"]["step4"]["rl"],
        pl=config["params"]["step4"]["pl"],
        p=config["params"]["step4"]["p"],
        id=config["params"]["step4"]["id"],
        db=config["params"]["step4"]["db"],
        min=config["params"]["step4"]["min"],
        MR=config["params"]["step4"]["MR"],
        wd="%s/FASTQ/TRIMMED" % config["project-folder"],
        pipefolder=config["pipeline-folder"],
        # Kept for backward compatibility; the command itself uses {threads}.
        threads=config["params"]["step4"]["threads"]
    # Declaring the thread count via `threads` (and not only as a param) lets
    # the Snakemake scheduler account for the cores this job actually uses.
    threads: int(config["params"]["step4"]["threads"])
    log:
        "%s/logs/CreateClusterAndMockReference.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/CreateClusterAndMockReference.tsv" % (config["project-folder"])
    conda:"envs/gbs.yaml"
    singularity: config["singularity"]["gbs"]
    shell:"""
        echo "Number of threads used:" {threads}

        #******PARAMETERS*****
        # -rl: Raw GBS read length. Numeric. Default: 150
        # pl: Minimum length required after merging to retain read. Numeric. Default: 32
        # p: p-value for PEAR. Numeric. Default: 0.01
        # id: Nucleotide identity value required for Vsearch read clustering. Numeric. Default: 0.93
        # db: Activate the Vsearch derreplication process in blocks. Numeric. Default: Population size
        # min: Minimum length (bp) for a Mock Reference cluster. Numeric. Default: 32
        # MR: Mock Reference name. String. Default: GSC.MockRef

        cd {params.wd}
        perl {params.pipefolder}/scripts/GBS-SNP-CROP-4.pl -pr {params.pear} -vs {params.vsearch} -d {params.libtype} -b {input.barcodes} -t {threads} -cl {params.cl} -rl {params.rl} -pl {params.pl} -p {params.p} -id {params.id}  -min {params.min} -MR {params.MR} &> {log}
    """

# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule MockVsRef_alignment:
        """
        Map the mock reference clusters to the reference genome (minimap2).

        Writes the alignment as SAM to the output file and minimap2's stderr
        to the rule log.
        """
        input:
            refgenome=config["genome"],
            mockgenome="%s/FASTQ/TRIMMED/GSC.MR.Clusters.fa" % (config["project-folder"])
        output:
            "%s/SAM/mockToRef.sam" % (config["project-folder"])
        log:
            "%s/logs/MINIMAP2/mm2_map_mockVSref.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/MockVsRef_alignment.tsv" % (config["project-folder"])
        threads: 20
        conda:"envs/minimap2.yaml"
        singularity: config["singularity"]["minimap2"]
        # -t hands the reserved threads to minimap2; without it the tool runs
        # with its own default thread count regardless of `threads`.
        # The split prefix is derived from the output path so that temporary
        # files of a multi-part index cannot collide with another minimap2 job
        # running in the same working directory.
        shell:"""
           minimap2 -ax sr -I8g -t {threads} --split-prefix={output}.split {input.refgenome} {input.mockgenome} > {output} 2> {log}
        """

# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule MockVsRef_samtools_SamToSortedBam:
        """
        Convert the mock-vs-reference SAM to BAM, then sort and index it.

        Produces the unsorted BAM and the coordinate-sorted BAM as declared
        outputs. `samtools index -c` additionally writes a CSI index next to
        the sorted BAM; that index file is not a declared output.
        """
        input:
            # NOTE(review): `ref` is not used by the commands below; it only
            # adds the genome file as a dependency.
            ref=config["genome"],
            sam="%s/SAM/mockToRef.sam" % (config["project-folder"])
        output:
            bam="%s/BAM/Mockref/mockToRef.bam" % (config["project-folder"]),
            sorted="%s/BAM/Mockref/mockToRef.sorted.bam" % (config["project-folder"])
        log:
            "%s/logs/Samtools/MockVsRef_samtools_SamToSortedBam.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/MockVsRef_samtools_SamToSortedBam.tsv" % (config["project-folder"])
        conda:"envs/step3.yaml"
        singularity: config["singularity"]["gbs"]
        # The first write truncates the log (&>) so that a re-run does not
        # append to the messages of a previous execution; all later writes
        # append (&>> / 2>>).
        shell:"""
            echo "df -h output:\n----------------------------\n" &> {log}
            df -h &>> {log}
            echo "samtools view (sam to bam)\n----------------------------\n" &>> {log}
            samtools view -b {input.sam} > {output.bam} 2>> {log}
            echo "samtools sort:\n----------------------------\n" &>> {log}
            samtools sort {output.bam} -o {output.sorted} &>> {log}
            echo "samtools index:\n----------------------------\n" &>> {log}
            samtools index -c {output.sorted} &>> {log}
        """


# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule MockVsRef_AlignmentStats:
        """
        Get the mapping stats for the mock vs. reference alignment.

        From the SAM file: counts per FLAG value (samflags), `samtools
        flagstat` and `samtools stats`. From the sorted BAM: columns 1 and 3
        of `samtools idxstats` (c), a bedGraph of covered regions (bed) and
        the merged regions (bedMerged). For every merged region a 6 bp window
        left of its start and right of its end is written as BED, extracted
        from the reference as FASTA, and the upper-cased sequences are counted
        (left, right, and both joined line by line with '+').
        """
        input:
            ref=config["genome"],
            sam="%s/SAM/mockToRef.sam" % (config["project-folder"]),
            sbam="%s/BAM/Mockref/mockToRef.sorted.bam" % (config["project-folder"])
        output:
            fs="%s/BAM/Mockref/mockToRef.sam.flagstat" % (config["project-folder"]),
            samflags="%s/BAM/Mockref/mockToRef.sam.samflags" % (config["project-folder"]),
            stats="%s/BAM/Mockref/mockToRef.sam.stats" % (config["project-folder"]),
            c="%s/BAM/Mockref/mockToRef.coverage" % (config["project-folder"]),
            bed="%s/BAM/Mockref/mockToRef.bed" % (config["project-folder"]),
            bedMerged="%s/BAM/Mockref/mockToRef.merged.bed" % (config["project-folder"]),
            leftBed="%s/BAM/Mockref/mockToRef.merged.left.bed" % (config["project-folder"]),
            rightBed="%s/BAM/Mockref/mockToRef.merged.right.bed" % (config["project-folder"]),
            leftFACounts="%s/SAM/mockToRef.left.fa.counts" % (config["project-folder"]),
            rightFACounts="%s/SAM/mockToRef.right.fa.counts" % (config["project-folder"]),
            leftFA="%s/BAM/Mockref/mockToRef.merged.left.fa" % (config["project-folder"]),
            rightFA="%s/BAM/Mockref/mockToRef.merged.right.fa" % (config["project-folder"]),
            FA="%s/BAM/Mockref/mockToRef.merged.combined.fa" % (config["project-folder"]),
            FACounts="%s/BAM/Mockref/mockToRef.merged.combined.fa.counts" % (config["project-folder"])
        log:
            "%s/logs/MockVsRef_AlignmentStats.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/MockVsRef_AlignmentStats.tsv" % (config["project-folder"])
        # NOTE(review): the lambda ignores its argument and always requests
        # cpu_count() threads; cpu_count is expected to be in scope from
        # elsewhere in the workflow.
        threads: lambda cores: cpu_count()
        params: tmpdir=config["tmpdir"]
        conda:"envs/step3.yaml"
        singularity: config["singularity"]["gbs"]
        # TMPDIR is exported so that child processes (e.g. sort) see the
        # configured temporary folder even when TMPDIR was not already part of
        # the environment; a plain assignment would only set a shell variable.
        shell:"""
            df -h &> {log}
            echo "Number of threads used:" {threads}
            echo "Initial temporary folder": $TMPDIR
            export TMPDIR={params.tmpdir}
            echo "Used temporary folder": $TMPDIR
            sed '/^@/d' {input.sam} | cut -f2 | sort | uniq -c > {output.samflags}
            samtools flagstat {input.sam} > {output.fs}
            samtools stats {input.sam} > {output.stats}
            samtools idxstats {input.sbam} | awk '{{print $1\" \"$3}}' > {output.c}
            bedtools genomecov -ibam {input.sbam} -bg > {output.bed}
            bedtools merge -i {output.bed} > {output.bedMerged}
            awk '{{print $1 "\\t" (($2 - 6)<0?0:($2 - 6)) "\\t" ($2==0?6:$2)}}' {output.bedMerged} > {output.leftBed}
            awk '{{print $1 "\\t" $3 "\\t" $3 + 6}}' {output.bedMerged} > {output.rightBed}
            bedtools getfasta -fi {input.ref} -bed {output.leftBed} > {output.leftFA}
            bedtools getfasta -fi {input.ref} -bed {output.rightBed} > {output.rightFA}

            sed '/^>/d' {output.leftFA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.leftFACounts}
            sed '/^>/d' {output.rightFA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.rightFACounts}

            paste -d '+' {output.leftFA} {output.rightFA} > {output.FA}
            sed '/^>/d' {output.FA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.FACounts}
        """

# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule MockVsRef_SortedBamToMpileup:
        """
        Get Mpileup for mockref against Reference Genome.

        Runs `samtools mpileup` on the sorted mock-vs-reference BAM with the
        base/mapping quality thresholds taken from config params step5d.
        """
        input:
            bam="%s/BAM/Mockref/mockToRef.sorted.bam" % (config["project-folder"]),
            refgenome=config["genome"]
        output:
            "%s/MPILEUP/mpileup_mockToRef/mockToRef.mpileup" % (config["project-folder"])
        log:
            "%s/logs/minimap2-SortedBamToMpileup.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/MockVsRef_SortedBamToMpileup.tsv" % (config["project-folder"])
        conda:"envs/samtools.yaml"
        singularity: config["singularity"]["gbs"]
        params:
            # NOTE(review): threads is configured but not used by the command.
            threads=config["params"]["step5d"]["threads"],
            Q=config["params"]["step5d"]["Q"],
            q=config["params"]["step5d"]["q"]
        # stderr of samtools is appended to the log so that warnings and
        # errors of the mpileup step end up next to the disk usage report.
        shell:"""
            df -h &> {log}
            samtools mpileup -Q {params.Q} -q {params.q} -B -C 50 -f {input.refgenome} {input.bam} > {output} 2>> {log}
        """

##########################################################
##
## Mock Reference refinement
##
##########################################################

rule refine_mock_reference:
    """
    Refine the mock reference by sourcing the configured R refinement script.

    The R session is started with the project folder, the cluster fasta, the
    two coverage thresholds and the output path predefined as variables, plus
    `snakemake <- TRUE`. The per-sample coverage files are listed as inputs
    but are not referenced in the command itself.
    """
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       minTotalReadCoverage=config["mockref"]["TotalReadCoverage"],
       minSampleCoverage=config["mockref"]["minSampleCoverage"]
    singularity: config["singularity"]["r-gbs"]
    benchmark:
        "{}/benchmark/refine_mock_reference.tsv".format(config["project-folder"])
    log:
        "{}/logs/R/refine_mock_reference.log".format(config["project-folder"])
    input:
        clusters="{}/FASTQ/TRIMMED/GSC.MR.Clusters.fa".format(config["project-folder"]),
        # ancient(): changes to the script's timestamp do not trigger a re-run.
        script=ancient(config["refinement-script"]),
        c=expand(
            "{}/FASTQ/TRIMMED/alignments_clusters/{{samples}}.coverage".format(config["project-folder"]),
            samples=samples,
        )
    output:
        "{}/MockReference/MockReference.fa".format(config["project-folder"])
    shell:"""
       R -e "projFolder <- '{params.projFolder}'; \
             mockClusters.file <- '{input.clusters}'; \
             minTotalReadCoverage <- '{params.minTotalReadCoverage}'; \
             minSampleCoverage <- '{params.minSampleCoverage}'; \
             mockClusters.refined.file <- '{output}'; \
             snakemake <- TRUE;\
             source('{input.script}')" &> {log}
    """

# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule FinalMockVsRef_alignment:
        """
        Map the refined (final) mock reference to the reference genome (minimap2).

        Writes the alignment as SAM to the output file and minimap2's stderr
        to the rule log.
        """
        input:
            refgenome=config["genome"],
            mockgenome="%s/MockReference/MockReference.fa" % (config["project-folder"])
        output:
            "%s/SAM/finalMockToRef.sam" % (config["project-folder"])
        log:
            "%s/logs/MINIMAP2/mm2_map_finalMockVSref.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/FinalMockVsRef_alignment.tsv" % (config["project-folder"])
        threads: 20
        # Same conda environment as the MockVsRef_alignment rule, so the rule
        # also works when the workflow is run with --use-conda.
        conda:"envs/minimap2.yaml"
        singularity: config["singularity"]["minimap2"]
        # -t hands the reserved threads to minimap2; without it the tool runs
        # with its own default thread count regardless of `threads`.
        # The split prefix is derived from the output path so that temporary
        # files of a multi-part index cannot collide with another minimap2 job
        # running in the same working directory.
        shell:"""
           minimap2 -ax sr -I8g -t {threads} --split-prefix={output}.split {input.refgenome} {input.mockgenome} > {output} 2> {log}
        """

# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule FinalMockVsRef_samtools_SamToSortedBam:
        """
        Convert the final-mock-vs-reference SAM to BAM, then sort and index it.

        Produces the unsorted BAM and the coordinate-sorted BAM as declared
        outputs. `samtools index -c` additionally writes a CSI index next to
        the sorted BAM; that index file is not a declared output.
        """
        input:
            # NOTE(review): `ref` is not used by the commands below; it only
            # adds the genome file as a dependency.
            ref=config["genome"],
            sam="%s/SAM/finalMockToRef.sam" % (config["project-folder"])
        output:
            bam="%s/BAM/FinalMockref/mockToRef.bam" % (config["project-folder"]),
            sorted="%s/BAM/FinalMockref/mockToRef.sorted.bam" % (config["project-folder"])
        log:
            "%s/logs/Samtools/FinalMockVsRef_samtools_SamToSortedBam.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/FinalMockVsRef_samtools_SamToSortedBam.tsv" % (config["project-folder"])
        # Same conda environment as the MockVsRef_samtools_SamToSortedBam rule,
        # so the rule also works when the workflow is run with --use-conda.
        conda:"envs/step3.yaml"
        singularity: config["singularity"]["gbs"]
        # The first write truncates the log (&>) so that a re-run does not
        # append to the messages of a previous execution; all later writes
        # append (&>> / 2>>).
        shell:"""
            echo "df -h output:\n----------------------------\n" &> {log}
            df -h &>> {log}
            echo "samtools view (sam to bam)\n----------------------------\n" &>> {log}
            samtools view -b {input.sam} > {output.bam} 2>> {log}
            echo "samtools sort:\n----------------------------\n" &>> {log}
            samtools sort {output.bam} -o {output.sorted} &>> {log}
            echo "samtools index:\n----------------------------\n" &>> {log}
            samtools index -c {output.sorted} &>> {log}
        """


# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule FinalMockVsRef_AlignmentStats:
        """
        Get the mapping stats for the final mock vs. reference alignment.

        From the SAM file: counts per FLAG value (samflags), `samtools
        flagstat` and `samtools stats`. From the sorted BAM: columns 1 and 3
        of `samtools idxstats` (c), a bedGraph of covered regions (bed) and
        the merged regions (bedMerged). For every merged region a 6 bp window
        left of its start and right of its end is written as BED, extracted
        from the reference as FASTA, and the upper-cased sequences are counted
        (left, right, and both joined line by line with '+').
        """
        input:
            ref=config["genome"],
            sam="%s/SAM/finalMockToRef.sam" % (config["project-folder"]),
            sbam="%s/BAM/FinalMockref/mockToRef.sorted.bam" % (config["project-folder"])
        output:
            fs="%s/BAM/FinalMockref/mockToRef.sam.flagstat" % (config["project-folder"]),
            samflags="%s/BAM/FinalMockref/mockToRef.sam.samflags" % (config["project-folder"]),
            stats="%s/BAM/FinalMockref/mockToRef.sam.stats" % (config["project-folder"]),
            c="%s/BAM/FinalMockref/mockToRef.coverage" % (config["project-folder"]),
            bed="%s/BAM/FinalMockref/mockToRef.bed" % (config["project-folder"]),
            bedMerged="%s/BAM/FinalMockref/mockToRef.merged.bed" % (config["project-folder"]),
            leftBed="%s/BAM/FinalMockref/mockToRef.merged.left.bed" % (config["project-folder"]),
            rightBed="%s/BAM/FinalMockref/mockToRef.merged.right.bed" % (config["project-folder"]),
            leftFACounts="%s/SAM/finalMockToRef.left.fa.counts" % (config["project-folder"]),
            rightFACounts="%s/SAM/finalMockToRef.right.fa.counts" % (config["project-folder"]),
            leftFA="%s/BAM/FinalMockref/mockToRef.merged.left.fa" % (config["project-folder"]),
            rightFA="%s/BAM/FinalMockref/mockToRef.merged.right.fa" % (config["project-folder"]),
            FA="%s/BAM/FinalMockref/mockToRef.merged.combined.fa" % (config["project-folder"]),
            FACounts="%s/BAM/FinalMockref/mockToRef.merged.combined.fa.counts" % (config["project-folder"])
        log:
            "%s/logs/FinalMockVsRef_AlignmentStats.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/FinalMockVsRef_AlignmentStats.tsv" % (config["project-folder"])
        # NOTE(review): the lambda ignores its argument and always requests
        # cpu_count() threads; cpu_count is expected to be in scope from
        # elsewhere in the workflow.
        threads: lambda cores: cpu_count()
        params: tmpdir=config["tmpdir"]
        conda:"envs/step3.yaml"
        singularity: config["singularity"]["gbs"]
        # TMPDIR is exported so that child processes (e.g. sort) see the
        # configured temporary folder even when TMPDIR was not already part of
        # the environment; a plain assignment would only set a shell variable.
        shell:"""
            df -h &> {log}
            echo "Number of threads used:" {threads}
            echo "Initial temporary folder": $TMPDIR
            export TMPDIR={params.tmpdir}
            echo "Used temporary folder": $TMPDIR
            sed '/^@/d' {input.sam} | cut -f2 | sort | uniq -c > {output.samflags}
            samtools flagstat {input.sam} > {output.fs}
            samtools stats {input.sam} > {output.stats}
            samtools idxstats {input.sbam} | awk '{{print $1\" \"$3}}' > {output.c}
            bedtools genomecov -ibam {input.sbam} -bg > {output.bed}
            bedtools merge -i {output.bed} > {output.bedMerged}
            awk '{{print $1 "\\t" (($2 - 6)<0?0:($2 - 6)) "\\t" ($2==0?6:$2)}}' {output.bedMerged} > {output.leftBed}
            awk '{{print $1 "\\t" $3 "\\t" $3 + 6}}' {output.bedMerged} > {output.rightBed}
            bedtools getfasta -fi {input.ref} -bed {output.leftBed} > {output.leftFA}
            bedtools getfasta -fi {input.ref} -bed {output.rightBed} > {output.rightFA}

            sed '/^>/d' {output.leftFA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.leftFACounts}
            sed '/^>/d' {output.rightFA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.rightFACounts}

            paste -d '+' {output.leftFA} {output.rightFA} > {output.FA}
            sed '/^>/d' {output.FA} | awk '{{ print toupper($0) }}' | sort | uniq -c > {output.FACounts}
        """



# The rule is only defined when a reference genome is configured.
if config["genome"] != "":
    rule Mockrefloci_vs_samplealignments:
        """
        Intersect positions of mock reference on reference against the sample alignments.

        Per sample: reports the mock-on-reference BED intervals that overlap
        the sample's StringTie GTF (bedtools intersect -wa), and extracts the
        reference sequences for the sample's left and right flanking
        intervals (bedtools getfasta).
        """
        input:
            ref=config["genome"],
            bed="%s/BAM/FinalMockref/mockToRef.bed" % (config["project-folder"]),
            gtf="%s/Stringtie/{samples}.stringtie.gtf" % (config["project-folder"]),
            left="%s/FASTQ/TRIMMED/alignments_reference/{samples}.flanking_left" % (config["project-folder"]),
            right="%s/FASTQ/TRIMMED/alignments_reference/{samples}.flanking_right" % (config["project-folder"])
        output:
            inter="%s/Stringtie/{samples}.intersected_with_mockOnRef_loci.is" % (config["project-folder"]),
            left="%s/FASTQ/TRIMMED/alignments_reference/{samples}.flanking_left.fa" % (config["project-folder"]),
            right="%s/FASTQ/TRIMMED/alignments_reference/{samples}.flanking_right.fa" % (config["project-folder"])
        log:
            "%s/logs/Mockrefloci_vs_samplealignments.{samples}.log" % (config["project-folder"])
        benchmark:
            "%s/benchmark/Mockrefloci_vs_samplealignments.{samples}.tsv" % (config["project-folder"])
        params: tmpdir=config["tmpdir"]
        # NOTE(review): the lambda ignores its argument and always requests
        # cpu_count() threads; cpu_count is expected to be in scope from
        # elsewhere in the workflow.
        threads: lambda cores: cpu_count()
        singularity: config["singularity"]["gbs"]
        # TMPDIR is exported before the first command so that every tool in
        # this job sees the configured temporary folder. stderr of each
        # command is collected in the declared log file, which was previously
        # never written.
        shell:"""
            export TMPDIR={params.tmpdir}
            bedtools intersect -wa -a {input.bed} -b {input.gtf} > {output.inter} 2> {log}

            bedtools getfasta -fi {input.ref} -bed {input.left} > {output.left} 2>> {log}
            bedtools getfasta -fi {input.ref} -bed {input.right} > {output.right} 2>> {log}
        """
