# vim: set filetype=sh :

def get_reference_files(wildcards):
    """
    Return the reference-genome dependent input files of the final report.

    Args:
        wildcards: Snakemake wildcards object. It is part of the input
            function signature but is not used here.

    Returns:
        An empty list when config["genome"] is the empty string. Otherwise a
        list of paths below config["project-folder"], each listed exactly
        once.
    """
    if config["genome"] == "":
        return []

    # Locations relative to the project folder. The GSC.vcf.fa entry used to
    # be listed twice (as file5 and file9); it is requested only once now.
    relative_paths = (
        "/References/full_inSilico_reference.fa",
        "/References/sizeSelected_inSilico_reference.fa",
        "/BAM/Mockref/mockToRef.sorted.bam",
        "/BAM/Mockref/mockToRef.sam.samflags",
        "/MPILEUP/mpileup_reference/GSC.vcf.fa",
        "/BAM/FinalMockref/mockToRef.sam.flagstat",
        "/VCF/FinalSetVariants_referenceGenome.vcf",
        "/MPILEUP/mpileup_reference/variants/GSC.GenoMatrix.txt",
        "/BAM/mockVariantsToReference/mockVariantsToReference.bam",
    )

    path = config["project-folder"]
    return [path + relative for relative in relative_paths]

def get_reference_expand_files(wildcards):
    """
    Return the per-sample, reference-genome dependent input files.

    Args:
        wildcards: Snakemake wildcards object. It is part of the input
            function signature but is not used here.

    Returns:
        An empty list when config["genome"] is the empty string. Otherwise,
        for every (folder, suffix) pair below, one path per unique entry of
        samplesheet["sample_name"], grouped pair by pair.
    """
    if config["genome"] == "":
        return []

    # Unique sample names; the same set is iterated for every file type.
    sample_names = set(samplesheet["sample_name"])
    path = config["project-folder"]

    # (folder below the project folder, file name suffix), in output order.
    layout = (
        ("/FASTQ/TRIMMED/alignments_reference/", ".coverage"),
        ("/FASTQ/TRIMMED/alignments_reference/", ".sorted.bam"),
        ("/FASTQ/TRIMMED/alignments_reference/", ".sam.flagstat"),
        ("/FASTQ/TRIMMED/alignments_reference/", ".paired_alignments"),
        ("/FASTQ/TRIMMED/alignments_reference/", ".flanking_left.fa"),
        ("/FASTQ/TRIMMED/alignments_reference/", ".flanking_right.fa"),
        ("/BAM/Insilico/full/", ".sam.flagstat"),
        ("/BAM/Insilico/full/", ".coverage"),
        ("/BAM/Insilico/selected/", ".sam.flagstat"),
        ("/BAM/Insilico/selected/", ".coverage"),
        ("/QUANTIFICATION/Reference_contigs/", "_reference_contigs_fc.txt"),
        ("/Stringtie/", ".intersected_with_mockOnRef_loci.is"),
    )

    collected = []
    for folder, suffix in layout:
        collected.extend(path + folder + name + suffix for name in sample_names)
    return collected
   
   
# Builds <project-folder>/finalReport.html by sourcing the R script configured
# under config["report-script"]. The settings in `params` are assigned to R
# variables on the command line before the script is sourced; stdout and
# stderr of the R call are redirected into the rule's log file.
rule R_finalReport:
    """
    Create the final report (R).
    """
    input:
        # Reference-based inputs; both input functions return an empty list
        # when config["genome"] is the empty string.
        get_reference_files,
        get_reference_expand_files,
        # The R script sourced by the shell command; it is the only input the
        # command refers to by name.
        script=config["report-script"],
        # The remaining inputs are not referenced in the shell command.
        # Presumably the sourced script reads them from the project folder --
        # verify against the script.
        fa="%s/FASTQ/TRIMMED/GSC.vcf.fa" % (config["project-folder"]),
        mockFlagstats=expand("%s/FASTQ/TRIMMED/alignments/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        c=expand("%s/FASTQ/TRIMMED/alignments_clusters/{samples}.coverage" % (config["project-folder"]), samples=samples),
        mqcraw="%s/QC/RAW/multiqc_R1/" % (config["project-folder"]),
        mqcconcatenated="%s/QC/CONCATENATED/multiqc_R1/" % (config["project-folder"]),
        mqctrimmed="%s/QC/TRIMMED/multiqc_R1/" % (config["project-folder"]),
        qdRaw=expand("%s/QC/RAW/{rawsamples}_R1_qualdist.txt" % (config["project-folder"]), rawsamples=rawsamples),
        qdConc=expand("%s/QC/CONCATENATED/{samples}_R1_qualdist.txt" % (config["project-folder"]), samples=samples),
        qdTrimmed=expand("%s/QC/TRIMMED/{samples}_R1_qualdist.txt" % (config["project-folder"]), samples=samples),
        fsFinal=expand("%s/BAM/alignments_finalMock/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        finalSetVariants="%s/VCF/FinalSetVariants_finalMock.vcf" % (config["project-folder"]),
        unfinalVcf="%s/FASTQ/TRIMMED/GSC.vcf" % (config["project-folder"]),
        cFinalMock=expand("%s/BAM/alignments_finalMock/{samples}.coverage" % (config["project-folder"]), samples=samples)
    output:
        "%s/finalReport.html" % (config["project-folder"])
    log:
        "%s/logs/R/finalReport.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/R_finalReport.tsv" % (config["project-folder"])
    singularity:
        config["singularity"]["r-gbs"]
    # Configuration values that the shell command assigns to R variables.
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       pipeConfig=config["pipeline-config"],
       serverConfig=config["server-config"],
       refGenome=config["genome"],
       fullInsilico=config["full-insilico-genome"],
       selectedInsilico=config["selected-insilico-genome"],
       enz1=config["enz1"],
       enz2=config["enz2"],
       minLength=config["minLength"],
       maxLength=config["maxLength"]
    shell:"""
       R -e "projFolder <- '{params.projFolder}'; \
             pipelineFolder <- '{params.pipeFolder}'; \
             refGenome.file <- '{params.refGenome}'; \
             pipelineConfig.file <- '{params.pipeConfig}'; \
             serverConfig.file <- '{params.serverConfig}'; \
             enz1 <- '{params.enz1}'; \
             enz2 <- '{params.enz2}'; \
             minLength <- '{params.minLength}'; \
             maxLength <- '{params.maxLength}'; \
             fullInsilico <- '{params.fullInsilico}'; \
             selectedInsilico <- '{params.selectedInsilico}'; \
             snakemake <- TRUE;\
             options(knitr.duplicate.label = 'allow');\
             source('{input.script}')" &> {log}
    """
    
# Builds <project-folder>/QC-Report.html by sourcing the R script configured
# under config["qc-script"]. The settings in `params` are assigned to R
# variables on the command line before the script is sourced; stdout and
# stderr of the R call are redirected into the rule's log file.
rule R_QCReport:
    """
    Create the QC report (R).
    """
    input:
        # The R script sourced by the shell command; it is the only input the
        # command refers to by name.
        script=config["qc-script"],
        # The remaining inputs are not referenced in the shell command.
        # Presumably the sourced script reads them from the project folder --
        # verify against the script.
        mqcraw="%s/QC/RAW/multiqc_R1/" % (config["project-folder"]),
        mqcconcatenated="%s/QC/CONCATENATED/multiqc_R1/" % (config["project-folder"]),
        mqctrimmed="%s/QC/TRIMMED/multiqc_R1/" % (config["project-folder"]),
        qdRaw=expand("%s/QC/RAW/{rawsamples}_R1_qualdist.txt" % (config["project-folder"]), rawsamples=rawsamples),
        qdConc=expand("%s/QC/CONCATENATED/{samples}_R1_qualdist.txt" % (config["project-folder"]), samples=samples),
        qdTrimmed=expand("%s/QC/TRIMMED/{samples}_R1_qualdist.txt" % (config["project-folder"]), samples=samples),
    output:
        "%s/QC-Report.html" % (config["project-folder"])
    log:
        "%s/logs/R/QC-Report.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/R_QCReport.tsv" % (config["project-folder"])
    singularity:
        config["singularity"]["r-gbs"]
    # Configuration values that the shell command assigns to R variables.
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       pipeConfig=config["pipeline-config"],
       serverConfig=config["server-config"],
       refGenome=config["genome"],
       enz1=config["enz1"],
       enz2=config["enz2"],
       minLength=config["minLength"],
       maxLength=config["maxLength"]
    shell:"""
       echo "Creating the QC-Report"
       echo "###################################################################"
       echo ""
       
       R -e "projFolder <- '{params.projFolder}'; \
             pipelineFolder <- '{params.pipeFolder}'; \
             refGenome.file <- '{params.refGenome}'; \
             pipelineConfig.file <- '{params.pipeConfig}'; \
             serverConfig.file <- '{params.serverConfig}'; \
             enz1 <- '{params.enz1}'; \
             enz2 <- '{params.enz2}'; \
             minLength <- '{params.minLength}'; \
             maxLength <- '{params.maxLength}'; \
             snakemake <- TRUE;\
             options(knitr.duplicate.label = 'allow');\
             source('{input.script}')" &> {log}
    """

def get_vc_files(wildcards):
    """
    Return the reference-genome variant file needed by the variant report.

    Args:
        wildcards: Snakemake wildcards object. It is part of the input
            function signature but is not used here.

    Returns:
        A one-element list with the FinalSetVariants_referenceGenome.vcf path
        below config["project-folder"], or an empty list when
        config["genome"] is the empty string.
    """
    if config["genome"] != "":
        return [config["project-folder"] + "/VCF/FinalSetVariants_referenceGenome.vcf"]
    return []
    
# Builds <project-folder>/VariantCalling-Report.html by sourcing the R script
# configured under config["variant-script"]. The settings in `params` are
# assigned to R variables on the command line before the script is sourced;
# stdout and stderr of the R call are redirected into the rule's log file.
rule R_VariantCallingReport:
    """
    Create the variant calling report (R).
    """
    input:
        # Reference-genome variant file; the input function returns an empty
        # list when config["genome"] is the empty string.
        get_vc_files,
        # The R script sourced by the shell command; it is the only input the
        # command refers to by name.
        script=config["variant-script"],
        # Not referenced in the shell command. Presumably the sourced script
        # reads them from the project folder -- verify against the script.
        mockvcf="%s/VCF/FinalSetVariants_finalMock.vcf" % (config["project-folder"]),
        unfinalVcf="%s/FASTQ/TRIMMED/GSC.vcf" % (config["project-folder"])
    output:
        "%s/VariantCalling-Report.html" % (config["project-folder"])
    log:
        "%s/logs/R/VariantCalling-Report.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/R_VariantCallingReport.tsv" % (config["project-folder"])
    singularity:
        config["singularity"]["r-gbs"]
    # Configuration values that the shell command assigns to R variables.
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       pipeConfig=config["pipeline-config"],
       serverConfig=config["server-config"],
       refGenome=config["genome"],
       enz1=config["enz1"],
       enz2=config["enz2"],
       minLength=config["minLength"],
       maxLength=config["maxLength"]
    shell:"""
       echo "Creating the VariantCalling-Report"
       echo "###################################################################"
       echo ""
       
       R -e "projFolder <- '{params.projFolder}'; \
             pipelineFolder <- '{params.pipeFolder}'; \
             refGenome.file <- '{params.refGenome}'; \
             pipelineConfig.file <- '{params.pipeConfig}'; \
             serverConfig.file <- '{params.serverConfig}'; \
             enz1 <- '{params.enz1}'; \
             enz2 <- '{params.enz2}'; \
             minLength <- '{params.minLength}'; \
             maxLength <- '{params.maxLength}'; \
             snakemake <- TRUE;\
             options(knitr.duplicate.label = 'allow');\
             source('{input.script}')" &> {log}
    """

# Builds <project-folder>/mockEvalReport.html. Unlike the other report rules
# in this file, the script configured under config["mockeval-script"] is
# rendered directly with rmarkdown::render and the rule's output path is
# passed as output_file. stdout and stderr of the R call are redirected into
# the rule's log file.
rule R_MockEval:
    """
    Create the mock evaluation report (R).
    """
    input:
        # Positional inputs are not referenced in the shell command.
        # Presumably the rendered script reads them from the project folder --
        # verify against the script.
        "%s/MockReference/MockReference.fa" % (config["project-folder"]),
        "%s/FASTQ/TRIMMED/GSC.MR.Clusters.fa" % (config["project-folder"]),
        expand("%s/BAM/alignments_finalMock/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        expand("%s/FASTQ/TRIMMED/alignments_clusters/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        # The script passed to rmarkdown::render in the shell command.
        script=config["mockeval-script"]
    output:
        "%s/mockEvalReport.html" % (config["project-folder"])
    log:
        "%s/logs/R/mockEvalReport.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/R_MockEval.tsv" % (config["project-folder"])
    singularity:
        config["singularity"]["r-gbs"]
    # Configuration values that the shell command assigns to R variables.
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       pipeConfig=config["pipeline-config"],
       serverConfig=config["server-config"],
       enz1=config["enz1"],
       enz2=config["enz2"],
       minLength=config["minLength"],
       maxLength=config["maxLength"]
    shell:"""
       R -e "projFolder <- '{params.projFolder}'; \
             pipelineFolder <- '{params.pipeFolder}'; \
             pipelineConfig.file <- '{params.pipeConfig}'; \
             serverConfig.file <- '{params.serverConfig}'; \
             enz1 <- '{params.enz1}'; \
             enz2 <- '{params.enz2}'; \
             minLength <- '{params.minLength}'; \
             maxLength <- '{params.maxLength}'; \
             snakemake <- TRUE;\
             options(knitr.duplicate.label = 'allow');\
             rmarkdown::render('{input.script}',output_file='{output}')" &> {log}
    """

# Builds <project-folder>/Insilico-Report.html by sourcing the R script
# configured under config["insilico-report-script"]. The settings in `params`
# are assigned to R variables on the command line before the script is
# sourced; stdout and stderr of the R call are redirected into the rule's log
# file.
rule R_InsilicoReport:
    """
    Create the in-silico report (R).
    """
    input:
        # The R script sourced by the shell command; it is the only input the
        # command refers to by name.
        script=config["insilico-report-script"],
        # Per-sample flagstat and coverage files for the "full" and
        # "selected" in-silico folders. They are not referenced in the shell
        # command; presumably the sourced script reads them from the project
        # folder -- verify against the script.
        fsFull=expand("%s/BAM/Insilico/full/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        cFull=expand("%s/BAM/Insilico/full/{samples}.coverage" % (config["project-folder"]), samples=samples),
        fsSelected=expand("%s/BAM/Insilico/selected/{samples}.sam.flagstat" % (config["project-folder"]), samples=samples),
        cSelected=expand("%s/BAM/Insilico/selected/{samples}.coverage" % (config["project-folder"]), samples=samples)
    output:
        "%s/Insilico-Report.html" % (config["project-folder"])
    # NOTE(review): the log file is named "InsilicoCalling-Report.log" while
    # the output is "Insilico-Report.html" -- confirm the name is intended.
    log:
        "%s/logs/R/InsilicoCalling-Report.log" % (config["project-folder"])
    benchmark:
        "%s/benchmark/R_InsilicoReport.tsv" % (config["project-folder"])
    singularity:
        config["singularity"]["r-gbs"]
    # Configuration values that the shell command assigns to R variables.
    params:
       projFolder=config["project-folder"],
       pipeFolder=config["pipeline-folder"],
       pipeConfig=config["pipeline-config"],
       serverConfig=config["server-config"],
       refGenome=config["genome"],
       fullInsilico=config["full-insilico-genome"],
       selectedInsilico=config["selected-insilico-genome"],
       enz1=config["enz1"],
       enz2=config["enz2"],
       minLength=config["minLength"],
       maxLength=config["maxLength"]
    shell:"""
       echo "Creating the Insilico-Report"
       echo "###################################################################"
       echo ""
       
       R -e "projFolder <- '{params.projFolder}'; \
             pipelineFolder <- '{params.pipeFolder}'; \
             refGenome.file <- '{params.refGenome}'; \
             pipelineConfig.file <- '{params.pipeConfig}'; \
             serverConfig.file <- '{params.serverConfig}'; \
             enz1 <- '{params.enz1}'; \
             enz2 <- '{params.enz2}'; \
             minLength <- '{params.minLength}'; \
             maxLength <- '{params.maxLength}'; \
             fullInsilico <- '{params.fullInsilico}'; \
             selectedInsilico <- '{params.selectedInsilico}'; \
             snakemake <- TRUE;\
             options(knitr.duplicate.label = 'allow');\
             source('{input.script}')" &> {log}
    """
