// ---------------------------------------------------------------------------
// Pipeline-wide settings. These are script-level (binding) variables so that
// every process in this file can interpolate them.
// ---------------------------------------------------------------------------
min_qual = 3        // passed to `qc3C mkdb --min-quality`
kmer_size = 24      // passed to `qc3C mkdb --kmer-size`; also a component of every publish path
nreads = 500000     // passed to `sim3C -n`
enzyme = "DpnII"    // passed to `sim3C -e` and `qc3C kmer -e`

// Both parameters are mandatory. Without this check a missing value would only
// surface later, as the literal string "null" inside a command line or as a
// publishDir failure.
if (!params.reference) {
    exit 1, "Missing required parameter: --reference"
}
if (!params.out_dir) {
    exit 1, "Missing required parameter: --out_dir"
}
reference = params.reference    // final positional input given to sim3C
out_dir = params.out_dir        // root directory for every publishDir

// Parameter sweep: `simulate` is run for every combination of the three lists below.
// frag_size  -> `--insert-mean` (and, plus 90, `--insert-max`)
frag_size = [150, 200, 250, 300, 350, 400, 500]
// efficiency -> `sim3C --efficiency`
efficiency = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75]
// seeds      -> `sim3C --seed`, `reformat.sh sampleseed=` and `qc3C kmer -s`
seeds = [
    3053294, 6266726, 6198264, 1818471, 2388358,
    7334410, 3214416, 3253953, 6210985, 2345205,
    8771108, 5115437, 9334143, 3119798, 7631033,
    4644819, 3229084, 6240994, 4259812, 1005970
]
// Down-sampling targets handed to `reformat.sh samplereadstarget=`.
sample_range = [12500, 25000, 50000, 100000, 250000, 500000]

process simulate {
    // Simulate one read set with sim3C for every (efficiency, seed, fragment size)
    // combination and gzip the result.
    //
    // Emits: tuple(seed, efficiency, fragment size, reads.fq.gz, sim3C.log)
    conda '/home/ubuntu/miniconda3/envs/sim3c_py3'
    cpus 1
    publishDir out_dir, mode: 'symlink', overwrite: true, saveAs: {fn -> "raw/${kmer_size}/${s}/${eff}/${fsize}/${fn}"}

    input:
    each eff from efficiency
    each s from seeds
    each fsize from frag_size

    output:
    // sim3C.log is expected to be present in the task directory after sim3C has run.
    tuple s, eff, fsize, path("reads.fq.gz"), path("sim3C.log") into simulated_reads

    // pigz is limited to the CPUs declared for this task; the previous hard-coded
    // `-p2` asked for more threads than the single CPU requested above.
    """
    sim3C -v --seed $s --dist equal --profile-name profile.tsv \
        --machine-profile DRR -l 150 -e $enzyme -n $nreads --insert-min 120 --insert-max ${fsize + 90} \
        --efficiency $eff --insert-mean $fsize --insert-sd 30 --linear $reference reads.fq
    pigz -p ${task.cpus} reads.fq
    """
}

process downsample {
    // Sub-sample every simulated read set once per target size in sample_range,
    // reusing the simulation seed as the sampling seed.
    //
    // Emits: tuple(seed, efficiency, fragment size, target size, sampled.fq.gz)
    cpus 4
    conda '/home/ubuntu/miniconda3/envs/qc3c'
    publishDir out_dir,
        mode: 'symlink',
        overwrite: true,
        pattern: "*.fq.gz",
        saveAs: { fname -> "raw/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/${fname}" }

    input:
    // The simulation log travels with the reads but is not used by this step.
    tuple s, eff, fsize, path(reads), path(sim_log) from simulated_reads
    each max_samples from sample_range

    output:
    tuple s, eff, fsize, max_samples, path('sampled.fq.gz') into sampled_reads

    """
    reformat.sh threads=${task.cpus} sampleseed=$s samplereadstarget=${max_samples} int=t in=$reads out=sampled.fq.gz
    """
}

// Duplicate the down-sampled reads into two channels: to_merge and to_clean.
sampled_reads.into { to_merge; to_clean }

process mergefq {
	// Run fastp in merge mode on the interleaved sampled reads; unmerged reads are
	// kept in the same output file and fastp's stderr is captured as fastp.log.
	//
	// Emits: tuple(seed, efficiency, fragment size, target size, merged.fq.gz, fastp.log)
	cpus 4
	conda '/home/ubuntu/miniconda3/envs/qc3c'
	publishDir out_dir,
		mode: 'symlink',
		overwrite: true,
		saveAs: { fname -> "merge/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/${fname}" }

	input:
	tuple s, eff, fsize, max_samples, path(reads) from to_merge

	output:
	tuple s, eff, fsize, max_samples, path("merged.fq.gz"), path('fastp.log') into merged_reads

	"""
	fastp -w ${task.cpus} -i $reads --interleaved_in --include_unmerged --merge --merged_out merged.fq.gz 2> fastp.log
	"""
}



process merged_mkdb {
	// Build the k-mer library (lib.jf) for a merged read set with `qc3C mkdb`.
	// Only files matching *.jf are published; the reads are passed through unchanged.
	//
	// Emits: tuple(seed, efficiency, fragment size, target size, reads, lib.jf)
	cpus 2
	conda '/home/ubuntu/miniconda3/envs/qc3c'
	publishDir out_dir,
		mode: 'symlink',
		overwrite: true,
		pattern: "*.jf",
		saveAs: { fname -> "merge/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/jf/${fname}" }

	input:
	// The fastp log arrives with the reads but is not used by this step.
	tuple s, eff, fsize, max_samples, path(reads), path(fastp_log) from merged_reads

	output:
	tuple s, eff, fsize, max_samples, path(reads), path("lib.jf") into kmer_dbs

	"""
	qc3C mkdb --min-quality $min_qual --kmer-size $kmer_size -t ${task.cpus} -l lib.jf -r $reads
	"""
}

process merged_kmer {
	// Run `qc3C kmer` on a merged read set using its k-mer library, with the
	// simulation seed (-s) and the simulated fragment size (-m).
	//
	// Emits: tuple(seed, efficiency, fragment size, target size, qc3C.log, report.*)
	conda '/home/ubuntu/miniconda3/envs/qc3c'
	cpus 1
	publishDir out_dir, mode: 'copy', overwrite: true, saveAs: {fn -> "merge/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/kmer_qc/${fn}"}

	input:
	tuple s, eff, fsize, max_samples, path(reads), path(lib) from kmer_dbs

	output:
	// `tuple`/`path` replace the deprecated `set`/`file` qualifiers so this
	// declaration matches the input above.
	tuple s, eff, fsize, max_samples, path('qc3C.log'), path("report.*") into kmer_qc_out

	"""
	qc3C kmer -v -t ${task.cpus} -s $s -m $fsize -e $enzyme -l $lib -r $reads
	"""
}

process clean {
    // Run fastp on the interleaved sampled reads without merging, writing the two
    // mates to separate files, then combine them into a single cleaned.fq.gz
    // with reformat.sh. fastp's stderr is captured as fastp.log.
    //
    // Emits: tuple(seed, efficiency, fragment size, target size, cleaned.fq.gz, fastp.log)
    conda '/home/ubuntu/miniconda3/envs/qc3c'
    cpus 4
    publishDir out_dir, mode: 'symlink', overwrite: true, saveAs: {fn -> "unmerged/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/${fn}"}

    input:
    tuple s, eff, fsize, max_samples, path(reads) from to_clean

    output:
    tuple s, eff, fsize, max_samples, path("cleaned.fq.gz"), path('fastp.log') into cleaned_reads

    // reformat.sh is given an explicit thread count so it stays within the CPUs
    // declared for this task instead of choosing its own default.
    """
    fastp -w ${task.cpus} -i $reads --interleaved_in -o cleaned_R1.fq.gz -O cleaned_R2.fq.gz 2> fastp.log
    reformat.sh threads=${task.cpus} in1=cleaned_R1.fq.gz in2=cleaned_R2.fq.gz out=cleaned.fq.gz
    """
}

process cleaned_mkdb {
	// Build the k-mer library (lib.jf) for a cleaned, unmerged read set with `qc3C mkdb`.
	// Only files matching *.jf are published; the reads are passed through unchanged.
	//
	// Emits: tuple(seed, efficiency, fragment size, target size, reads, lib.jf)
	cpus 4
	conda '/home/ubuntu/miniconda3/envs/qc3c'
	publishDir out_dir,
		mode: 'symlink',
		overwrite: true,
		pattern: "*.jf",
		saveAs: { fname -> "unmerged/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/jf/${fname}" }

	input:
	// The fastp log arrives with the reads but is not used by this step.
	tuple s, eff, fsize, max_samples, path(reads), path(fastp_log) from cleaned_reads

	output:
	tuple s, eff, fsize, max_samples, path(reads), path("lib.jf") into clean_kmer

	"""
	qc3C mkdb --min-quality $min_qual --kmer-size $kmer_size -t ${task.cpus} -l lib.jf -r $reads
	"""
}

process clean_qc3C_kmer {
	// Run `qc3C kmer` on a cleaned, unmerged read set using its k-mer library,
	// with the simulation seed (-s) and the simulated fragment size (-m).
	//
	// Emits: tuple(seed, efficiency, fragment size, target size, qc3C.log, report.*)
	conda '/home/ubuntu/miniconda3/envs/qc3c'
	cpus 1
	// Reports are copied rather than symlinked, as merged_kmer does, so the
	// published results do not depend on the task work directory being kept.
	publishDir out_dir, mode: 'copy', overwrite: true, saveAs: {fn -> "unmerged/${kmer_size}/${s}/${eff}/${fsize}/${max_samples}/kmer_qc/${fn}"}

	input:
	tuple s, eff, fsize, max_samples, path(reads), path(lib) from clean_kmer

	output:
	// `tuple`/`path` replace the deprecated `set`/`file` qualifiers so this
	// declaration matches the input above.
	tuple s, eff, fsize, max_samples, path('qc3C.log'), path("report.*") into clean_kmer_out

	"""
	qc3C kmer -v -t ${task.cpus} -s $s -m $fsize -e $enzyme -l $lib -r $reads
	"""
}
