"""
    Just the assembly reports and nothing else
"""

__author__ = 'Daniel Park <dpark@broadinstitute.org>'

import os
# Run every rule's shell command through bash.
shell.executable("/bin/bash")

# Load workflow settings; the rules below read them through the global `config`.
configfile: "config.yaml"

def read_file(fname):
    """Yield the non-blank lines of a text file, whitespace-stripped.

    Each line has leading and trailing whitespace (including the line
    ending) removed.  Lines that are empty after stripping are skipped, so
    a stray blank line in a sample list never becomes an empty-string
    entry (which would otherwise expand to a bogus ``.../.txt`` target).

    Args:
        fname: path of the text file to read.

    Yields:
        One stripped, non-empty string per line, in file order.
    """
    with open(fname, 'rt') as inf:
        for line in inf:
            entry = line.strip()
            if entry:
                yield entry

def cat_files_with_header(inFiles, outFile):
    """Concatenate text files that share a header line into one file.

    The first line of every non-empty input file is treated as its header.
    The header is written once (taken from the first non-empty file); every
    later file must carry the same header, and only its remaining lines are
    appended.  Files with no lines at all are skipped.

    Args:
        inFiles: iterable of input file paths, concatenated in this order.
        outFile: path of the output file; it is created or overwritten.

    Raises:
        Exception: if a file's header differs from the first header seen.
    """
    with open(outFile, 'wt') as outf:
        header = None
        for f in inFiles:
            with open(f, 'rt') as inf:
                for lineno, line in enumerate(inf):
                    # Only the last line of a file can lack its newline.
                    # Terminate it so it is not glued to the next file's
                    # first row, and so a newline-less header still compares
                    # equal to the reference header.
                    if not line.endswith('\n'):
                        line += '\n'
                    if lineno > 0:
                        # ordinary data row
                        outf.write(line)
                    elif header is None:
                        # header of the first non-empty file: emit it once
                        header = line
                        outf.write(line)
                    elif line != header:
                        # header of a later file must match the first one
                        raise Exception("headers do not match")
 

# Aggregate rule: merge the per-sample assembly reports into a single
# summary table at <reports_dir>/summary.assembly.txt.
#
# Inputs are one report per unique sample name listed in either of the
# config["samples_assembly"] or config["samples_assembly_failures"] files.
# The union is sorted so that the input order -- and therefore the row order
# of the summary -- is the same on every run (plain set iteration order over
# strings varies between interpreter invocations).
#
# params.LSF is not used inside this rule; presumably it is consumed by the
# cluster submission wrapper -- confirm against the cluster configuration.
rule all_assembly_reports:
    input:
            expand("{reports_dir}/assembly/{sample}.txt",
                reports_dir=config["reports_dir"],
                sample=sorted(
                    set(read_file(config["samples_assembly"]))
                    | set(read_file(config["samples_assembly_failures"]))))
    output:
            config["reports_dir"]+"/summary.assembly.txt"
    params: LSF="-N"
    run:
            cat_files_with_header(input, output[0])
        

# Per-sample rule: write <reports_dir>/assembly/{sample}.txt by running
# `reports.py assembly_stats` (from config["bin_dir"]) with the sample name,
# the output path, and the assembly, assembly-tmp, alignment, per-sample
# reads and raw reads directories built from config["data_dir"],
# config["tmp_dir"] and config["subdirs"].
#
# No `input:` is declared, so Snakemake does not track the files that
# reports.py reads from those directories as dependencies of this rule.
#
# params.LSF comes from config["LSF_queues"]["short"], defaulting to
# '-W 4:00' when that key is absent; params.logid is the sample name.
# Neither is referenced in the shell command -- presumably both are consumed
# by the cluster submission wrapper; confirm against the cluster config.
# NOTE(review): the unit of `mem=2` is not stated here (likely GB) -- confirm.
rule assembly_report:
    output: config["reports_dir"]+'/assembly/{sample}.txt'
    resources: mem=2
    params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
            logid="{sample}"
    shell:  "{config[bin_dir]}/reports.py assembly_stats {wildcards.sample} {output}" \
            " --assembly_dir {config[data_dir]}/{config[subdirs][assembly]}" \
            " --assembly_tmp {config[tmp_dir]}/{config[subdirs][assembly]}" \
            " --align_dir {config[data_dir]}/{config[subdirs][align_self]}" \
            " --reads_dir {config[data_dir]}/{config[subdirs][per_sample]}" \
            " --raw_reads_dir {config[data_dir]}/{config[subdirs][source]}"
