Source code for umap.run_bowtie

from argparse import ArgumentParser
from datetime import datetime
import os
import re
import subprocess


def subset_list(list_items, regex):
    out_list = []
    for each_item in list_items:
        RE = re.search(regex, each_item)
        if RE:
            out_list.append(each_item)
    return out_list


[docs]class BowtieWrapper: def __init__(self, kmer_dir, bowtie_dir, index_dir, index_name, job_id): """Runs Bowtie one <chrom>.<kmer>.<jobid>.kmer.gz Using the job_id, this function identifies one kmer.gz file and runs bowtie on that and saves the output to <chrom>.<kmer>.<jobid>.bowtie.gz. :param kmer_dir: Directory with <chrom>.<kmer>.<jobid>.kmer.gz files :param bowtie_dir: Directory with Bowtie 1.1.0 executable files. :param index_dir: Directory with Bowtie index :param index_name: Name used for generating Bowtie index files :param int job_id: will be used for finding kmer.gz file :returns: Saves the output to a file in the same directory as kmer_dir """ self.kmer_dir = kmer_dir self.bowtie_dir = bowtie_dir self.index_dir = index_dir self.index_name = index_name self.job_id = job_id self.execute_bowtie_command() def execute_bowtie_command(self): """The only method of BowtieWrapper Will be executed automatically by BowtieWrapper :raises ValueError: If job_id is out of expected range """ kmer_names = ["{}/{}".format(self.kmer_dir, each_kmer) for each_kmer in subset_list(os.listdir(self.kmer_dir), ".kmer.gz$")] kmer_names.sort() LongIndex = False short_ind_path = "{}/{}.1.ebwtl".format( self.index_dir, self.index_name) if os.path.exists(short_ind_path): LongIndex = True print("Switching to use of long index") if job_id <= len(kmer_names): try: kmer_file = kmer_names[job_id] except: raise ValueError( "{} does not exist. Time: {}".format( job_id, str(datetime.now()))) print("processing Kmer File {}".format(kmer_file)) # kmer_name = kmer_dir.split("/")[-1] kmer_path = "{}/{}".format(self.kmer_dir, kmer_file.split("/")[-1]) bowtie_out_path = kmer_path.replace(".kmer.gz", ".bowtie.gz") first_part_of_command = "gunzip -c {} | {}/bowtie ".format( kmer_path, self.bowtie_dir) if LongIndex: first_part_of_command = first_part_of_command +\ "--large-index " bowtiecmd = first_part_of_command +\ "{}/{} ".format(self.index_dir, self.index_name) +\ "-v 0 -k 1 -m 1 --norc --mm " +\ "-r --refidx --suppress 5,6,7,8 - " +\ "| gzip -c > {}".format(bowtie_out_path) subprocess.call(bowtiecmd, shell=True) print("Executing {}".format(bowtiecmd)) else: print("The length of files was {} but the index was {}".format( len(kmer_names), job_id))
if __name__ == "__main__": parser = ArgumentParser( description="Umap wrapper for running bowtie " "on individual k-mer files.") parser.add_argument( "kmer_dir", help="Directory containing the .kmer files") parser.add_argument( "bowtie_dir", help="Directory containing bowtie executable") parser.add_argument( "index_dir", help="Directory containing bowtie index") parser.add_argument( "index_name", help="prefix name of bowtie index") parser.add_argument( "-var_id", default="SGE_TASK_ID", help="HPC environmental variable for JOB ID") parser.add_argument( "-job_id", type=int, default=0, help="1-based index for selecting a k-mer file") args = parser.parse_args() job_id = args.job_id if job_id == 0: job_id = int(os.environ[args.var_id]) - 1 BowtieWrapper(args.kmer_dir, args.bowtie_dir, args.index_dir, args.index_name, job_id)