#!/usr/bin/env python

from Bio import SeqIO
import argparse
import sys

def build_parser():
    """Build the command-line parser for the FASTA length filter.

    The input file and both length bounds are mandatory; lengths are parsed
    as integers by argparse so that a malformed value is reported as a usage
    error before any file is opened.
    """
    parser = argparse.ArgumentParser(description="This script takes a multifasta as input and filters out sequences based on length.")

    required = parser.add_argument_group('required arguments')

    required.add_argument("-i", "--input_fasta", help="Original fasta file", dest="input_fasta", required=True)
    required.add_argument("-m", "--min_length", help="minimum length retained", dest="min_len", type=int, required=True)
    required.add_argument("-M", "--max_length", help="maximum length retained", dest="max_len", type=int, required=True)
    parser.add_argument("-o", "--output_file", help='name of output fasta file (default: "filtered.fasta")', dest="output_file", default="filtered.fasta")
    parser.add_argument("-q", "--quiet", help="don't report percentage of retained sequences", action="store_true")

    return parser


def percent_retained(kept, total):
    """Return ``kept / total`` as a percentage rounded to two decimals.

    An empty input (``total == 0``) yields ``0.0`` instead of raising
    ``ZeroDivisionError``.
    """
    if total == 0:
        return 0.0
    return round(float(kept) / float(total) * 100, 2)


def filter_fasta(records, out_handle, min_len, max_len):
    """Write the records whose length lies in ``[min_len, max_len]``.

    Args:
        records: iterable of objects exposing ``.seq`` and ``.description``
            (e.g. what ``SeqIO.parse`` yields).
        out_handle: writable text handle receiving the retained records.
        min_len: smallest sequence length retained (inclusive).
        max_len: largest sequence length retained (inclusive).

    Returns:
        A ``(kept, total)`` tuple: records written and records seen.
    """
    total = 0
    kept = 0

    for seq_record in records:
        total += 1

        if min_len <= len(seq_record.seq) <= max_len:
            kept += 1
            # Each retained record is written as a header line followed by
            # the whole sequence on a single line.
            out_handle.write(">" + str(seq_record.description) + "\n" + str(seq_record.seq) + "\n")

    return kept, total


def main(argv=None):
    """Run the filter using ``argv`` (defaults to ``sys.argv[1:]``)."""
    parser = build_parser()

    if argv is None:
        argv = sys.argv[1:]

    # With no arguments at all, show the full help rather than a terse
    # "missing argument" error.
    if not argv:
        parser.print_help(sys.stderr)
        sys.exit(0)

    args = parser.parse_args(argv)

    # The context manager closes both files even if parsing or writing fails.
    with open(args.input_fasta, "r") as in_fasta, open(args.output_file, "w") as out_file:
        kept, total = filter_fasta(SeqIO.parse(in_fasta, "fasta"), out_file, args.min_len, args.max_len)

    if not args.quiet:
        perc = percent_retained(kept, total)
        print("\n\tRetained " + str(kept) + " sequences of the initial " + str(total) + " (" + str(perc) + "%).\n")


if __name__ == "__main__":
    main()
