#!/usr/bin/perl -w

=head1 Description

  
  Read a fasta file with multiple entries (parameter 1)
  Split this file into parts of maximum X (given as parameter 2)
     - Kb (parameter 3 = Kb)
     - entries (parameter 3 = entries)
  If fasta file is toto.tfa, parts will be named toto_0001.tfa ... toto_xxxx.tfa

=cut

BEGIN
{
  ($prg)=($0=~/([^\/]+)$/);
  $dir=$0;
  $dir=~s/$prg$//;
  push @INC, $dir;
}


use FindBin;
use lib $FindBin::RealBin;
#use bioutils;

#------------------------------------------------------------

sub usage( $ )
  {
    printf STDERR "%s\n", $_[0];
    system("pod2text $0");
    exit(1);
  }

#------------------------------------------------------------

($#ARGV == 2) || &usage("bad parameter");

($ARGV[1] =~ /^\d+\.?\d*$/) || &usage("parameter is not a number: $ARGV[1]");

($ARGV[2] eq 'Kb' || $ARGV[2] eq 'entries') || &usage("bad size type $ARGV[2]");

if ($ARGV[2] eq 'Kb')
  {
    $max_length = $ARGV[1]*1000;
  }
else
  {
    $max_entries = $ARGV[1];
  }

$current_length = 0;
$nb_entries = 0;
if ($ARGV[0] =~ /\|\s*$/ || $ARGV[0] eq '-')
  {
	$basename = "split$$";
  }
else
  {
	$basename = $ARGV[0];
	$basename =~ s/\.\w+$//g;
	($basename) = ($basename =~ /([^\/]+)$/);
  }
$part_no = 1;

$inseq=0;

# try to open input file
open(FIN,$ARGV[0]) || open(FIN,"cat $ARGV[0]|") || die "Can't open $ARGV[0]";

# open output file
open(FOUT,">${basename}_0001.tfa") || die "can't create file ${basename}_0001.tfa";

while (<FIN>)
  {
    chomp;
    
    if (/^>/)
      {
	if ($inseq == 1)
	  {
	    if (($ARGV[2] eq 'Kb' && $current_length > $max_length) ||
		($ARGV[2] eq 'entries' && $nb_entries == $max_entries))
	      {
		close(FOUT);
		$part_no++;
		$current_length = 0;
		$nb_entries = 0;
		$part_name = sprintf("%s_%04d.tfa",$basename, $part_no);
		open(FOUT,">$part_name") || die "Can't create file $part_name";
	      }
	  }
	$inseq=1;
	$nb_entries++;
	    
	print FOUT "$_\n";
	next;
      }
    if ($inseq == 1)
    {
      print FOUT "$_\n";
      $current_length += length($_);
    }
  }

close(FOUT);
close(FIN);