#!/usr/bin/perl -w

use strict;
use warnings;

my $usage = "perl <script_name> <name_of_file_to_look_through>";

# This script will read through an interlaced Illumina NovaSeq paired-end fastq file and de-interlace it
# into two files: one for the 1st strand and another for the 2nd strand.  The files should remain "phased".
#
# A record is recognised by its header line: '@', a run of non-whitespace, one
# whitespace character, then "1:" or "2:" followed by at least one more
# character.  The header and the three lines after it are copied verbatim to
# <input>_OUTPUT_1.fastq (for "1:") or <input>_OUTPUT_2.fastq (for "2:").
#
# The script dies if the input or either output cannot be opened, written or
# closed, or if the input ends in the middle of a record.  Non-blank lines that
# do not start a recognised record are skipped and reported on STDERR.

## Written by Adam C. Payton
## edited for NovaSeq by Sarah B. Carey


my $file1 = $ARGV[0];
die "No input file given\n$usage\n"
    unless defined $file1 and length $file1;

my $out_name1 = "${file1}_OUTPUT_1.fastq";
my $out_name2 = "${file1}_OUTPUT_2.fastq";

# Three-arg open with lexical handles: the file name is never interpreted as
# an open mode or a pipe, and every failure reports the real OS error.
open my $in, '<', $file1
    or die "Cannot open '$file1' for reading: $!\n$usage\n";
open my $out1, '>', $out_name1
    or die "Cannot open '$out_name1' for writing: $!\n";
open my $out2, '>', $out_name2
    or die "Cannot open '$out_name2' for writing: $!\n";

# Dispatch tables keyed by the mate number captured from the header.
my %handle_for = (1 => $out1,      2 => $out2);
my %name_for   = (1 => $out_name1, 2 => $out_name2);
my %count_for  = (1 => 0,          2 => 0);
my $skipped    = 0;

while (my $line = <$in>) {
    chomp $line;

    # Only the header is chomped (and re-terminated with "\n" on output);
    # the pattern is tested against the chomped text, so a bare "1:" with
    # nothing after it is not treated as a header.
    unless ($line =~ /^\@\S+\s([12]):[\s\S]/) {
        $skipped++ if $line =~ /\S/;    # blank lines are ignored quietly
        next;
    }
    my $mate = $1;

    # A FASTQ record is four lines; pull the remaining three as-is.
    my @rest;
    for my $part (2 .. 4) {
        my $next = <$in>;
        die "Truncated record in '$file1': input ended at line $. "
          . "before line $part of the record starting '$line'\n"
            unless defined $next;
        push @rest, $next;
    }

    print { $handle_for{$mate} } join('', "$line\n", @rest)
        or die "Write to '$name_for{$mate}' failed: $!\n";
    $count_for{$mate}++;
}

close $in;

# Buffered write errors only surface at close, so check before reporting success.
close $out1 or die "Error closing '$out_name1': $!\n";
close $out2 or die "Error closing '$out_name2': $!\n";

warn "Warning: $skipped non-blank line(s) in '$file1' did not start a recognised "
   . "read 1 / read 2 record and were skipped\n"
    if $skipped;
warn "Warning: read counts differ ($count_for{1} vs $count_for{2}); "
   . "the output files are not phased\n"
    if $count_for{1} != $count_for{2};

print "\n\n\nFinished\n\n$count_for{1} seqences written to $out_name1\n\n$count_for{2} sequences written to $out_name2\n\n";

