#!/bin/bash

#make a list of annotations that passed filtering (performed using the rename_and_filter_bed_files.sh script)
#
# Writes annotations_with_single_cox1.txt: one annotation name per line,
# derived from the regular *.bed files in the current directory by
#   - dropping the trailing ".bed" extension, and
#   - rewriting every "Anopheles_cf_" to "Anopheles_cf._".
# The file is created (empty) even when no .bed file is present.
#
# NOTE(review): later steps open "<name>.bed", so a name changed by the
# "Anopheles_cf_" rewrite no longer matches the .bed file it came from --
# confirm that the files on disk already carry the dot.
list_filtered_annotations() {
	local bed name
	local -a names=()

	# Glob instead of parsing `ls` output.
	for bed in *.bed; do
		# An unmatched glob stays literal; directories are not annotations.
		[[ -f "$bed" ]] || continue
		# Strip the extension only, never a ".bed" inside the name itself.
		name=${bed%.bed}
		names+=("${name//Anopheles_cf_/Anopheles_cf._}")
	done

	if (( ${#names[@]} > 0 )); then
		printf '%s\n' "${names[@]}" > annotations_with_single_cox1.txt
	else
		: > annotations_with_single_cox1.txt
	fi
}
list_filtered_annotations

# For every name listed in annotations_with_single_cox1.txt, cut the COX1
# region out of ../../<name>.mitogenome_scaffold.fa and write it to
# <name>.COX1.fa as ">name" followed by the extracted sequence.
#
# Also (re)creates ./lengths with the length of each extracted sequence, in
# list order. When a scaffold cannot be copied or nothing is extracted, a
# warning goes to stderr, "NA" is recorded instead of a length, and no
# <name>.COX1.fa is written, so the rows stay aligned with the list.
#
# Returns 1 without touching anything when the list cannot be read.
extract_cox1_sequences() {
	local list=annotations_with_single_cox1.txt
	local target scaffold coords raw out

	if [[ ! -r "$list" ]]; then
		printf 'error: cannot read %s\n' "$list" >&2
		return 1
	fi

	# Start empty: rows left behind by an interrupted run would shift every
	# length against the annotation list.
	: > lengths

	# The list is read on fd 3 so that no command in the body can consume it
	# through stdin.
	while IFS= read -r -u 3 target || [[ -n "$target" ]]; do
		scaffold="$target.mitogenome_scaffold.fa"
		coords="$target.coords.txt"
		raw="$target.COX1.tab.temp"
		out="$target.COX1.fa"

		# Work on a local copy of the scaffold; it is removed again below
		# together with the .fai file that is expected to appear next to it.
		if ! cp -- "../../$scaffold" ./; then
			printf 'warning: %s: cannot copy ../../%s, recording NA\n' "$target" "$scaffold" >&2
			printf 'NA\n' >> lengths
			continue
		fi

		# BED rows for bedtools: the contig name comes from the FASTA header
		# line(s) (text up to the first space, ">" removed), start/end come
		# from columns 2-3 of the cox1 row(s) of the annotation.
		paste \
			<(grep -- '>' "$scaffold" | cut -d ' ' -f 1 | tr -d '>') \
			<(grep -- cox1 "$target.bed" | cut -f 2-3) \
			> "$coords"

		rm -f -- "$raw"
		bedtools getfasta -fo "$raw" -tab -fi "$scaffold" -bed "$coords"

		if [[ ! -s "$raw" ]]; then
			printf 'warning: %s: no COX1 sequence extracted, recording NA\n' "$target" >&2
			printf 'NA\n' >> lengths
			rm -f -- "$scaffold" "$scaffold.fai" "$coords" "$raw"
			continue
		fi

		# With -tab the output is "<region name><TAB><sequence>": put the two
		# fields on separate lines and drop the first (the region name), then
		# prepend the annotation name as the FASTA header. The header is taken
		# from $target itself; grepping the list for it would also pick up
		# every other name that merely contains it.
		{
			printf '>%s\n' "$target"
			tr '\t' '\n' < "$raw" | sed '1d'
		} > "$out"

		#also keep track of the lengths of the mined COX1 genes
		# (line 1 of the output is the header, so it is skipped)
		awk 'NR > 1 { print length }' "$out" >> lengths

		rm -f -- "$scaffold" "$scaffold.fai" "$coords" "$raw"
	done 3< "$list"
}
extract_cox1_sequences

# Move every *.COX1.fa file of the current directory into
# COX1_newly_annotated_mitogenomes/, then join the annotation list with the
# ./lengths file column-wise into annotations_with_single_cox1.cox1_length.txt
# and remove ./lengths.
collect_cox1_outputs() {
	local outdir=COX1_newly_annotated_mitogenomes
	local fasta
	local -a fastas=()

	# -p: a directory left over from an earlier run is not an error.
	mkdir -p -- "$outdir"

	# Collect the matches first so that `mv` is never handed the literal,
	# unmatched pattern.
	for fasta in *.COX1.fa; do
		if [[ -e "$fasta" ]]; then
			fastas+=("$fasta")
		fi
	done
	if (( ${#fastas[@]} > 0 )); then
		mv -- "${fastas[@]}" "$outdir"/
	fi

	paste annotations_with_single_cox1.txt lengths > annotations_with_single_cox1.cox1_length.txt
	rm -f -- lengths
}
collect_cox1_outputs