#!/bin/bash
# BLAST each target species' COX1 (COI barcode) sequence against that species'
# own mtDNA-cleaned nuclear genome and summarise the hits.
#
# Run from the directory that contains blasting/, COX1/ and target_species.txt.
#
# Reads:
#   target_species.txt         one species identifier per line
#   COX1/<species>.COX1.fasta  BLAST query for that species
#   ../remove_mtDNA_from_nuclear_genomes/<species>.nuclear_genome.linearized.mtDNA_clean.fa
#
# Writes (all inside blasting/):
#   header                                      column names for the summary tables
#   alignments/<species>.blast_alignment.txt    filtered alignment line pairs,
#                                               hits separated by "--"
#   blast_summary/<species>.blast_results.txt   header + filtered tabular hits
#   blast_counts.txt                            one filtered-hit count per species,
#                                               appended (never reset here) in the
#                                               order of target_species.txt
#
# Requires makeblastdb and blastn (NCBI BLAST+) on PATH.

# Print a message to stderr and abort.
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

# A hit is discarded only when it is BOTH at least this long AND at least this
# similar (the "100% coverage and >=99% ID" filter).
# NOTE(review): 658 is presumably the length of the COX1 query - confirm.
readonly full_length=658
readonly pident_cutoff=99

# Search options shared by the XML run and the tabular run so they cannot drift.
blast_opts=(-perc_identity 60 -evalue 0.0001 -task blastn)

# Everything below creates and deletes files relative to blasting/, so running
# on after a failed cd would delete files in the wrong directory.
cd blasting/ || die "cannot cd to blasting/"
mkdir -p alignments/ blast_summary/ || die "cannot create output directories"
[[ -r ../target_species.txt ]] || die "cannot read ../target_species.txt"

printf 'Query_ID\tSubject_ID\tevalue\tAlignment_length\tpercent_ID\n' > header

# The species list is read on fd 3 so that no command in the loop body can
# consume it through stdin. The "|| [[ -n ... ]]" keeps a final line that has
# no trailing newline.
while read -r -u 3 target_species || [[ -n "$target_species" ]]; do
	# An empty name would turn the cleanup glob below into ".n*".
	[[ -n "$target_species" ]] || continue

	genome="${target_species}.nuclear_genome.linearized.mtDNA_clean.fa"
	query="../COX1/${target_species}.COX1.fasta"
	xml_hits="${target_species}.blast_alignment.temp"
	hsp_pairs="${target_species}.blast_alignment.txt"
	table_hits="${target_species}.blast_results"
	table_kept="${target_species}.blast_results.filtered.txt"

	# On failure we warn but carry on, so blast_counts.txt still receives one
	# line (0) for this species and stays aligned with the species list.
	cp -- "../../remove_mtDNA_from_nuclear_genomes/${genome}" ./ \
		|| printf 'warning: could not copy genome for %s\n' "$target_species" >&2
	makeblastdb -in "$genome" -input_type fasta -dbtype nucl \
		-title "$target_species" -parse_seqids -out "$target_species"

	#blast based on the COI barcode sequence
	# XML output (-outfmt 5) is used only to extract the aligned sequences:
	# each Hsp_qseq line plus the line after it (expected to be Hsp_hseq),
	# with the leading "<" stripped and grep's "--" group separators removed.
	blastn -db "$target_species" -query "$query" -out "$xml_hits" \
		"${blast_opts[@]}" -outfmt 5
	grep -A 1 Hsp_qseq "$xml_hits" | cut -d '<' -f 2 | grep -v '^--$' > "$hsp_pairs"
	rm -f -- "$xml_hits"

	# Same search again in tabular form: one row per hit.
	blastn -db "$target_species" -query "$query" -out "$table_hits" \
		"${blast_opts[@]}" -outfmt "6 qseqid sseqid evalue length pident"

	# Annotate each alignment line with its hit's length and %id (each table
	# row is repeated twice because every hit contributes two alignment
	# lines), drop hits with full coverage and >=99% ID, then put "--"
	# between consecutive hits (none after the last one).
	cut -f 4,5 "$table_hits" \
		| awk '{ print; print }' \
		| paste - "$hsp_pairs" \
		| awk -v len="$full_length" -v pid="$pident_cutoff" \
			'$2 < pid || $1 < len { print $3 }' \
		| awk 'NR > 1 && NR % 2 == 1 { print "--" } { print }' \
		> "alignments/${target_species}.blast_alignment.txt"

	#apply the same 100% coverage and >=99% ID filter to the table-format results (used for NUMT counts)
	awk -v len="$full_length" -v pid="$pident_cutoff" \
		'$5 < pid || $4 < len' "$table_hits" > "$table_kept"
	cat header "$table_kept" > "blast_summary/${target_species}.blast_results.txt"
	# awk prints the bare line count (no padding, unlike some wc builds).
	awk 'END { print NR }' "$table_kept" >> blast_counts.txt

	# Remove this species' intermediates only: the copied genome and the BLAST
	# database files (all "<species>.n*") plus the raw and filtered tables.
	rm -f -- "${target_species}".n* "$table_hits" "$table_kept" "$hsp_pairs"
done 3< ../target_species.txt


