#!/bin/bash
#use the complete spreadsheet to obtain species ID and associated nuclear genome accession

# For every species listed in target_species.txt:
#   1. download all GenBank assemblies available for the organism name(s) found
#      in field 3 of the spreadsheet, and
#   2. keep only the assembly whose accession is given in field 4, renamed to
#      nuclear_accessions/<species>.nuclear_genome.fa.gz.
# All paths are relative to the directory the script is started from.
# Requires: ncbi-genome-download on PATH.

stats_csv=../nuclear_genome_stats_Dec2021.csv
species_list=target_species.txt
download_dir=all_genome_assemblies
nuclear_dir=nuclear_accessions

# Print a warning to stderr.
warn() {
	printf 'warning: %s\n' "$*" >&2
}

# Print the spreadsheet rows that mention species $1.
# -w keeps "Genus_species" from also matching "Genus_species_subsp" ("_" is a
# word character); -F treats the name literally instead of as a regex.
species_rows() {
	grep -wF -- "$1" "$stats_csv"
}

# Download every GenBank assembly available for species $1.
# The organism name is field 3 of the matching rows with "_" replaced by " ".
# Returns non-zero if any download command fails.
download_species_assemblies() {
	local species=$1 organism status=0

	while IFS= read -r organism; do
		organism=${organism%$'\r'}
		[[ -n "$organism" ]] || continue
		organism=${organism//_/ }
		# The name is passed as a single argument, so nothing in the spreadsheet
		# is ever interpreted as shell code. stdin is detached so the downloader
		# cannot consume the species list being read by the caller's loop.
		if ! ncbi-genome-download -s genbank --genera "$organism" invertebrate \
			-o "$download_dir" --flat-output -F fasta </dev/null; then
			warn "download failed for \"$organism\""
			status=1
		fi
	done < <(species_rows "$species" | cut -d ',' -f 3 | sort -u)

	return "$status"
}

# Move the nuclear genome assembly of species $1 (accession = field 4 of the
# matching rows) out of the download directory and give it its final name.
# Other assemblies downloaded for the species stay in the download directory.
# Returns non-zero if an accession could not be resolved to exactly one file.
keep_nuclear_assemblies() {
	local species=$1 accession status=0
	local target=$nuclear_dir/$species.nuclear_genome.fa.gz
	local -a matches

	# Without the directory, mv would rename the assembly to a plain file
	# called "nuclear_accessions".
	mkdir -p -- "$nuclear_dir" || return

	while IFS= read -r accession; do
		accession=${accession%$'\r'}
		if [[ -z "$accession" ]]; then
			# An empty accession would turn the glob below into "match everything".
			warn "no nuclear accession recorded for $species"
			status=1
			continue
		fi

		matches=("$download_dir/$accession"*.fna.gz)
		if [[ ! -e "${matches[0]}" ]]; then
			warn "no downloaded assembly found for $accession ($species)"
			status=1
			continue
		fi
		if (( ${#matches[@]} > 1 )); then
			warn "accession $accession ($species) matches ${#matches[@]} files; skipping"
			status=1
			continue
		fi

		# Rename straight from the download directory, so files left in
		# nuclear_accessions by an earlier species can never be picked up.
		mv -- "${matches[0]}" "$target" || status=1
	done < <(species_rows "$species" | cut -d ',' -f 4)

	return "$status"
}

# Process every species in the species list.
# Returns non-zero if an input file is unreadable or any species had a problem.
main() {
	local species status=0

	if [[ ! -r "$species_list" ]]; then
		printf 'error: cannot read %s\n' "$species_list" >&2
		return 1
	fi
	if [[ ! -r "$stats_csv" ]]; then
		printf 'error: cannot read %s\n' "$stats_csv" >&2
		return 1
	fi

	# "|| [[ -n ... ]]" also processes a last line without a trailing newline.
	while read -r species || [[ -n "$species" ]]; do
		species=${species%$'\r'}
		# Blank lines would otherwise match every row of the spreadsheet.
		[[ -n "$species" ]] || continue

		if ! species_rows "$species" >/dev/null; then
			warn "$species not found in $stats_csv"
			status=1
			continue
		fi

		download_species_assemblies "$species" || status=1
		keep_nuclear_assemblies "$species" || status=1
	done < "$species_list"

	return "$status"
}

main "$@"
