#!/bin/bash
#download_genome_files_from_ncbi

# ---------------------------------------------------------------------------
# User settings. The values below look like placeholders; replace them with
# real paths / numbers before running.
# ---------------------------------------------------------------------------

# Directory the script changes into; the <sample>.fa.gz files are written here.
work_dir='/path/to/Example_Project/Retrieve_raw_genome/download_example'

# List of accessions to fetch, read line by line
# (tab-separated: sample_name, assembly_accession_id).
assembly_accession_id_file='Accession_id_file'

# Directory holding the efetch executable (invoked as "$edirect_dir/efetch").
edirect_dir='/path/to/efetch'

# NOTE: despite the "_dir" suffix, this value is executed directly as the
# ParaFly command.
ParaFly_dir='/path/to/ParaFly'

# Value handed to ParaFly's -CPU option.
threads='Number of threads'
# Make sure the working directory exists *before* entering it. Relative paths
# used further down (the accession list and run_retrieve.list) are resolved
# against this directory, so abort instead of continuing somewhere else.
if ! mkdir -p -- "$work_dir"; then
    echo "ERROR: cannot create working directory: $work_dir" >&2
    exit 1
fi
if ! cd -- "$work_dir"; then
    echo "ERROR: cannot enter working directory: $work_dir" >&2
    exit 1
fi
# Build run_retrieve.list (one download command per accession) and let ParaFly
# execute that list in parallel.
#
# Input format, one record per line:
#     sample_name<TAB>assembly_accession_id
# Empty lines and lines starting with '#' are skipped. Columns after the
# second are ignored. A line with a single field is treated as an accession
# without a sample name; it is named sample_<n>, where n is the running count
# of records read.
if [[ ! -r $assembly_accession_id_file ]]; then
    echo "ERROR: cannot read accession list: $assembly_accession_id_file" >&2
    exit 1
fi

i=0
# "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
while read -r line || [[ -n $line ]]
do
    line=${line%$'\r'}   # tolerate CRLF line endings
    [[ $line == \#* ]] && continue
    [[ $line == "" ]] && continue
    IFS=$'\t' read -r sample_name assembly_accession_id _ <<<"$line"
    i=$((i+1))
    if [[ -z $assembly_accession_id ]]
    then
        # Only one field present: it is the accession, so generate a name.
        assembly_accession_id=$sample_name
        sample_name="sample_${i}"
    fi
    # Progress goes to stderr so that only commands end up in the list file.
    echo "retrieve genome assembly by the NCBI genome assembly accession_id: $assembly_accession_id for the sample $sample_name ..." >&2
    # Emit the command instead of running it here; %q shell-quotes each value
    # so the line is re-parsed safely when ParaFly executes it.
    printf '%q -db sequences -id %q -format fasta | gzip -c > %q\n' \
        "$edirect_dir/efetch" "$assembly_accession_id" "$work_dir/$sample_name.fa.gz"
done < "$assembly_accession_id_file" > run_retrieve.list

if [[ ! -s run_retrieve.list ]]; then
    echo "ERROR: no accessions found in $assembly_accession_id_file" >&2
    exit 1
fi

# $ParaFly_dir is executed directly as the ParaFly command.
"$ParaFly_dir" -c run_retrieve.list -CPU "$threads"
