#!/bin/bash
#CDS alignment with MACSE (v2.04)

# Print the start time so the log shows when the run began.
date
# Working directory; the script changes into it before doing anything else,
# and the MACSE job list (run_MACSE_ref.jobs) is written relative to it.
wd="/path/to/Example_Project/MACSE_example"
# Gene list: every whitespace-separated word in this file is used as a gene
# name when the MACSE commands are generated (<gene>.checked.fa is the input).
PVs_cds_strain_file="/path/to/cds_strain_file"  ##example_cds_strain.txt
# Alignment list: each line is split on ';' into two fields, a subdirectory
# and a CDS name, which locate <subdirectory>/<CDS name>.fa for the fasta check.
PVs_cds_aln_path="/path/to/cds_aln_path"  ##example_cds_aln_path.txt
# Root directory under which the input CDS FASTA files are looked up.
PVs_cds_files_all_dir="/path/to/example_several_cds_files_all"
# Output directory for MACSE results (<gene>.macse_NT.aln.fa and
# <gene>.macse_AA.aln.fa); created below if it does not exist.
PVs_cds_realn_dir="/path/to/Example_Project/MACSE_example/example_cds_realn"
# Output directory for the checked FASTA files (<CDS name>.checked.fa), which
# are also the MACSE inputs; created below if it does not exist.
PVs_cds_aln_check_dir="/path/to/Example_Project/MACSE_example/example_cds_aln_check"
# Directory that contains macse.jar.
MACSE_dir="/path/to/MACSE"
# seqtk / seqkit locations. NOTE(review): neither variable is referenced in
# the visible part of this script — confirm whether they are still needed.
seqtk_dir="/path/to/seqtk" ## v1.3
seqkit_dir="/path/to/seqkit" ## v2.2.0
# Directory that contains the helper script 05_MACSE_fasta_check.py.
scripts_dir="/path/to/scripts"
# Python location (v3.9.12). NOTE(review): confirm whether this should be the
# interpreter itself or the directory holding it.
python_dir="/path/to/python" #v3.9.12
# Directory that contains the GNU parallel executable (run as $parallel_dir/parallel).
parallel_dir="/path/to/parallel"
# Value passed to "parallel -j"; the placeholder text must be replaced with an
# actual job count before running.
threads="Number of threads"
# Enter the working directory. Abort if that fails: files written with
# relative paths later in the script would otherwise land in whatever
# directory the script happened to be started from.
cd "$wd" || { echo "ERROR: cannot change to working directory: $wd" >&2; exit 1; }

## mkdir
# Create both output directories. -p makes this a no-op when they already
# exist and also creates missing parent directories; quoting keeps paths with
# spaces intact, and "--" protects against paths that start with a dash.
mkdir -p -- "$PVs_cds_aln_check_dir" "$PVs_cds_realn_dir" \
  || { echo "ERROR: cannot create output directories" >&2; exit 1; }

## fasta check
# Runs 05_MACSE_fasta_check.py once per entry of $PVs_cds_aln_path.
# Each line of that file has the form "<subdirectory>;<CDS name>"; the input is
#   $PVs_cds_files_all_dir/<subdirectory>/<CDS name>.fa
# and the checked output is
#   $PVs_cds_aln_check_dir/<CDS name>.checked.fa

# Resolve the Python interpreter. $python_dir may hold either the interpreter
# itself or the directory containing it; in the latter case prefer python3 and
# fall back to python.
python="$python_dir"
if [[ -d "$python_dir" ]]; then
  for py_candidate in python3 python; do
    if [[ -x "$python_dir/$py_candidate" ]]; then
      python="$python_dir/$py_candidate"
      break
    fi
  done
fi
if [[ ! -x "$python" || -d "$python" ]]; then
  echo "ERROR: no usable Python interpreter found at: $python_dir" >&2
  exit 1
fi

# The first read uses the default IFS so leading/trailing whitespace is
# trimmed from each line; "|| [[ -n ... ]]" keeps a final line that lacks a
# trailing newline. The fields are deliberately NOT named PATH: assigning to
# PATH would replace the shell's command search path inside the loop.
while read -r aln_entry || [[ -n "$aln_entry" ]]; do
  # Skip blank lines instead of handing "/.fa" paths to the checker.
  [[ -n "$aln_entry" ]] || continue

  IFS=';' read -r cds_subdir cds_name <<< "$aln_entry"
  if [[ -z "$cds_subdir" || -z "$cds_name" ]]; then
    echo "WARNING: skipping malformed line in $PVs_cds_aln_path: $aln_entry" >&2
    continue
  fi

  "$python" "$scripts_dir/05_MACSE_fasta_check.py" \
    -in_aln "$PVs_cds_files_all_dir/$cds_subdir/$cds_name.fa" \
    -out_aln "$PVs_cds_aln_check_dir/$cds_name.checked.fa"
done < "$PVs_cds_aln_path"

## macse realn
# Build the job list: one "java -jar macse.jar -prog alignSequences" command
# per gene name found in $PVs_cds_strain_file, written to run_MACSE_ref.jobs
# in the current directory.
#
# Every whitespace-separated word in the gene file is treated as a gene name.
# Reading into an array keeps that splitting but avoids glob expansion of the
# names; "|| (( ... ))" keeps a final line that lacks a trailing newline.
# printf %q shell-quotes each path, because every job line is parsed by a
# shell again when it is executed; ordinary paths are emitted unchanged.
while read -r -a genes || (( ${#genes[@]} > 0 )); do
  for gene in "${genes[@]}"; do
    printf 'java -jar %q -prog alignSequences -gc_def 1 -seq %q -out_NT %q -out_AA %q\n' \
      "$MACSE_dir/macse.jar" \
      "$PVs_cds_aln_check_dir/$gene.checked.fa" \
      "$PVs_cds_realn_dir/$gene.macse_NT.aln.fa" \
      "$PVs_cds_realn_dir/$gene.macse_AA.aln.fa"
  done
done < "$PVs_cds_strain_file" > run_MACSE_ref.jobs

# Run the jobs, at most $threads at a time. Stop with an error if parallel
# reports a failure, so that "Done!" is only printed after a clean run.
if ! "$parallel_dir/parallel" -j "$threads" < run_MACSE_ref.jobs; then
  echo "ERROR: one or more MACSE jobs failed (see run_MACSE_ref.jobs)" >&2
  exit 1
fi

echo "Done!"

# Print the end time to pair with the start timestamp.
date


