#!/bin/bash
#second alignment of genome with cactus (v2.2.0)
#phylogenetic tree produced by IQ-TREE (v2.0.6)

# Fail fast: abort on any command error, unset variable, or failed pipeline
# stage, so a broken step never feeds missing/stale files into the next one.
set -euo pipefail

# Print the start time of the run.
date

# ---- User configuration: replace every placeholder value before running ----
workdir="/path/to/Example_Project/Genome_alignment/Cactus_alignment_example"
cactus_dir="/path/to/cactus"
genome_dir="/path/to/Example_Project/Genome_alignment/Cactus_alignment_example/softmask_genome"
strains="strain_name_file"
ref_strains="ref_strain_file"
guide_tree="guide_tree.treefile" ##produced_by_program_'05_run_mafft.sh'
seqFile="seqFile"
scripts_dir="/path/to/scripts"
prefix="any prefix you like"      # basename shared by all output files
iqtree_dir="/path/to/IQ-TREE" ##v2.0.6
python_dir="/path/to/python" ##v3.9.12
hal2maf_dir="/path/to/hal2maf" ## v2.2
clipkit_dir="/path/to/clipkit" ## v1.3.0
threads="Number of threads"       # must be replaced by an integer
seed="any seed you like"          # must be replaced by an integer

# The relative file names above (and ./jobstore, ${prefix}.* below) are
# resolved against workdir, so stop immediately if it cannot be entered.
cd "$workdir" || { echo "ERROR: cannot cd to workdir: $workdir" >&2; exit 1; }

# cactus input seqFile
# Build the Cactus input seqFile from the strain lists, the genome directory
# and the guide tree. Arguments are quoted so paths containing spaces survive,
# and the script stops here if generation fails rather than running Cactus
# on a missing or stale seqFile.
"$python_dir/python3" "$scripts_dir/01_generate_cactus_input_seqFile.py" \
    -strains "$strains" \
    -ref_strains "$ref_strains" \
    -genome_dir "$genome_dir" \
    -guide_tree "$guide_tree" \
    -out_seqFile "$seqFile" \
    || { echo "ERROR: generating cactus seqFile failed" >&2; exit 1; }

## cactus align
# Run the Cactus alignment: job store in ./jobstore, input from the seqFile,
# output written to <prefix>.hal.
# --maxCores now follows the configured $threads instead of a hard-coded 32,
# so the core count is set in one place for the whole script.
"$cactus_dir/cactus" ./jobstore "$seqFile" --maxCores "$threads" "${prefix}.hal" \
    || { echo "ERROR: cactus alignment failed" >&2; exit 1; }

## hal2maf
# Convert <prefix>.hal to <prefix>.maf, using hom_sap_HPV16 as the reference
# genome. Paths are quoted so a prefix containing spaces is passed as one
# argument; abort if the conversion fails.
"$hal2maf_dir/hal2maf" \
    --onlyOrthologs \
    --noDupes \
    --maxRefGap 2000 \
    --refGenome hom_sap_HPV16 \
    "${prefix}.hal" "${prefix}.maf" \
    || { echo "ERROR: hal2maf failed" >&2; exit 1; }

# maf2fa
# Convert <prefix>.maf to FASTA with the project script; it is given three
# output paths: <prefix>.fa.QC, <prefix>.fa.info and <prefix>.fa.
# Quoted so the prefix is never word-split; abort on failure so ClipKIT does
# not run on a missing FASTA file.
"$python_dir/python3" "$scripts_dir/02_maf2fa_for_noDupes.py" \
    -maf "${prefix}.maf" \
    -out_fa_QC "${prefix}.fa.QC" \
    -out_fa_info "${prefix}.fa.info" \
    -out_fa "${prefix}.fa" \
    || { echo "ERROR: maf2fa conversion failed" >&2; exit 1; }

## clipkit
# Trim the alignment <prefix>.fa with ClipKIT in "gappy" mode and write the
# result to <prefix>.clip.fa. Paths are quoted; abort if trimming fails.
"$clipkit_dir/clipkit" "${prefix}.fa" -m gappy -o "${prefix}.clip.fa" \
    || { echo "ERROR: clipkit trimming failed" >&2; exit 1; }

## iqtree v2.0.6
# Infer the tree from the trimmed alignment. Options passed:
#   -m MFP           ModelFinder Plus model selection
#   -B 1000 --bnni   1000 ultrafast bootstrap replicates with NNI refinement
#   -alrt 1000       1000 SH-aLRT replicates
#   -T / --seed      thread count and random seed from the configuration
#   --safe           numerically safe likelihood kernel
# All expansions are quoted, and a failed run exits non-zero instead of
# falling through to the final timestamp as if the pipeline had succeeded.
"$iqtree_dir/iqtree2" -s "./${prefix}.clip.fa" \
        --seed "$seed" \
        -m MFP \
        -B 1000 --bnni \
        -alrt 1000 \
        -T "$threads" \
        --prefix "${prefix}.clip.iqtree" \
        --safe \
        || { echo "ERROR: iqtree2 failed" >&2; exit 1; }


# Print the end time of the run.
date
