#!/bin/bash
##ADMIXTURE: maximum likelihood estimation of individual ancestries from multilocus SNV genotype datasets (curated data)

# Fail fast: abort on any command error, on use of an unset variable, and when
# any stage of a pipeline fails, so a broken step cannot silently feed the next.
set -euo pipefail

## Project locations
workdir="/path/to/Example_Project/Genome_SNV/ADMIXTURE_example" # root for the snp_sites/ and admixture/ output directories
input_dir="/path/to/Example_Project/Genome_alignment"           # directory holding the input alignment
scripts_dir="/path/to/scripts"                                  # directory holding the helper Python scripts

## Tool installation directories (each is used as "<dir>/<executable>")
snp_sites_dir="/path/to/snp-sites" #v2.5.1
admixture_dir="/path/to/admixture" #v1.3.0
python_dir="/path/to/python" #v3.9.12
plink_dir="/path/to/plink" #v1.9 (no trailing slash, avoids "//" in "$plink_dir/plink")

## Thresholds handed to 06_snp_vcf_filter.py
cutoff=0.1 # value of its -cutoff option
MAF=0.01   # value of its -MAF option

## Naming
flag1="flag1" # tags the filtered VCF name and names the admixture sub-directory
flag2="flag2" # appended to $prefix to form $prefix2
prefix="any_prefix_you_like"
prefix2="${prefix}.${flag2}" # base name of the snp-sites outputs

# Input alignment; its base name must equal $prefix.
input_fa="${input_dir}/${prefix}.fa" ##example_cactus.fa

## get iqtree strain order
# Create the snp-sites output directory (including missing parents) and work
# from inside it. Abort if either step fails: the commands that follow use
# relative file names, so carrying on from another directory would read and
# write files in the wrong place.
mkdir -p -- "$workdir/snp_sites" \
  || { printf 'error: cannot create %s\n' "$workdir/snp_sites" >&2; exit 1; }
cd -- "$workdir/snp_sites" \
  || { printf 'error: cannot cd to %s\n' "$workdir/snp_sites" >&2; exit 1; }

# Passed as -tree_order to 04_sort_snpsites_output_vcf.py; as written it is
# resolved relative to $workdir/snp_sites.
# NOTE(review): presumably a file listing strain names in phylogenetic-tree
# order -- confirm the expected format against that script.
tree_order="strain_name_sort_by_phylogenetic_tree"


## fasta to VCF using snp-sites
# -m and -v request a multi-FASTA alignment and a VCF in one run; the next step
# reads "$prefix2.vcf", i.e. it relies on snp-sites treating -o as an output
# prefix when more than one format is requested.
# Every step aborts the script on failure so that a later step never runs on a
# missing or stale VCF left over from a previous run.
"$snp_sites_dir/snp-sites" -mv -o "$prefix2" "$input_fa" \
  || { printf 'error: snp-sites failed on %s\n' "$input_fa" >&2; exit 1; }

# Sort the VCF using the order given by $tree_order.
"$python_dir/python" "$scripts_dir/04_sort_snpsites_output_vcf.py" \
  -tree_order "$tree_order" \
  -vcf_in "${prefix2}.vcf" \
  -vcf_out "${prefix2}.sort.vcf" \
  || { printf 'error: sorting %s failed\n' "${prefix2}.vcf" >&2; exit 1; }

# Filter the sorted VCF with the -cutoff and -MAF thresholds; the output name
# carries $flag1. (No trailing backslash after the last option: a dangling
# continuation would swallow whatever command is placed on the next line.)
"$python_dir/python" "$scripts_dir/06_snp_vcf_filter.py" \
  -cutoff "$cutoff" \
  -vcf_in "${prefix2}.sort.vcf" \
  -vcf_out "${prefix2}.sort.${flag1}.vcf" \
  -MAF "$MAF" \
  || { printf 'error: filtering %s failed\n' "${prefix2}.sort.vcf" >&2; exit 1; }

## generate *.ped & *.map file
# Base name shared by the filtered VCF and by the PLINK outputs.
prefix3="${prefix2}.sort.${flag1}"
# Filtered VCF written by 06_snp_vcf_filter.py in the snp_sites directory.
snp_vcf="$workdir/snp_sites/${prefix3}.vcf"
# One run directory per $flag1 so differently filtered datasets stay separate.
admixture_run_dir="$workdir/admixture/$flag1"

# Stop before PLINK if the upstream filter produced nothing.
[[ -s "$snp_vcf" ]] \
  || { printf 'error: missing or empty VCF: %s\n' "$snp_vcf" >&2; exit 1; }

# Create the run directory (including parents) and work from inside it. Abort
# on failure so the PLINK outputs are never written to an unrelated directory.
mkdir -p -- "$admixture_run_dir" \
  || { printf 'error: cannot create %s\n' "$admixture_run_dir" >&2; exit 1; }
cd -- "$admixture_run_dir" \
  || { printf 'error: cannot cd to %s\n' "$admixture_run_dir" >&2; exit 1; }

# --recode 12 writes ${prefix3}.ped/.map with alleles coded as 1/2;
# --double-id uses each VCF sample ID as both family ID and individual ID.
"$plink_dir/plink" --double-id --vcf "$snp_vcf" --recode 12 --out "$prefix3" \
  || { printf 'error: plink failed on %s\n' "$snp_vcf" >&2; exit 1; }

## search for best K value
# Run ADMIXTURE with cross-validation (--cv) once for every K in 1..max_K and
# keep each run's console output in log<K>.out (tee also echoes it live).
# Defaults match the original hard-coded values (K = 1..40, 30 threads); both
# can be overridden through the environment without editing the script.
max_K="${ADMIXTURE_MAX_K:-40}"
threads="${ADMIXTURE_THREADS:-30}"
failed_K=()

# Without pipefail the pipeline status is tee's, which hides admixture failures.
set -o pipefail

for ((K = 1; K <= max_K; K++)); do
  if ! "$admixture_dir/admixture" --cv "${prefix3}.ped" "$K" "-j${threads}" \
    | tee "log${K}.out"; then
    # Keep going so the remaining K values are still evaluated, but remember
    # the failure so it is reported and reflected in the exit status.
    printf 'warning: admixture failed for K=%s (see log%s.out)\n' "$K" "$K" >&2
    failed_K+=("$K")
  fi
done

if ((${#failed_K[@]} > 0)); then
  printf 'error: admixture failed for K = %s\n' "${failed_K[*]}" >&2
  exit 1
fi
