#!/bin/bash
## Principal component analysis of full curated data
# Print the start time so the run can be timed from the log.
date

# ---- Directories ------------------------------------------------------------
# Directory the pipeline changes into before running any step.
work_dir='/path/to/Example_Project/Genome_SNV/PCA_example'
# Holds the helper scripts 07_vcf_filter.smartpca.py and
# 08_prepare_smartsnp_ind_file.py.
scripts_dir='/path/to/scripts'

# ---- Tool locations (directory containing each executable) ------------------
python_dir='/path/to/python'      # python3, v3.9.12
plink_dir='/path/to/plink/'       # plink, v1.9
convertf_dir='/path/to/convertf'  # convertf
smartpca_dir='/path/to/smartpca'  # smartpca, v3.0

# ---- Inputs and run parameters ----------------------------------------------
# VCF handed to 07_vcf_filter.smartpca.py as -vcf_in.
vcf='vcf_file_of_snp'
# VCF handed to plink as --vcf.
vcf_f='output_vcf_file_after_plink_filtering'
# Prefix shared by the output files and by the convertf/smartpca .par files.
PCA_prefix='PCA_prefix'
# Used both as -missing_data_cutoff for the filter script and as plink --geno.
missing_data_cutoff=0.1


# Stop at the first failing command (and on any unset variable) so that a
# broken step cannot feed missing or stale files to the next tool, and so that
# "Done!" is only printed when every step succeeded.
set -euo pipefail

# Tag embedded in the smartpca log file name.
flag="flag"

# Every relative path below is resolved against the working directory, so a
# failed cd must abort the run instead of continuing somewhere else.
cd "$work_dir" || {
    printf 'ERROR: cannot change to work_dir: %s\n' "$work_dir" >&2
    exit 1
}

## vcf format modify
# Writes $PCA_prefix.output.vcf and $PCA_prefix.output.snp.
"${python_dir%/}/python3" "${scripts_dir%/}/07_vcf_filter.smartpca.py" \
    -missing_data_cutoff "$missing_data_cutoff" \
    -vcf_in "$vcf" \
    -vcf_out "$PCA_prefix.output.vcf" \
    -snp_out "$PCA_prefix.output.snp"

## run plink
# NOTE(review): plink reads $vcf_f, not the $PCA_prefix.output.vcf written by
# the step above -- confirm vcf_f points at the intended file.
# --geno drops variants whose missing-call rate exceeds the cutoff, --maf 0.01
# drops variants with minor allele frequency below 1%, --double-id uses each
# VCF sample ID as both family and individual ID, and --recode writes a text
# fileset under the --out prefix.
# ${plink_dir%/} strips a trailing slash so the path does not contain "//".
"${plink_dir%/}/plink" --vcf "$vcf_f" \
    --geno "$missing_data_cutoff" \
    --maf 0.01 \
    --recode \
    --double-id \
    --out "$PCA_prefix"

## run convertf
# The parameter file is not created by this script; it must already exist in
# work_dir.
"${convertf_dir%/}/convertf" -p "$PCA_prefix.mask.convertf.par"

## run smartpca
# The helper script is invoked without arguments, from work_dir.
"${python_dir%/}/python3" "${scripts_dir%/}/08_prepare_smartsnp_ind_file.py"
# smartpca's standard output is captured in a log file tagged with $flag.
"${smartpca_dir%/}/smartpca" -p "$PCA_prefix.smartpca.par" \
    > "$PCA_prefix.smartpca.$flag.log"

echo "Done!"
# Print the end time to pair with the start time logged at the top.
date

