#SCRIPTS for Genomic associations with poxvirus across divergent island populations in Berthelot’s pipit
# Eleanor C. Sheppard, Claudia A. Martin, Claire Armstrong, Catalina González-Quevedo, Juan Carlos Illera, Alexander Suh, Lewis G. Spurgin & David S. Richardson

# 2021-11-17

#This file describes the code used to undertake the Bayenv analyses and associated bioinformatics detailed in the methods section 'Identification of SNPs correlated with population-level pox prevalence' of the above publication.
#For any further queries please contact e.sheppard@uea.ac.uk or david.richardson@uea.ac.uk

# Decompress the gzipped fileset for all pipits: Berthelots.bim.gz,
# Berthelots.fam.gz and Berthelots.bed.gz (one gunzip call per file, in that order).
for ext in bim fam bed; do
  gunzip "Berthelots.${ext}.gz"
done

# Thin the ddRAD marker set down to independent SNPs in three plink passes
# (each pass reads the fileset written by the previous one):
#   1. --maf 0.05 filter on the full Berthelots fileset
#   2. --indep-pairwise 50 5 0.5 scan; its --out prefix is where the .prune.in list used next comes from
#   3. --extract of that .prune.in list, giving the final Berthelots_maf0.05_LD0.5 fileset
all_birds=Berthelots
maf_kept=${all_birds}_maf_trim_0.05
ld_scan=${maf_kept}_high_LD

plink --bfile "$all_birds" --chr-set 34 --maf 0.05 --make-bed --out "$maf_kept"
plink --bfile "$maf_kept" --chr-set 34 --indep-pairwise 50 5 0.5 --make-bed --out "$ld_scan"
plink --bfile "$ld_scan" --chr-set 34 --extract "${ld_scan}.prune.in" --make-bed --out Berthelots_maf0.05_LD0.5

# Convert the pruned fileset to Bayenv format in two steps:
#   1. plink --recode writes its output under the prefix held in $ped_prefix
#   2. PGDSpider reads <prefix>.ped as PED and writes the Bayenv-format text file,
#      using the conversion settings in SPID_file_PED.spid
ped_prefix=ped_Berthelots_maf0.05_LD0.5

plink \
  --bfile Berthelots_maf0.05_LD0.5 \
  --chr-set 34 \
  --recode \
  --out "$ped_prefix"

java -Xmx1024m -Xms512m -jar PGDSpider2-cli.jar \
  -inputfile "${ped_prefix}.ped" -inputformat PED \
  -outputfile bayenv_Berthelots_maf0.05_LD0.5.txt -outputformat Bayenv \
  -spid SPID_file_PED.spid

# Generate population covariance matrices: run bayenv2 ten times on the same
# input, each time with a different -r seed, and capture each run's stdout in
# matrix<N>.txt (N = 1..10, numbered in the order the seeds are listed).
run=0
for seed in 63481 32677 74777 61206 55609 15768 90221 32672 80025 86043; do
  run=$((run + 1))
  bayenv2 -i bayenv_Berthelots_maf0.05_LD0.5.txt -p 13 -k 100000 -r "$seed" > "matrix${run}.txt"
done

# Keep only the last matrix printed by each run: copy the final 14 lines of
# matrix<N>.txt into matrix_single<N>.txt for N = 1..10.
for run in 1 2 3 4 5 6 7 8 9 10; do
  tail -n 14 "matrix${run}.txt" > "matrix_single${run}.txt"
done

### *NOTE! At this point in the analysis, you need to run the R script "Bayenv_input_script.R" to produce the environmental file 'ENVIRONFILE_pox.txt' and the matrix file 'mean_matrix.txt', which are the input files for the next steps in Bayenv.

# Run Bayenv2.0 in test mode: five replicates at each of 100,000, 200,000 and
# 500,000 iterations (15 calc_bf.sh runs in total).
#
# calc_bf.sh is called with, in order: the Bayenv SNP file, the environmental
# file, the matrix file, 13, the iteration count and 1.
# NOTE(review): 13 and 1 are presumably the number of populations and the
# number of environmental variables - confirm against your copy of calc_bf.sh.
#
# After every run the output is renamed to
#   bf_environ.pox_<100_000|200_000|500_000>_run<1-5>.txt
# so that no two runs share an output file. Previously this rename (and the
# four extra replicates per setting) had to be done by hand between commands.
# NOTE(review): assumes each run leaves its Bayes factors in
# bf_environ.<environmental file name> in the working directory - confirm for
# your Bayenv2 build and adjust raw_bf below if it differs.
snp_file=bayenv_Berthelots_maf0.05_LD0.5.txt
env_file=ENVIRONFILE_pox.txt
matrix_file=mean_matrix.txt
raw_bf="bf_environ.${env_file}"

if [ -e "$raw_bf" ]; then
  # A leftover file from an earlier run could end up mixed into replicate 1.
  printf 'error: %s already exists; move it away before starting the replicates\n' "$raw_bf" >&2
else
  # Each spec is <iteration count>:<label used in the output file name>.
  for spec in 100000:100_000 200000:200_000 500000:500_000; do
    iterations=${spec%%:*}
    label=${spec#*:}
    for replicate in 1 2 3 4 5; do
      renamed="bf_environ.pox_${label}_run${replicate}.txt"
      calc_bf.sh "$snp_file" "$env_file" "$matrix_file" 13 "$iterations" 1 ||
        printf 'warning: calc_bf.sh exited non-zero while producing %s\n' "$renamed" >&2
      # Always move the raw file out of the way, so the next run starts clean.
      if [ -f "$raw_bf" ]; then
        mv -- "$raw_bf" "$renamed"
      else
        printf 'warning: %s not found after run; nothing renamed to %s\n' "$raw_bf" "$renamed" >&2
      fi
    done
  done
fi

### *NOTE! At this point in the analysis, you need to run the R script "Bayenv_output_script.R" up to and including the section 'Identifying candidate SNPs from 500k iterations' to identify the candidate SNPs used in the next lines of code.

# Minor allele frequency for each Bayenv-associated candidate SNP.
# One plink --family --freq run per SNP, written under Berthelots_SNP<id>_MAF.
# These outputs are the ones used to build MAF_pox_candidateSNPs.csv, which
# the R script "Bayenv_output_script.R" uses to create Fig 4.
for snp in 444s109 1941s110 2177s14 909s118 1063s41 3493s67 2862s117 \
           1796s91 3400s90 1425s80 4301s71 3439s47 1526s83 1916s23; do
  plink --bfile Berthelots --chr-set 34 --family --freq --snp "$snp" --out "Berthelots_SNP${snp}_MAF"
done

# Make a single-SNP fileset for every candidate SNP, to find their chromosomal
# location. One plink call per SNP, with both --make-bed and --recode, written
# under the prefix Berthelots_SNP<id>.
#
# This used to be two passes per SNP: first --make-bed alone, then
# --family --make-bed --recode with the same --out prefix. The second pass
# overwrote everything the first had written, so only the second command is kept.
# NOTE(review): --family is carried over unchanged from that second command;
# presumably it does not alter the --make-bed/--recode output - confirm.
for snp in 444s109 1941s110 2177s14 909s118 1063s41 3493s67 2862s117 \
           1796s91 3400s90 1425s80 4301s71 3439s47 1526s83 1916s23; do
  plink --bfile Berthelots --chr-set 34 --family --snp "$snp" --make-bed --recode --out "Berthelots_SNP${snp}"
done
