#!/bin/bash

## Written by Sarah B. Carey
## this script will take a phylip file and labeled trees (none, 1, 2 labels)
## and will write a control file for each for PAML and run codeml.
## This script will call the perl script
## paml_AIC.pl, which will pull dN, dS, and dN/dS for GG1 and R40, run an AIC,
## and compile the results in one file
## When run as a job array, each array task processes one gene (one *.fa.phy
## file), so the array as a whole covers all genes in the same directory


## to run on a cluster, add relevant submission script items
## and load necessary modules

## this script was written to run using SLURM, but $SLURM_ARRAY_TASK_ID can be changed
## to work on other workload managers

   
## ---------------------------------------------------------------------------
## Per-task driver: pick one *.fa.phy file by array index, write three codeml
## control files for it (no label / 1 label / 2 labels), run codeml on each,
## then hand the three codeml outputs to paml_AIC.pl and append its output to
## paml_results.out.
## ---------------------------------------------------------------------------

## Print an error message to stderr and abort this task with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

## Write a codeml control file.
##   $1 - path of the control file to create
##   $2 - value for "seqfile"
##   $3 - value for "outfile"
##   $4 - value for "treefile"
##   $5 - value for "model" (0: one omega, 2: user-specified branch labels)
## A here-document is used so that file names are inserted as plain data and
## are never interpreted as printf format directives.
write_ctl() {
  local ctl=$1 seq=$2 out=$3 tree=$4 model=$5
  cat > "$ctl" <<EOF
seqfile = $seq
    outfile = $out
    treefile = $tree
        noisy = 0  * 0,1,2,3,9: how much rubbish on the screen
      verbose = 0  * 1: detailed output, 0: concise output
      runmode = 0  * 0: user tree;  1: semi-automatic;  2: automatic
                   * 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise
      seqtype = 1  * 1:codons; 2:AAs; 3:codons-->AAs
    CodonFreq = 2  * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table
        model = $model
                   * models for codons:
                   * 0:one, 1:every branch has its own omega, 2:user-specified
      NSsites = 0  * 0:one w;1:neutral;2:selection; 3:discrete;4:freqs;
                   * 5:gamma;6:2gamma;7:beta;8:beta&w;9:beta&gamma;
                   * 10:beta&gamma+1; 11:beta&normal>1; 12:0&2normal>1;
                   * 13:3normal>0
        icode = 0  * 0:universal code; 1:mammalian mt; 2-11:see below
    fix_kappa = 0  * 1: kappa fixed, 0: kappa to be estimated
        kappa = 2  * initial or fixed kappa
    fix_omega = 0  * 1: omega or omega_1 fixed, 0: estimate
         omega = 0.2 * initial or fixed omega, for codons or codon-based AAs
   cleandata = 0
EOF
}

## Write the control file for one model and run codeml on it.
##   $1 - control file to create
##   $2 - tree file
##   $3 - codeml output file
##   $4 - value for "model"
## Uses the global $seqfile. Aborts the task if the tree is missing, the
## control file cannot be written, or codeml exits non-zero.
run_model() {
  local ctl=$1 tree=$2 out=$3 model=$4
  [[ -r "$tree" ]] || die "tree file not found or unreadable: $tree"
  write_ctl "$ctl" "$seqfile" "$out" "$tree" "$model" \
    || die "could not write control file: $ctl"
  codeml "$ctl" || die "codeml failed for $ctl"
}

## The array index selects the gene: task N handles the N-th *.fa.phy file
## (1-based, in glob sort order) of the current directory.
task_id=${SLURM_ARRAY_TASK_ID:-}
[[ "$task_id" =~ ^[1-9][0-9]*$ ]] \
  || die "SLURM_ARRAY_TASK_ID must be a positive integer (got '$task_id')"

## nullglob: an unmatched pattern yields an empty array, not the literal text.
shopt -s nullglob
seqfiles=(*.fa.phy)
shopt -u nullglob

(( ${#seqfiles[@]} > 0 )) || die "no *.fa.phy files in $PWD"
## Refuse an out-of-range index instead of silently re-running the last gene
## and appending a duplicate row to paml_results.out.
(( task_id <= ${#seqfiles[@]} )) \
  || die "array index $task_id exceeds the ${#seqfiles[@]} available *.fa.phy files"

seqfile=${seqfiles[task_id - 1]}
## Gene prefix = everything before the first '.', as in `cut -d '.' -f 1`.
filename=${seqfile%%.*}

treefile_no_label="$filename.nolabel.tre"
treefile_1label="$filename.1label.tre"
treefile_2label="$filename.2label.tre"
outfile_1rate="$filename.1rate.paml.out"
outfile_2rate="$filename.2rate.paml.out"
outfile_3rate="$filename.3rate.paml.out"

## 1 rate: unlabeled tree, model = 0.
run_model "$filename.1rate.ctl" "$treefile_no_label" "$outfile_1rate" 0
## 2 rates: tree with one label, model = 2.
run_model "$filename.2rate.ctl" "$treefile_1label" "$outfile_2rate" 2
## 3 rates: tree with two labels, model = 2.
run_model "$filename.3rate.ctl" "$treefile_2label" "$outfile_3rate" 2

## Append this gene's summary to the shared results file.
perl paml_AIC.pl "$outfile_1rate" "$outfile_2rate" "$outfile_3rate" >> paml_results.out \
  || die "paml_AIC.pl failed for $filename"


## After all array tasks have finished, run the following once to prepend the column headers
#echo -e "w_M0\tw_M2_0\tw_M2_1\tw_M2_2\tLRT\tSig" | cat - paml_results.out > /tmp/out && mv /tmp/out paml_results.out
