#COMMANDS USED FOR: #Lineage Identification affects estimates of Evolutionary Mode in Marine Snails #Felix Vaux, Michael R. Gemmell, Simon F.K. Hills, Bruce A. Marshall, Alan G. Beu, James S. Crampton, Steve A. Trewick, Mary Morgan-Richards #Systematic Biology 2020 ###UNPACKING FASTQ.GZ FILES USING GUNZIP cd /nfs1/FW_HMSC/OMalley_Lab/vauxf/RAW_READS/whelks #INDEX9 gunzip whelk9.fastq.gz #INDEX10 gunzip whelk10.fastq.gz ###PROCESS RADTAGS cd /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/process_radtags_logs #INDEX9 SGE_Batch -q harold -P 10 -f 30G -m 36G -c '/local/cluster/stacks/stacks-1.01/bin/process_radtags -f /nfs1/FW_HMSC/OMalley_Lab/vauxf/RAW_READS/whelks/whelk9.fastq -b /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/barcodes/whelks/barcode_whelk9 -o /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelk9-a -c -q -i fastq -inline_inline --renz_1 nsiI --renz_2 dpnII -E phred33 -r' -r whelk9-a-log #INDEX10 SGE_Batch -q harold -P 10 -f 30G -m 36G -c '/local/cluster/stacks/stacks-1.01/bin/process_radtags -f /nfs1/FW_HMSC/OMalley_Lab/vauxf/RAW_READS/whelks/whelk10.fastq -b /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/barcodes/whelks/barcode_whelk10 -o /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelk10-a -c -q -i fastq -inline_inline --renz_1 nsiI --renz_2 dpnII -E phred33 -r' -r whelk10-a-log #Transfer copies of r1 reads for selected 18 individuals to new folder /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1 #Create population map in Stacks input directory /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/population_maps/whelks #Create new output folder in Stacks output directory /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002 #Create shell file in Stacks output directory /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002 #POPULATION MAP CONTENTS 'nzmono.txt' Pchath_FV14040 PEN Pchath_FV14045 PEN Pchath_FV14006 PEN Pchath_FV14033 PEN Pchath_FV14013 PEN Pcuvie_FV15013 PEN Pcuvie_FV14118 PEN Pcuvie_FV14188 PEN Pjeaki_FV14197 PEN Pjeaki_FV14219 PEN Pjeaki_FV14196 PEN Pjeaki_FV14116 PEN Pjeaki_FV14220 PEN Pfairf_FV15005 PEN Pormes_FV15008 PEN Psulca_FV15009 PEN Psulca_FV15002 PEN Psulca_FV15001 PEN #SHELL FILE CONTENTS 'whelks-phd-002a.sh' #!/usr/bin/env bash /local/cluster/stacks/stacks-1.01/bin/denovo_map.pl -T 15 -X "ustacks:-m 3 -M 5 -N 7 --model_type bounded --bound_high 0.05" -X "cstacks:-n 15" -b 12 -D whelks-phd-002 -S \ -o /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/whelks-phd-002 \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pchath_FV14040-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pchath_FV14045-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pchath_FV14006-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pchath_FV14033-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pchath_FV14013-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pcuvie_FV14118-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pcuvie_FV14188-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pcuvie_FV14188-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pjeaki_FV14197-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pjeaki_FV14219-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pjeaki_FV14196-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pjeaki_FV14116-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pjeaki_FV14220-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pfairf_FV15005-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Pormes_FV15008-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Psulca_FV15009-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Psulca_FV15002-r1.fq \ -s /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/single/whelks/whelks-r1/Psulca_FV15001-r1.fq ###DE NOVO (USTACKS AND CSTACKS) cd /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002 chmod +x /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002/whelks-phd-002a.sh dos2unix /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002/whelks-phd-002a.sh SGE_Batch -q chinook -P 10 -f 30G -m 36G -c ' /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002/whelks-phd-002a.sh' -r whelks-phd-002a-sh ###POPULATIONS #r0.9 cd /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002/p002-p1-r90 SGE_Batch -q harold -P 25 -f 30G -m 36G -c '/local/cluster/stacks/stacks-1.01/bin/populations -P /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002 -M /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/population_maps/whelks/whelks-nzmono.txt -b 12 -t 15 -r 0.9 -m 5 -p 1 --write_single_snp --structure --genepop --plink --vcf --fasta_samples --fasta_loci' -r nzmono10mb-r90.snp #r0.5 cd /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002/p002-p1-r50 SGE_Batch -q harold -P 25 -f 30G -m 36G -c '/local/cluster/stacks/stacks-1.01/bin/populations -P /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/stacks_out/whelks/phd-002 -M /nfs1/FW_HMSC/OMalley_Lab/vauxf/STACKS/population_maps/whelks/whelks-nzmono.txt -b 12 -t 15 -r 0.5 -m 5 -p 1 --write_single_snp --structure --genepop --plink --vcf --fasta_samples --fasta_loci' -r nzmono10mb-r50.snp #For Structure analysis, use .structure.tsv output file. Remember to remove header added by Stacks and check that population flag is numerical #R COMMANDS FOR MAKING PCA IN ADEGENET 2.1.1 PACKAGE ###ADAPTED FROM: http://popgen.nescent.org/DifferentiationSNP.html ###CREDIT: Stéphanie Manel (author), Zhian Kamvar (edits) ###CLEAR BUFFERS rm(list=ls()) ###SET DIRECTORY #setwd("C:/Users/Felix/Documents/R/osu-rdata") setwd("C:/Users/vauxf/Documents/R/osu-rdata") ###LOAD LIBRARIES library(hierfstat) library(adegenet) library(vegan) library(car) library(rgl) ###DATA FORMAT ###PACKAGE WANTS FSTAT .DAT INPUT FILE #1. SAVE GENEPOP FILE AS GENEPOP.TXT #2. CONVERT GENEPOP FILE TO FSTAT .DAT FILE USING PGDSPIDER ###IMPORT DATA ##PHD WHELKS data<-read.fstat("nzmono10mb-r90.dat", quiet=FALSE) data<-read.fstat("nzmono10mb-r50.dat", quiet=FALSE) #PCA FOR SIX PENION (2D) pop <- data$pop #print(pop) X <- tab(data, NA.method="mean") temp <- as.integer(pop(data)) #***~~~~~~~explore only PC1 and PC2 pca1 <- dudi.pca(X,scannf=FALSE,scale=FALSE, nf=2) myCol <- transp(c("magenta","orange","red", "cyan", "forestgreen", "blue"),.7)[temp] myPch <- c(19, 4, 17, 18, 3, 15)[temp] plot(pca1$li, col=myCol, cex=3, pch=myPch) #***~~~~~~~change nf to explore additional axes (e.g. nf=3) pca1 <- dudi.pca(X,scannf=FALSE,scale=FALSE, nf=3) myCol <- transp(c("magenta","orange","red", "cyan", "forestgreen", "blue"),.7)[temp] myPch <- c(19, 4, 17, 18, 3, 15)[temp] plot(pca1$li, col=myCol, cex=3, pch=myPch) #export PC loadings for samples #Export loadings for multiple PCs (need to change nf= in dudi.pca function above) write.csv(pca1$li$Axis1,file="pc1-load.csv",row.names=TRUE,quote=FALSE) write.csv(pca1$li$Axis2,file="pc2-load.csv",row.names=TRUE,quote=FALSE) write.csv(pca1$li$Axis3,file="pc3-load.csv",row.names=TRUE,quote=FALSE) ##export list of eigen values and percentage variances for each PC #eigen values for PCs eig.val<-pca1$eig eig.val #percentages of variance for each PC eig.perc <- 100*pca1$eig/sum(pca1$eig) eig.perc eigen<-data.frame(eig.val,eig.perc) eigen #writing file with both write.csv(eigen,file="eigen-summary.csv",row.names=TRUE,quote=FALSE) #broken stick test to determine number of 'meaningful' PCs #(technically no such thing as statistical significance for PCs) #Load .csv file with listed PCs and eigen values xx<-read.csv("eigen-summary.csv") #xx is the eigenvalues zz<-as.data.frame(bstick(50,tot.var=sum(xx$eig.val))) # zz is the broken stick model components<-seq(from=1, to=nrow(xx), by=1) xx$comp<-components components2<-seq(from=1, to=nrow(zz), by=1) zz$comp<-components2 plot(xx$comp, xx$eig.val, type="h") lines(zz$comp, zz[,1], col="red") #If above red line = meaningful according to broken-stick test #If below red line = non-meaningful according to broken-stick test ###Generally this corresponds to PCs that explain >5% of variance among samples #If no PCs are meaningful, it is likely that variance among samples is low, and that there is limited genetic structuring #There may still be weak trends or minor structure among your samples #Important: still worth exploring non-meaningful PCs to check data, especially if there is an obvious step change in eigen values #'allele contributions' i.e. PC loading plot for each locus #I don't find this very useful for large SNP datasets... #PC1 loadingplot(pca1$c1^2) PC1_loci<-pca1$c1^2 write.csv(PC1_loci,file="PC1_loci.csv",row.names=TRUE,quote=FALSE) #####~~~***~~~3D scatterplot of PCs FOR SNP DATA~~~***~~~##### #population labels for NZMono Penion pop.names <- c("Cha", "Cha", "Fai", "Cha", "Cha", "Cha", "Cuv", "Cuv", "Cuv", "Jea", "Jea", "Jea", "Jea", "Jea", "Orm", "Sul", "Sul", "Sul") write.csv(pop.names,file="nzmono50-popnames.csv",row.names=TRUE,quote=FALSE) #Manually combine the previously exported PC loadings files (loadings for each PC in a separate column), with the pop.names column added too ##Alternatively, combine these vectors as a matrix in R... not working? >:( #load the combined PC loadings file mydata<-read.table("alba-ori-pass10-p11-r90-c-3d1.csv",header=T,sep=",") p1 <- mydata$PC1 p2 <- mydata$PC2 p3 <- mydata$PC3 #3D scatter plot of desired axes scatter3d(x = p1, y = p2, z = p3, groups = mydata$pop, labels = mydata$pop, grid = TRUE, surface = FALSE, axis.ticks= TRUE, ellipsoid = FALSE, level = 0.5, point.col=c("magenta", "red", "forestgreen", "cyan", "gold", "blue"), surface.col=c("magenta", "red", "forestgreen", "cyan", "gold", "blue")) #with 50% confidence ellipsoids scatter3d(x = p1, y = p2, z = p3, groups = mydata$groups, labels = mydata$groups, grid = FALSE, surface = FALSE, axis.ticks= TRUE, ellipsoid = TRUE, level = 0.5, point.col=c("magenta", "red", "forestgreen", "cyan", "gold", "blue"), surface.col=c("magenta", "red", "forestgreen", "cyan", "gold", "blue")) #EXPORT 3D SCATTERPLOT #export 3D plot (in current orientation in rgl window) to SVG (large file size!) rgl.postscript("3Dplot2.svg","svg") #export 3D plot (in current orientation in rgl window) to PDF (rubbish quality) rgl.postscript("3Dplot.pdf","pdf")