FastP shell script:

#!/bin/bash
## we will do the quality check, merge the paired-end reads and trim the sequences
## in one go using fastp to get the complete amplicon sequence
~/fastp -i JC-G_S1_L001_R1_001.fastq -I JC-G_S1_L001_R2_001.fastq -l 125 -m --discard_unmerged -o merged.fastq
## next convert the fastq file to fasta format
module load fastx_toolkit/0.0.14
fastq_to_fasta -i merged.fastq -Q 33 -o merged.fa

Mothur shell script:

#!/bin/bash
## we will identify the sequences that match the oligos used, allowing for 1 mismatch
# run mothur
module load mothur/1.39.5
mothur "#trim.seqs(fasta=merged.fa,oligos=Spider_Diet_General_Oligos.txt,checkorient=t,pdiffs=1)"
# split the .groups file into A and B
grep 'a$' merged.groups > mergedA.groups
grep 'b$' merged.groups > mergedB.groups
# remove the trailing 'a' and 'b' labels
sed -i 's/a$//' mergedA.groups
sed -i 's/b$//' mergedB.groups

Demultiplex Perl script:

#!/usr/bin/perl

unless ($#ARGV == 0) {
    print "Usage: 3_Demultiplex.pl FastaList_Gen.txt";
    die;
}

open (INLIST, "<$ARGV[0]") || die;

# replace these paths with your own; if you want to put the output into another
# directory you can change the 'outdir' path here
$indir = "~/Deplex#2";
$outdir = "~/Deplex#2";

# loop through the list of your samples (FastaList_Gen.txt) and perform the commands
# for each one
while (<INLIST>) {
    $lib = $_;
    chomp($lib);

    # a shortcut to read or write a file for each of your samples, each file having the
    # same extension
    $readidsa = $lib . "_a_ids.txt";
    $readidsb = $lib . "_b_ids.txt";
    $readidsab = $lib . "_ab_ids.txt";
    $fa1 = $lib . ".fa";
    $fa2 = $lib . ".fasta";

    # split fasta read IDs into files grouped by sample ID (replace mergedA.groups and
    # mergedB.groups with the names of your '.groups' files output by mothur)
    system("grep -w $lib $indir/mergedA.groups | awk '{print \$1}' > $outdir/$readidsa");
    system("grep -w $lib $indir/mergedB.groups | awk '{print \$1}' > $outdir/$readidsb");

    # combine the lists of sequence names for the 'a' and 'b' matches
    system("cat $outdir/$readidsa $outdir/$readidsb >> $outdir/$readidsab");

    # split the trimmed fasta file into reads specific to each sample (replace
    # merged.trim.fasta with the name of your trimmed fasta file output by mothur).
    # The embedded one-liner first stores the read IDs from the ID list and then prints
    # only the fasta records whose headers are in that list; \@ARGV is escaped so it is
    # passed literally to the inner perl command.
    my $command1 = 'perl -ne' . "'" . 'if(/^>(\S+)/){$c=$i{$1}}$c?print:chomp;$i{$_}=1 if' . " \@ARGV'" . " $outdir/$readidsab $indir/merged.trim.fasta > $outdir/$fa1";
    system($command1);
    system("awk '{print \$1}' $outdir/$fa1 > $outdir/$fa2");
}
exit;

Demultiplex shell script:

#!/bin/bash
perl 3_Demultiplex.pl FastaList_Gen.txt
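For reference, FastaList_Gen.txt is assumed to be a plain-text list with one sample name per line, matching the group names left in the mothur '.groups' files after the 'a'/'b' labels are stripped; the sample names below are purely illustrative:

Sample01
Sample02
Sample03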
"_edit.fasta"; system( qq(sed "s/^>/>$lib;/g" "$indir/$fa1" > "$indir/$fa2")); } exit; Edit headers shell script: #!/bin/bash perl 4_Edit_Headers.pl FastaList_Gen.txt Merge sample ID files into one file: cat *edit.fasta > AllGen.fasta Unoise 3 shell script: #!/bin/bash # removes identical replicates from the fasta input, # output for next step = SampleName_rc_uniques.fasta ~/usearch_11 -fastx_uniques AllGen.fasta -fastaout Unique.fasta -sizeout -strand both -relabel Uniq -threads 4 # sort by size ~/usearch_11 -sortbysize Unique.fasta -fastaout Sorted.fasta # Cluster OTUs ~/usearch_11 -cluster_otus Sorted.fasta - otus OTU.fasta -relabel Otu # denoise and cluster using unoise3 to make zOTUs ~/usearch_11 -unoise3 Sorted.fasta -zotus zOTU.fasta # make list of zOTU’s and the number of sequences per zOTU (size) ~/usearch_11 -otutab AllGen.fasta -zotus zOTU.fasta -otutabout zOTUtable_Gen.txt -strand both -threads 4 # make list of OTU’s and the number of sequences per OTU (size) ~/usearch_11 -otutab AllGen.fasta -otus OTU.fasta -otutabout OTUtable_Gen.txt -strand both -threads 4 Blast shell script: #!/bin/bash # blast the clusters from usearch module load blast/2.7.1 export BLASTDB=~/BLAST-DB blastn -query zOTU.fasta -db nt -num_threads 4 -evalue 0.00001 -perc_identity 97 -outfmt 6 -out zOTU.txt blastn -query OTU.fasta -db nt -num_threads 4 -evalue 0.00001 -perc_identity 97 -outfmt 6 -out OTU_blastOutput.txt Filter blast sequences: blast <- read.table("zOTU.txt") summary(blast) library(dplyr) blast_filter <- blast %>% group_by(V1) %>% filter(V12 == max(V12)) write.table(blast_filter, "Gen_zOTU_TopHit_blastOutput.txt") Cluster read counts based on taxa: #Taxonomic names are acquired by running blast data through MEGAN [version 6.12.3] (Huson et al. 2016) and then these names are matched to read count tables using zOTU/OTU identifiers (carried out using =VLOOKUP() function in Excel). To_Agg <- read.csv(“zOTUTable_withTaxonNames.csv”, header = T) Agg <- aggregate(.~Taxon, data=COI_to_Agg, sum) write.table(Agg, “zOTUTable_Aggregated.csv”)