fastp shell script:

#!/bin/bash
## we will do a FastQC-style quality check, merge the paired-end reads and trim the sequences
## in one go using fastp to get the complete amplicon sequence
~/fastp -i JC-G_S1_L001_R1_001.fastq -I JC-G_S1_L001_R2_001.fastq \
    -l 125 -m --discard_unmerged -o merged.fastq

## next convert the fastq file to fasta format
module load fastx_toolkit/0.0.14
fastq_to_fasta -i merged.fastq -Q 33 -o merged.fa




Mothur shell script:

#!/bin/bash
## we will identify the sequences that match the oligos used, allowing for 1 mismatch.
#Run Mothur
module load mothur/1.39.5
mothur "#trim.seqs(fasta=merged.fa,oligos=Spider_Diet_General_Oligos.txt,checkorient=t,pdiffs=1)"
# split the .groups file (read ID, group name) by whether the group name ends in 'a' or 'b'
grep 'a$' merged.groups > mergedA.groups
grep 'b$' merged.groups > mergedB.groups
# strip the trailing 'a'/'b' label so both files carry the bare sample names
sed -i 's/a$//' mergedA.groups
sed -i 's/b$//' mergedB.groups




Demultiplex Perl script:

#!/usr/bin/perl
unless ($#ARGV == 0)
{
print "Usage: 3_Demultiplex.pl FastaList_Gen.txt";
die;
}
open (INLIST, "<$ARGV[0]") || die;
# set 'indir' to the directory that holds the mothur output; if you want the
# demultiplexed output written to another directory, change the 'outdir' path here
$indir = "~/Deplex#2";
$outdir = "~/Deplex#2";
# loop through the list of your samples (one sample name per line in the input file,
# e.g. FastaList_Gen.txt) and run the commands below for each one; the names must match
# the group names in the '.groups' files once the 'a'/'b' labels have been stripped
while (<INLIST>) {
$lib = $_;
chomp($lib);
# build the per-sample file names used below, each with the same extension
$readidsa = $lib . "_a_ids.txt";
$readidsb = $lib . "_b_ids.txt";
$readidsab = $lib . "_ab_ids.txt";
$fa1 = $lib . ".fa";
$fa2 = $lib . ".fasta";
# pull out the read IDs belonging to this sample from each '.groups' file
# (output from mothur)
system("grep -w $lib $indir/mergedA.groups | awk '{print \$1}' > $outdir/$readidsa");
system("grep -w $lib $indir/mergedB.groups | awk '{print \$1}' > $outdir/$readidsb");
# combine the lists of read IDs from the 'a' and 'b' matches
system("cat $outdir/$readidsa $outdir/$readidsb >> $outdir/$readidsab");
# pull the reads for this sample out of the trimmed fasta file (output from mothur),
# keeping only reads whose IDs appear in the combined ID list
my $command1 = q{perl -ne 'if(/^>(\S+)/){$c=$i{$1}}$c?print:chomp;$i{$_}=1 if @ARGV' }
             . "$outdir/$readidsab $indir/merged.trim.fasta > $outdir/$fa1";
system($command1);
# keep only the first whitespace-separated field of each line (tidies the headers)
system("awk '{print \$1}' $indir/$fa1 > $indir/$fa2");
}
exit;




Demultiplex shell script:

#!/bin/bash
perl 3_Demultiplex.pl FastaList_Gen.txt




Edit headers Perl script:

#!/usr/bin/perl
unless ($#ARGV == 0)
{
print "Usage: 4_Edit_Headers.pl FastaList_Gen.txt";
die;
}
open (INLIST, "<$ARGV[0]") || die;
$indir = "~/Deplex#2/FastaFiles";
$outdir = "~/Deplex#2/FastaFiles";
while (<INLIST>) {
$lib = $_;
chomp($lib);
$fa1 = $lib . ".fasta";
$fa2 = $lib . "_edit.fasta";
system( qq(sed "s/^>/>$lib;/g" "$indir/$fa1" > "$indir/$fa2"));
}
exit;



Edit headers shell script:

#!/bin/bash
perl 4_Edit_Headers.pl FastaList_Gen.txt




Merge sample ID files into one file:

cat *edit.fasta > AllGen.fasta




UNOISE3 shell script:

#!/bin/bash
# dereplicate the reads (collapse identical sequences);
# output for the next step = Unique.fasta
~/usearch_11 -fastx_uniques AllGen.fasta \
    -fastaout Unique.fasta -sizeout -strand both -relabel Uniq -threads 4
# sort by size
~/usearch_11 -sortbysize Unique.fasta \
    -fastaout Sorted.fasta
# cluster OTUs
~/usearch_11 -cluster_otus Sorted.fasta \
    -otus OTU.fasta -relabel Otu
# denoise and cluster using unoise3 to make zOTUs
~/usearch_11 -unoise3 Sorted.fasta \
    -zotus zOTU.fasta
# make a table of zOTUs with the number of sequences per zOTU in each sample
~/usearch_11 -otutab AllGen.fasta \
    -zotus zOTU.fasta -otutabout zOTUtable_Gen.txt -strand both -threads 4
# make a table of OTUs with the number of sequences per OTU in each sample
~/usearch_11 -otutab AllGen.fasta \
    -otus OTU.fasta -otutabout OTUtable_Gen.txt -strand both -threads 4



Blast shell script:

#!/bin/bash
# blast the clusters from usearch
module load blast/2.7.1
export BLASTDB=~/BLAST-DB
blastn -query zOTU.fasta -db nt -num_threads 4 -evalue 0.00001 -perc_identity 97 \
    -outfmt 6 -out zOTU.txt
blastn -query OTU.fasta -db nt -num_threads 4 -evalue 0.00001 -perc_identity 97 \
    -outfmt 6 -out OTU_blastOutput.txt




Filter blast sequences:

# read the tabular blast output (-outfmt 6: query ID = V1, ..., bit score = V12)
blast <- read.table("zOTU.txt")
summary(blast)
library(dplyr)
# keep only the top hit(s) for each zOTU, i.e. the rows with the highest bit score per query
blast_filter <- blast %>%
  group_by(V1) %>%
  filter(V12 == max(V12))
write.table(blast_filter, "Gen_zOTU_TopHit_blastOutput.txt")




Cluster read counts based on taxa:

# Taxonomic names are assigned by running the blast output through MEGAN [version 6.12.3]
# (Huson et al. 2016); those names are then matched to the read-count tables using the
# zOTU/OTU identifiers (carried out here with the =VLOOKUP() function in Excel).
To_Agg <- read.csv("zOTUTable_withTaxonNames.csv", header = TRUE)
# sum the read counts of all zOTUs assigned to the same taxon
Agg <- aggregate(. ~ Taxon, data = To_Agg, sum)
write.csv(Agg, "zOTUTable_Aggregated.csv", row.names = FALSE)
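
The matching step can also be done in R rather than Excel. The sketch below is only
illustrative: it assumes the MEGAN assignments have been exported as a two-column CSV
("zOTU_taxa.csv" with columns OTU_ID and Taxon); that file name and those column names
are placeholders, not outputs produced by the pipeline above.

# a minimal R sketch of the VLOOKUP-style matching, under the assumptions stated above
zotu_table <- read.delim("zOTUtable_Gen.txt", check.names = FALSE)  # usearch -otutabout table
names(zotu_table)[1] <- "OTU_ID"                                    # rename the "#OTU ID" column
taxa <- read.csv("zOTU_taxa.csv")                                   # hypothetical MEGAN export
merged <- merge(taxa, zotu_table, by = "OTU_ID")                    # match taxon names to read counts
merged$OTU_ID <- NULL      # drop the ID column so only Taxon and count columns remain
write.csv(merged, "zOTUTable_withTaxonNames.csv", row.names = FALSE)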