#Fungi

# Begin
# This pipeline was conducted on an external Linux server.
# Open a screen session so that all organism groups can be run in parallel.
screen -S fungi

# Set the filepath to the location of the raw reads.
cd /YOUR/FILEPATH/HERE
###
# ADAPTER TRIMMING
### 

# We used the Adapter trimmed reads as supplied by Fasteris, the sequencing provider.
# Trimming was done with Trimmomatic on their side.

###
# FASTQC and MultiQC reports 
###

# Create a subdirectory to store the FASTQC reports (-p: no error if it already exists).
mkdir -p ./FASTQC

# Create a fastqc report of the read files, to examine the read quality.
# The reports are written into ./FASTQC, the directory that multiqc scans below.
fastqc 211112_SN7280_A_L001_AUXV-6_AdapterTrimmed_R1.fastq.gz -o ./FASTQC &&
fastqc 211112_SN7280_A_L001_AUXV-6_AdapterTrimmed_R2.fastq.gz -o ./FASTQC

# Create a multiqc report to have both quality reports in one html file.
multiqc FASTQC -o FASTQC --interactive

###
#CREATE A FASTA FILE FROM THE BARCODES 
###

# The .txt files containing the octamer tags and primers used for the study
# (referred to as barcodes from here on) were uploaded to the server.

# Turn each barcode list into a FASTA file. Every line of the .txt file becomes
# one record: a header line ">fwd<line number>" (or ">rev<line number>") that
# identifies the barcode, followed by the barcode line itself.
for direction in fwd rev; do
  awk -v tag="${direction}" '{ printf ">%s%d\n%s\n", tag, NR, $0 }' \
    "/YOUR/FILEPATH/HERE/fungi_${direction}_barcodes_big.txt" \
    > "/YOUR/FILEPATH/HERE/fungi_barcodes_big.${direction}.fasta"
done

###
#DEMULTIPLEXING THE READS WITH CUTADAPT
###

# Create a subdirectory for the files that have the barcodes and primers removed.
mkdir DEMULTIPLEXED

# Activate the cutadapt environment through conda.
conda activate cutadaptenv

# Increase the soft limit of open files because cutadapt will open a lot of files:
# one file for each forward and reverse barcode combination, which is filled
# with the reads containing that combination.
ulimit -S -n 3000

# Cutadapt main commands.
# Everything needs to be run twice because the reads are in mixed orientation
# (PCR-free library preparation). Because of the dual-indexing approach two
# barcode files are supplied (-g for the first read, -G for the second read).
# The options that are identical in both rounds are collected once here.
demux_options=(
  -j 0
  -e 0.15 --no-indels --minimum-length 50
  -g file:/YOUR/FILEPATH/HERE/fungi_barcodes_big.fwd.fasta
  -G file:/YOUR/FILEPATH/HERE/fungi_barcodes_big.rev.fasta
)

# Round 1: demultiplex the raw R1/R2 files. The cutadapt report goes to
# fungi_round1_cutadapt.txt. Round 2 only starts if round 1 succeeded (&&).
cutadapt "${demux_options[@]}" \
  -o './DEMULTIPLEXED/{name1}-{name2}.round1.1.fastq' \
  -p './DEMULTIPLEXED/{name1}-{name2}.round1.2.fastq' \
  ./211112_SN7280_A_L001_AUXV-6_AdapterTrimmed_R1.fastq.gz \
  ./211112_SN7280_A_L001_AUXV-6_AdapterTrimmed_R2.fastq.gz \
  > fungi_round1_cutadapt.txt &&

# Round 2: re-run on the reads left unassigned in round 1 (unknown-unknown),
# with the two input files given in swapped order (the ".2" file first).
cutadapt "${demux_options[@]}" \
  -o './DEMULTIPLEXED/{name1}-{name2}.round2.1.fastq' \
  -p './DEMULTIPLEXED/{name1}-{name2}.round2.2.fastq' \
  ./DEMULTIPLEXED/unknown-unknown.round1.2.fastq \
  ./DEMULTIPLEXED/unknown-unknown.round1.1.fastq \
  > fungi_round2_cutadapt.txt

# Close the conda environment.
conda deactivate

# Before merging the files we need to rename them to make sorting easier and to only include reads that
# are real samples, multiplex controls, blanks and PCR negative controls. The text files with the old and
# new names were uploaded to the RENAMING/ directory before.

# We need to enter the directory where we stored the demultiplexed files.
cd ./DEMULTIPLEXED/

# Directory holding the renaming lists (one "old" and one "new" list per round/mate).
renaming_dir=/YOUR/FILEPATH/HERE/RENAMING

# Then we rename the files. For every round/mate combination the old and new
# name lists are pasted side by side; each resulting line supplies the text to
# replace and its replacement, which `rename` applies to all files in the
# directory. The verbose output of rename is kept as a log per combination
# (rename_logR1.1.txt, rename_logR1.2.txt, rename_logR2.1.txt, rename_logR2.2.txt).
for read_set in R1_1 R1_2 R2_1 R2_2; do
  paste "${renaming_dir}/renaming_${read_set}_old_big.txt" \
    "${renaming_dir}/renaming_${read_set}_new_big.txt" \
    | while read -r old_name new_name; do
      rename -v "${old_name}" "${new_name}" *
    done > "./rename_log${read_set/_/.}.txt"
done

# For a later sanity check of the merging step, convert the round 1 and round 2
# files of one sample (A31_B1) to FASTA so that their read numbers can be counted.
# In a FASTQ record (4 lines) the first line is the header and the second line
# the sequence: the header loses its first character and gets a ">" instead,
# the sequence is printed unchanged, the remaining two lines are dropped.
for round in round1 round2; do
  awk 'NR % 4 == 1 { printf(">%s\n", substr($0, 2)) }
       NR % 4 == 2 { print }' \
    > "A31_B1.${round}.1.sample.fa" < "A31_B1.${round}.1.sample.fastq"
done

# Count the reads per FASTA file, i.e. the lines beginning with ">".
grep -c '^>' *.fa | less

###
#FILE MERGING FROM THE TWO ROUNDS
###

# Create a subdirectory for the merged files (-p: no error if it already exists).
mkdir -p MERGED

#######################################
# Concatenate the round 1 and round 2 demultiplexing results of every sample.
# Files are paired by their shared name prefix (everything in front of
# "round1.<mate>.sample.fastq"), not by their position in a listing, so a
# sample that is missing from one round cannot shift the pairing of the others.
# Arguments: $1 - mate number as used in the file names (1 or 2)
# Outputs:   ./MERGED/<prefix>sample_demux.<mate>.fastq for every sample;
#            listround1.<mate>. and listround2.<mate>. with the prefixes found
#            in each round; a warning on stderr for every sample that is
#            present in only one round (that file is then copied on its own).
#######################################
merge_rounds() {
  local mate=$1
  local fastq prefix partner

  : > "listround1.${mate}."
  : > "listround2.${mate}."

  for fastq in *round1."${mate}".sample.fastq; do
    [[ -e "${fastq}" ]] || continue # the glob matched nothing
    prefix=${fastq%"round1.${mate}.sample.fastq"}
    printf '%s\n' "${prefix}" >> "listround1.${mate}."
    partner="${prefix}round2.${mate}.sample.fastq"
    if [[ -e "${partner}" ]]; then
      cat -- "${fastq}" "${partner}"
    else
      printf 'warning: no round 2 file for %s (mate %s), using round 1 only\n' \
        "${prefix}" "${mate}" >&2
      cat -- "${fastq}"
    fi > "./MERGED/${prefix}sample_demux.${mate}.fastq"
  done

  for fastq in *round2."${mate}".sample.fastq; do
    [[ -e "${fastq}" ]] || continue # the glob matched nothing
    prefix=${fastq%"round2.${mate}.sample.fastq"}
    printf '%s\n' "${prefix}" >> "listround2.${mate}."
    partner="${prefix}round1.${mate}.sample.fastq"
    if [[ ! -e "${partner}" ]]; then
      printf 'warning: no round 1 file for %s (mate %s), using round 2 only\n' \
        "${prefix}" "${mate}" >&2
      cat -- "${fastq}" > "./MERGED/${prefix}sample_demux.${mate}.fastq"
    fi
  done
}

# Merge the pairs: first the ".1" files, then again the ".2" files.
merge_rounds 1
merge_rounds 2

# To check whether the merging has worked, convert the merged file of the same
# sample (A31_B1) to FASTA and compare its read number with the per-round files.

# FASTQ to FASTA: the header line (1st of every 4 lines) gets ">" in place of
# its first character, the sequence line (2nd of every 4) is kept as it is.
awk 'NR % 4 == 1 { printf(">%s\n", substr($0, 2)) }
     NR % 4 == 2 { print }' \
  > A31_B1.sample_demux.1.fa < ./MERGED/A31_B1.sample_demux.1.fastq

# Count the reads per FASTA file, i.e. the lines beginning with ">".
grep -c '^>' *sample*.fa | less

# Exit the DEMULTIPLEXED subdirectory.
cd ..

###
#Removing left over primer sequences.
###

# DADA2 is best run through R. You can find tutorials at https://benjjneb.github.io/dada2/index.html. 
# Therefore we need to initialize an R session on the server. 
# Before moving on to the sample inference we need to remove any leftover primer sequences.

R

# Load all required packages and print the installed version of each one.
for (pkg in c("dada2", "ShortRead", "Biostrings")) {
  library(pkg, character.only = TRUE)
  print(packageVersion(pkg))
}

# Primer sequences used for the fungi.
FWD <- "GTGARTCATCGAATCTTTG"
REV <- "TCCTCCGCTTATTGATATGC"

# Return all orientations of a primer as a named character vector with the
# elements Forward, Complement, Reverse and RevComp.
#   primer: primer sequence as a character string
allOrients <- function(primer) {
  require(Biostrings)
  primer_dna <- DNAString(primer)  # character -> DNAString object
  variants <- c(
    Forward    = primer_dna,
    Complement = complement(primer_dna),
    Reverse    = reverse(primer_dna),
    RevComp    = reverseComplement(primer_dna)
  )
  # Convert every orientation back to a plain character string.
  sapply(variants, toString)
}

# Build and display the orientations of both primers.
FWD.orients <- allOrients(FWD)
FWD.orients

REV.orients <- allOrients(REV)
REV.orients

# Collect the merged, demultiplexed files (".1" and ".2" files separately, sorted by name).
merged_dir <- "./DEMULTIPLEXED/MERGED"
fnFs <- sort(list.files(merged_dir, pattern = "sample_demux.1.fastq", full.names = TRUE))
fnRs <- sort(list.files(merged_dir, pattern = "sample_demux.2.fastq", full.names = TRUE))

# Remove reads with ambiguous bases (maxN = 0) using filterAndTrim.
# The N-filtered copies keep their file names and go into a filtN/ subdirectory.
filtN_dir <- file.path(merged_dir, "filtN")
fnFs.filtN <- file.path(filtN_dir, basename(fnFs))
fnRs.filtN <- file.path(filtN_dir, basename(fnRs))
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)

# Check for any leftover primers after the removal with Cutadapt.
# If the samples come from the same library prep it is enough to process only
# one of the files, which is why only the first file ([[1]]) is inspected below.

# Count the reads of a fastq file in which a primer is found at least once.
#   primer: primer sequence (one orientation) as a character string
#   fn:     path to the fastq file
primerHits <- function(primer, fn) {
  reads <- sread(readFastq(fn))
  hits_per_read <- vcountPattern(primer, reads, fixed = FALSE)
  sum(hits_per_read > 0)
}

# Search the first forward and reverse file for every orientation of both
# primers and combine the counts into one table (one row per primer/read set).
first_fwd_file <- fnFs.filtN[[1]]
first_rev_file <- fnRs.filtN[[1]]
rbind(
  FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = first_fwd_file),
  FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = first_rev_file),
  REV.ForwardReads = sapply(REV.orients, primerHits, fn = first_fwd_file),
  REV.ReverseReads = sapply(REV.orients, primerHits, fn = first_rev_file)
)

# A lot of primer sequences are left for the fungi.
# This is somewhat expected because of the higher variability in ITS length,
# which leads to more read-through during sequencing.

# Leave R without saving the workspace.
quit(save = "no")

# Open the cutadapt environment to remove the leftover primers with Cutadapt.
conda activate cutadaptenv

# Create a subdirectory for the first primer removal (-p: no error if it already exists).
mkdir -p PRIMER_REMOVED1

# Enter the MERGED subdirectory.
cd DEMULTIPLEXED/MERGED/

# The command below contains all possible orientations:
# 1) fwd-rcrev + rev-rcfwd; 2) rcfwd-rev + rcrev-fwd;
# 3) fwd + rcfwd; 4) rcfwd + fwd; 5) rev + rcrev; 6) rcrev + rev

# Write the sample names to the file "samples", one line per sample. Only the
# ".1" files are listed so that every sample appears (and is processed) once;
# the name is the file name without its ".sample_demux.1.fastq" suffix.
for forward_fastq in *.sample_demux.1.fastq; do
  [[ -e "${forward_fastq}" ]] || continue # the glob matched nothing
  printf '%s\n' "${forward_fastq%.sample_demux.1.fastq}"
done > samples

mapfile -t sample_names < samples

# Run Cutadapt over all samples. The trimmed pairs go to PRIMER_REMOVED1/ and
# the Cutadapt report of each sample is kept in its own
# cutadapt_primer_trimming_stats_<sample>.txt file in the current directory.
for sample in "${sample_names[@]}"; do

  echo "On sample: ${sample}"

  cutadapt --cores=0 --minimum-length 50 \
    -a '^GTGARTCATCGAATCTTTG...GCATATCAATAAGCGGAGGA' -A '^TCCTCCGCTTATTGATATGC...CAAAGATTCGATGAYTCAC' \
    -a '^CAAAGATTCGATGAYTCAC...TCCTCCGCTTATTGATATGC' -A '^GCATATCAATAAGCGGAGGA...GTGARTCATCGAATCTTTG' \
    -a GTGARTCATCGAATCTTTG -A CAAAGATTCGATGAYTCAC \
    -a CAAAGATTCGATGAYTCAC -A GTGARTCATCGAATCTTTG \
    -a TCCTCCGCTTATTGATATGC -A GCATATCAATAAGCGGAGGA \
    -a GCATATCAATAAGCGGAGGA -A TCCTCCGCTTATTGATATGC \
    -o "../../PRIMER_REMOVED1/${sample}.sample_demux_prirm.1.fastq" \
    -p "../../PRIMER_REMOVED1/${sample}.sample_demux_prirm.2.fastq" \
    "${sample}.sample_demux.1.fastq" "${sample}.sample_demux.2.fastq" \
    > "cutadapt_primer_trimming_stats_${sample}.txt"

done

# Close the conda environment.
conda deactivate

# Exit the MERGED directory and go to our base directory.
cd ../..

# Run a second check for primers in R.

R

# Load the required packages and print the installed version of each one.
for (pkg in c("dada2", "ShortRead", "Biostrings")) {
  library(pkg, character.only = TRUE)
  print(packageVersion(pkg))
}

# Primer sequences used for the fungi.
FWD <- "GTGARTCATCGAATCTTTG"
REV <- "TCCTCCGCTTATTGATATGC"

# Return all orientations of a primer as a named character vector with the
# elements Forward, Complement, Reverse and RevComp.
#   primer: primer sequence as a character string
allOrients <- function(primer) {
  require(Biostrings)
  primer_dna <- DNAString(primer)  # character -> DNAString object
  variants <- c(
    Forward    = primer_dna,
    Complement = complement(primer_dna),
    Reverse    = reverse(primer_dna),
    RevComp    = reverseComplement(primer_dna)
  )
  # Convert every orientation back to a plain character string.
  sapply(variants, toString)
}

# Build and display the orientations of both primers.
FWD.orients <- allOrients(FWD)
FWD.orients

REV.orients <- allOrients(REV)
REV.orients

# Collect the primer-trimmed files (".1" and ".2" files separately, sorted by name).
prirm_dir <- "./PRIMER_REMOVED1"
fnFs <- sort(list.files(prirm_dir, pattern = "sample_demux_prirm.1.fastq", full.names = TRUE))
fnRs <- sort(list.files(prirm_dir, pattern = "sample_demux_prirm.2.fastq", full.names = TRUE))

# Remove reads with ambiguous bases (maxN = 0) using filterAndTrim.
# The N-filtered copies keep their file names and go into a filtN/ subdirectory.
prirm_filtN_dir <- file.path(prirm_dir, "filtN")
fnFs.filtN <- file.path(prirm_filtN_dir, basename(fnFs))
fnRs.filtN <- file.path(prirm_filtN_dir, basename(fnRs))
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)

# Check for any leftover primers after the removal with Cutadapt.
# If the samples come from the same library prep it is enough to process only
# one of the files, which is why only the first file ([[1]]) is inspected below.

# Count the reads of a fastq file in which a primer is found at least once.
#   primer: primer sequence (one orientation) as a character string
#   fn:     path to the fastq file
primerHits <- function(primer, fn) {
  reads <- sread(readFastq(fn))
  hits_per_read <- vcountPattern(primer, reads, fixed = FALSE)
  sum(hits_per_read > 0)
}

# Search the first forward and reverse file for every orientation of both
# primers and combine the counts into one table (one row per primer/read set).
first_fwd_file <- fnFs.filtN[[1]]
first_rev_file <- fnRs.filtN[[1]]
rbind(
  FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = first_fwd_file),
  FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = first_rev_file),
  REV.ForwardReads = sapply(REV.orients, primerHits, fn = first_fwd_file),
  REV.ReverseReads = sapply(REV.orients, primerHits, fn = first_rev_file)
)

# Primer removal was successful for fungi. We can move on to DADA2.

###
# Sample inference with DADA2
###

# For the fungi we use the primer-trimmed files in ./PRIMER_REMOVED1.
# They are listed separately per replicate: the digit in front of
# ".sample_demux_prirm" selects the replicate (1, 2 or 3) and the digit after
# it the read file (".1" or ".2"). Each list is sorted by file name.
trimmed_dir <- "./PRIMER_REMOVED1"

cutFs_1 <- sort(list.files(trimmed_dir, pattern = "1.sample_demux_prirm.1.fastq", full.names = TRUE))
cutRs_1 <- sort(list.files(trimmed_dir, pattern = "1.sample_demux_prirm.2.fastq", full.names = TRUE))

cutFs_2 <- sort(list.files(trimmed_dir, pattern = "2.sample_demux_prirm.1.fastq", full.names = TRUE))
cutRs_2 <- sort(list.files(trimmed_dir, pattern = "2.sample_demux_prirm.2.fastq", full.names = TRUE))

cutFs_3 <- sort(list.files(trimmed_dir, pattern = "3.sample_demux_prirm.1.fastq", full.names = TRUE))
cutRs_3 <- sort(list.files(trimmed_dir, pattern = "3.sample_demux_prirm.2.fastq", full.names = TRUE))

# Obtain the sample name from a replicate 1 file name: everything in front of
# the literal text "1.sample_demux_prirm." is kept. The dots are escaped so
# that they only match a real dot; with the unescaped pattern "1.s" any
# character was accepted in that position, which could cut a name too early
# (e.g. at "1_s").
#   fname: path or file name of a replicate 1 file
get.sample.name <- function(fname) sub("1\\.sample_demux_prirm\\..*$", "", basename(fname))

# Get the sample names from the replicate 1 files and show the first few.
sample.names <- unname(sapply(cutFs_1, get.sample.name))
head(sample.names)

# Filter and trim the reads.

# Output paths for the filtered reads: same file names as the input,
# placed in a filtered/ subdirectory of ./PRIMER_REMOVED1.
filtered_dir <- file.path("./PRIMER_REMOVED1", "filtered")

filtFs_1 <- file.path(filtered_dir, basename(cutFs_1))
filtRs_1 <- file.path(filtered_dir, basename(cutRs_1))

filtFs_2 <- file.path(filtered_dir, basename(cutFs_2))
filtRs_2 <- file.path(filtered_dir, basename(cutRs_2))

filtFs_3 <- file.path(filtered_dir, basename(cutFs_3))
filtRs_3 <- file.path(filtered_dir, basename(cutRs_3))

# Apply the filtering parameters. No truncLen is set because these are ITS
# reads and therefore very variable in length; maxEE was changed to 6,6 since
# the libraries are in mixed orientation. The same settings are used for all
# three replicates, so they are wrapped in one helper.
filter_replicate <- function(fwd_in, fwd_out, rev_in, rev_out) {
  filterAndTrim(fwd_in, fwd_out, rev_in, rev_out,
                maxN = 0, maxEE = c(6, 6), truncQ = 2, minLen = 50,
                rm.phix = TRUE, compress = TRUE, multithread = TRUE)
}

out_1 <- filter_replicate(cutFs_1, filtFs_1, cutRs_1, filtRs_1)
out_2 <- filter_replicate(cutFs_2, filtFs_2, cutRs_2, filtRs_2)
out_3 <- filter_replicate(cutFs_3, filtFs_3, cutRs_3, filtRs_3)

# Check if a good number of reads passed the quality filtering.
# We are filtering out ~30% which is okay.
head(out_1)
head(out_2)
head(out_3)

# Learn the error rates, separately for every replicate and for the
# ".1" (errF_*) and ".2" (errR_*) read files.
errF_1 <- learnErrors(filtFs_1, multithread = TRUE)
errR_1 <- learnErrors(filtRs_1, multithread = TRUE)

errF_2 <- learnErrors(filtFs_2, multithread = TRUE)
errR_2 <- learnErrors(filtRs_2, multithread = TRUE)

errF_3 <- learnErrors(filtFs_3, multithread = TRUE)
errR_3 <- learnErrors(filtRs_3, multithread = TRUE)

# De-replicate identical reads, again per replicate and read file.
derepFs_1 <- derepFastq(filtFs_1, verbose = TRUE)
derepRs_1 <- derepFastq(filtRs_1, verbose = TRUE)

derepFs_2 <- derepFastq(filtFs_2, verbose = TRUE)
derepRs_2 <- derepFastq(filtRs_2, verbose = TRUE)

derepFs_3 <- derepFastq(filtFs_3, verbose = TRUE)
derepRs_3 <- derepFastq(filtRs_3, verbose = TRUE)

# Name the derep-class objects by the sample names.
# The names are derived from the file list each object was built from instead
# of re-using the names of replicate 1 for all replicates: the file lists are
# sorted per replicate, so their order is not guaranteed to be the same, and
# re-using one name vector would then silently mislabel samples.
# The sample name is everything in front of "<replicate>.sample_demux_prirm.".
#   paths:     file paths of one replicate
#   replicate: replicate number used in the file names (1, 2 or 3)
replicate_sample_names <- function(paths, replicate) {
  sub(paste0(replicate, "\\.sample_demux_prirm\\..*$"), "", basename(paths))
}

names(derepFs_1) <- replicate_sample_names(filtFs_1, 1)
names(derepRs_1) <- replicate_sample_names(filtRs_1, 1)

names(derepFs_2) <- replicate_sample_names(filtFs_2, 2)
names(derepRs_2) <- replicate_sample_names(filtRs_2, 2)

names(derepFs_3) <- replicate_sample_names(filtFs_3, 3)
names(derepRs_3) <- replicate_sample_names(filtRs_3, 3)

# Run the DADA2 core algorithm for the sample inference. Every replicate and
# read file is denoised with the error model learned from the same files.
dadaFs_1 <- dada(derepFs_1, err = errF_1, multithread = TRUE)
dadaRs_1 <- dada(derepRs_1, err = errR_1, multithread = TRUE)

dadaFs_2 <- dada(derepFs_2, err = errF_2, multithread = TRUE)
dadaRs_2 <- dada(derepRs_2, err = errR_2, multithread = TRUE)

dadaFs_3 <- dada(derepFs_3, err = errF_3, multithread = TRUE)
dadaRs_3 <- dada(derepRs_3, err = errR_3, multithread = TRUE)

# Merge the paired reads within each replicate.
mergers_1 <- mergePairs(dadaFs_1, derepFs_1, dadaRs_1, derepRs_1, verbose = TRUE)
mergers_2 <- mergePairs(dadaFs_2, derepFs_2, dadaRs_2, derepRs_2, verbose = TRUE)
mergers_3 <- mergePairs(dadaFs_3, derepFs_3, dadaRs_3, derepRs_3, verbose = TRUE)

# Construct one ASV table per replicate and print its dimensions.
seqtab_1 <- makeSequenceTable(mergers_1)
dim(seqtab_1)

seqtab_2 <- makeSequenceTable(mergers_2)
dim(seqtab_2)

seqtab_3 <- makeSequenceTable(mergers_3)
dim(seqtab_3)

# Remove chimeric sequences from the table of every replicate.
seqtab_1.nochim <- removeBimeraDenovo(
  seqtab_1, method = "consensus", multithread = TRUE, verbose = TRUE
)

seqtab_2.nochim <- removeBimeraDenovo(
  seqtab_2, method = "consensus", multithread = TRUE, verbose = TRUE
)

seqtab_3.nochim <- removeBimeraDenovo(
  seqtab_3, method = "consensus", multithread = TRUE, verbose = TRUE
)

# Check for reverse complement synthetic diversity.
# Because the libraries are in mixed orientation we need to check for identical
# sequences which were read in reverse complement orientation.

# For every sequence, report whether its reverse complement equals one of the
# sequences that come BEFORE it in the table.
#   seqs:    character vector of sequences
#   seqs_rc: their reverse complements, in the same order
# seq_len(i - 1) is empty for the first sequence, so it is compared with
# nothing; 1:(i - 1) would evaluate to c(1, 0) there and compare the first
# sequence with itself. vapply always returns a logical vector, also for an
# empty table.
flag_rc_dupes <- function(seqs, seqs_rc) {
    vapply(seq_along(seqs), function(i) {
        seqs_rc[[i]] %in% seqs[seq_len(i - 1)]
    }, logical(1))
}

sq_1 <- getSequences(seqtab_1.nochim)
sq.rc_1 <- dada2:::rc(sq_1)
rcdupes_1 <- flag_rc_dupes(sq_1, sq.rc_1)

sq_2 <- getSequences(seqtab_2.nochim)
sq.rc_2 <- dada2:::rc(sq_2)
rcdupes_2 <- flag_rc_dupes(sq_2, sq.rc_2)

sq_3 <- getSequences(seqtab_3.nochim)
sq.rc_3 <- dada2:::rc(sq_3)
rcdupes_3 <- flag_rc_dupes(sq_3, sq.rc_3)

# Merge the forward and reverse-complement reads: the flagged columns are
# renamed to their reverse complement, so that they carry the same sequence as
# their earlier counterpart, and the table is then collapsed.
colnames(seqtab_1.nochim)[rcdupes_1] <- dada2:::rc(colnames(seqtab_1.nochim)[rcdupes_1])
stm_1 <- collapseNoMismatch(seqtab_1.nochim)

colnames(seqtab_2.nochim)[rcdupes_2] <- dada2:::rc(colnames(seqtab_2.nochim)[rcdupes_2])
stm_2 <- collapseNoMismatch(seqtab_2.nochim)

colnames(seqtab_3.nochim)[rcdupes_3] <- dada2:::rc(colnames(seqtab_3.nochim)[rcdupes_3])
stm_3 <- collapseNoMismatch(seqtab_3.nochim)

# Merge the three per-replicate ASV tables.
# mergeSequenceTables takes them as a list; repeats = "sum" and tryRC = TRUE
# are passed on unchanged.
input_tables <- list(stm_1, stm_2, stm_3)

seqtab_merge <- mergeSequenceTables(
  tables  = input_tables,
  repeats = "sum",
  tryRC   = TRUE
)

# An additional run to remove chimeric sequences from the merged table.
seqtab_merge.nochim <- removeBimeraDenovo(
  seqtab_merge, method = "consensus", multithread = TRUE, verbose = TRUE
)

# Inspect the sequence lengths to look for anything that seems suspicious. Not the case here.
table(nchar(getSequences(seqtab_merge.nochim)))

# Give the sequence variants more manageable names: ">ASV_1", ">ASV_2", ...
# asv_seqs holds the sequences (the column names of the table), asv_headers the
# FASTA header for each of them, in the same order.
# seq_len() yields an empty index for a table without columns, so no header is
# created in that case (1:0 would count down and create a spurious one).
asv_seqs <- colnames(seqtab_merge.nochim)
asv_headers <- sprintf(">ASV_%d", seq_len(ncol(seqtab_merge.nochim)))

# Track the reads through the pipeline.
# This is a last sanity check to see that we are not losing samples at an unexpected step.

# Total number of reads represented by an object (sum of its unique-sequence counts).
getN <- function(x) sum(getUniques(x))

# Build the tracking table of one replicate: the filterAndTrim counts
# (input, filtered) followed by the reads left after denoising the two read
# files, after merging and after chimera removal. Rows are labelled with sample.names.
# If processing a single sample, remove the sapply calls:
# e.g. replace sapply(dadaFs, getN) with getN(dadaFs)
build_track <- function(filter_counts, dada_fwd, dada_rev, merged, nochim_table) {
  track <- cbind(filter_counts,
                 sapply(dada_fwd, getN),
                 sapply(dada_rev, getN),
                 sapply(merged, getN),
                 rowSums(nochim_table))
  colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
  rownames(track) <- sample.names
  track
}

track_1 <- build_track(out_1, dadaFs_1, dadaRs_1, mergers_1, seqtab_1.nochim)
track_2 <- build_track(out_2, dadaFs_2, dadaRs_2, mergers_2, seqtab_2.nochim)
track_3 <- build_track(out_3, dadaFs_3, dadaRs_3, mergers_3, seqtab_3.nochim)

head(track_1)
head(track_2)
head(track_3)

# Make and save a FASTA file of our final ASV sequences: headers and sequences
# are interleaved (header 1, sequence 1, header 2, ...) and written one per line.
asv_fasta <- as.vector(rbind(asv_headers, asv_seqs))
writeLines(asv_fasta, "ASVs_fungi.fa")

# Write our chimera-screened and merged sequence table (transposed) to a
# tab-separated text file. This will be our ASV table to work with from here.
write.table(t(seqtab_merge.nochim), file = "asv_table_fungi.txt", sep = "\t")

# Also save it as an .rds file to not corrupt any structure when we need to reload the table.
saveRDS(seqtab_merge.nochim, file = "asv_table_fungi.rds")

###
# Taxonomy assignment.
###

# Path to the UNITE database FASTA file used as taxonomic reference.
unite.ref <- "./sh_general_release_dynamic_27.10.2022.fasta"

# Run the taxonomy assignment on the merged, chimera-screened ASV table.
taxa <- assignTaxonomy(
  seqtab_merge.nochim, unite.ref,
  tryRC = TRUE, multithread = TRUE
)

# Save the taxonomy table as an R object.
saveRDS(taxa, file = "tax_table_fungi_new.rds")

# Leave R and save the workspace.
quit(save = "yes")

###
#Matchlist creation for LULU curation
###

# Open the conda environment containing blast.
conda activate blastenv

# Create a nucleotide database from the fungal ASVs in FASTA format.
makeblastdb -dbtype nucl -parse_seqids -in ASVs_fungi.fa

# Compare all ASVs to each other (query and database are the same file) and
# write a match list of sequences that are similar to each other. The list has
# three columns: query id, subject id and percent identity.
blastn \
  -query ASVs_fungi.fa \
  -db ASVs_fungi.fa \
  -num_threads 20 \
  -perc_identity 84 \
  -qcov_hsp_perc 80 \
  -outfmt '6 qseqid sseqid pident' \
  -out match_list_fungi.txt

# Close the conda environment.
conda deactivate

# Move to R on the local machine to run decontam and the LULU curation.
