Published July 27, 2017 | Version v2
Dataset Open

RDP LSU taxonomic training data formatted for DADA2 (trainingset 11)

  • 1. Duke University

Description

#Format RDP taxonomic training set for DADA2
#1  Download the RDP LSU training set and the unaligned fungal sequences from a terminal, then move the files somewhere with >30GB free
#   -O gives the SourceForge archive its real name; without it wget names the file after the last URL component ("download")
wget -O RDPClassifier_fungiLSU_trainsetNo11_rawtrainingdata.zip https://sourceforge.net/projects/rdp-classifier/files/RDP_Classifier_TrainingData/RDPClassifier_fungiLSU_trainsetNo11_rawtrainingdata.zip/download
wget http://rdp.cme.msu.edu/download/current_Fungi_unaligned.fa.gz
#2  Unzip the trainingset file (and decompress current_Fungi_unaligned.fa.gz, which step 4 reads as a plain .fa), then replace Us with Ts in the training fasta by executing in terminal
#   Header lines (starting with ">") are passed through untouched; every other line is treated as sequence,
#   so the substitution is also correct when a sequence is wrapped over several lines
awk '/^>/ {print; next} {gsub(/[uU]/,"T"); print}' \
  /media/lauren/96BA-19E6/RDPClassifier_fungiLSU_trainsetNo11_rawtrainingdata/fungiLSU_train_012014.fa \
  > /media/lauren/96BA-19E6/RDPClassifier_fungiLSU_trainsetNo11_rawtrainingdata/fungiLSU_train_012014_lsu_fixed_v2.fa
#3  Load dada2 and report which version is installed
library(dada2)
packageVersion("dada2")

#4  Write the uncompressed DADA2-formatted training fastas
# Every input and output lives under the same mounted drive, so the paths are built once here.
drive <- "/media/lauren/96BA-19E6"
path <- file.path(drive, "RDPClassifier_fungiLSU_trainsetNo11_rawtrainingdata")

# Inputs: U->T corrected training fasta, its taxid table, and the unaligned fungal sequences
train_fasta <- file.path(path, "fungiLSU_train_012014_lsu_fixed_v2.fa")
train_taxid <- file.path(path, "fungiLSU_taxid_012014.txt")
species_fasta <- file.path(drive, "RDPClassifierLSU", "current_Fungi_unaligned.fa")

# Outputs: base names of the files placed in the Upload folder
taxonomy_out <- file.path(drive, "Upload", "RDP_LSU_fixed_train_set_v2.fa")
species_out <- file.path(drive, "Upload", "rdp_species_assignment_LSU_v2.fa")

dada2:::makeTaxonomyFasta_RDP(train_fasta, train_taxid, taxonomy_out, compress = FALSE)
dada2:::makeSpeciesFasta_RDP(species_fasta, species_out, compress = FALSE)

#5  Write the compressed DADA2-formatted training fastas under .gz and .zip names
# NOTE(review): the .zip targets go through the same compress = TRUE path as the .gz targets,
# so they are presumably gzip streams carrying a .zip extension -- confirm before relying on unzip.
dada2:::makeTaxonomyFasta_RDP(train_fasta, train_taxid, paste0(taxonomy_out, ".gz"), compress = TRUE)
dada2:::makeTaxonomyFasta_RDP(train_fasta, train_taxid, paste0(taxonomy_out, ".zip"), compress = TRUE)
dada2:::makeSpeciesFasta_RDP(species_fasta, paste0(species_out, ".gz"), compress = TRUE)
dada2:::makeSpeciesFasta_RDP(species_fasta, paste0(species_out, ".zip"), compress = TRUE)

 

Notes

This work was supported by NIEHS grant P42-ES010356

Files

RDP_LSU_fixed_train_set_v2.fa.zip

Files (86.7 MB)

Name Size Download all
md5:d70126d4a6a43cd0eb3bfbf0d58d5419
16.6 MB Download
md5:50c03d2cb15361ea83fa5573e75b4114
1.9 MB Download
md5:50c03d2cb15361ea83fa5573e75b4114
1.9 MB Preview Download
md5:592eb1aaca80c80a3083cb27bd4b297c
63.0 MB Download
md5:40e369c21eba15e006db84680ab3ba2e
1.6 MB Download
md5:40e369c21eba15e006db84680ab3ba2e
1.6 MB Preview Download