## malaise.fasta
# Search malaise.fasta against the four reference databases with identical
# vsearch settings. Every hit table is written as malaise_<db>_ids.tsv, which
# is the name the count_THID.rb calls for this sample read.

# Prepare the MIDORI reference: unpack it, then filter it with the taxon list.
# NOTE(review): the filter presumably writes
# MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta, the file searched below -
# confirm against fasta_taxon_filter_RAW.pl.
gzip -d MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW.fasta.gz
perl5.16.3 fasta_taxon_filter_RAW.pl list_for_MIDORI.txt MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW.fasta

# Prepare the Porter reference: the filter's stdout becomes the database file.
ruby filter_for_taxon.rb Porter_trainseq.fas Arthropoda > Porter_trainseq_Arthropoda.fas

# Same vsearch 2.14.1 install that every other search in this file calls.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=malaise

# Each entry is <output tag>:<reference FASTA>.
for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  # Plus strand only, identity threshold 0.7, one top hit per query, reported
  # as query / target / identity columns.
  "$vsearch_bin" --usearch_global "${query}.fasta" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id
done

# Build the Germany lysis query FASTA: stdout of the extraction script, run on
# the raw OTU table, is saved as germany_lysis_RAW.fas.
ruby extract_otu_seqs_from_lysis_tsv.rb lysis_OTU_table_RAW.tsv > germany_lysis_RAW.fas



## germany_lysis_RAW.fas

# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=germany_lysis_RAW

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done



# Give the New Zealand OTU centroid file its short working name.
mv COI_366_OTU_centroids_ANDREWDOPHEIDE.fasta new_zealand.fas
## new_zealand.fas

# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=new_zealand

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done



## honduras.fas

# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=honduras

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done



# Rename the Canadian benthos reads in two steps: first to canada.fas, then to
# the final working name canada_benthos.fas.
mv Cdn_benthos_454_200bp.fasta canada.fas
mv canada.fas canada_benthos.fas



## canada_benthos.fas

# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=canada_benthos

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done



# Give the THID tables of the malaise.fasta sample a costa_rica_ prefix.
# All four destinations end in .tsv, matching the names that the count_THID.rb
# calls for this sample write to.
for db in midori taxalogue porter tidybug; do
  mv "malaise_${db}_THID.tsv" "costa_rica_malaise_${db}_THID.tsv"
done



# Run count_THID.rb on every <sample>_<db>_ids.tsv hit table and save its
# stdout as <sample>_<db>_THID.tsv. Results for the "malaise" sample are stored
# under the costa_rica_malaise prefix instead.
for sample in canada_benthos germany_lysis_RAW honduras malaise new_zealand; do
  case "$sample" in
    malaise) out_prefix=costa_rica_malaise ;;
    *) out_prefix=$sample ;;
  esac
  for db in midori porter taxalogue tidybug; do
    ruby count_THID.rb "${sample}_${db}_ids.tsv" > "${out_prefix}_${db}_THID.tsv"
  done
done



# Put the country first in the query FASTA names.
mv light_trap_portugal.fas portugal_light_trap.fas
mv malaise_canada.fas canada_malaise.fas
mv malaise_china.fas china_malaise.fas

# Relabel the Germany THID tables from "lysis" to "malaise". Every rename
# passes both a source and a destination; mv with a single operand only fails
# with a "missing destination" error and renames nothing.
for db in midori porter taxalogue tidybug; do
  mv "germany_lysis_RAW_${db}_THID.tsv" "germany_malaise_RAW_${db}_THID.tsv"
done



## china_malaise.fas
# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=china_malaise

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done


## canada_malaise.fas
# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=canada_malaise

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done


## portugal_light_trap.fas
# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=portugal_light_trap

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done


## china_malaise2.fas
# Run the same global search once per reference database. Each entry is
# <output tag>:<reference FASTA>; hits go to <query>_<tag>_ids.tsv and all
# vsearch console output (stdout and stderr) to the matching .log file.
vsearch_bin=/home/nnoll/installed_programs/vsearch-2.14.1-linux-x86_64/bin/vsearch
query=china_malaise2

for ref in \
  taxalogue:Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  midori:MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  tidybug:boldCOI.derep.fasta \
  porter:Porter_trainseq_Arthropoda.fas
do
  tag=${ref%%:*}
  db=${ref#*:}
  "$vsearch_bin" --usearch_global "${query}.fas" --db "$db" \
    --threads 32 --strand plus --id 0.7 --maxaccepts 8 --maxrejects 128 \
    --top_hits_only --maxhits 1 \
    --userout "${query}_${tag}_ids.tsv" --userfields query+target+id \
    &> "${query}_${tag}_ids.log"
done



# Run count_THID.rb on every <sample>_<db>_ids.tsv hit table of the second
# batch of samples and save its stdout as <sample>_<db>_THID.tsv.
for sample in portugal_light_trap canada_malaise china_malaise china_malaise2; do
  for db in midori porter taxalogue tidybug; do
    ruby count_THID.rb "${sample}_${db}_ids.tsv" > "${sample}_${db}_THID.tsv"
  done
done



# Run get_subsamples.rb once on each of the four reference databases used by
# the searches above.
# NOTE(review): presumably draws subsamples from each database for the
# reference-vs-reference comparison - confirm against get_subsamples.rb.
for ref_db in \
  Arthropoda_derep_all_output_uc_STPf_dg_CONTf_Nf3_Lf400_1569_ConsM_TaxR.fas \
  Porter_trainseq_Arthropoda.fas \
  MIDORI_UNIQ_SP_NUC_GB248_CO1_RAW_target.fasta \
  boldCOI.derep.fasta
do
  ruby get_subsamples.rb "$ref_db"
done


# Hand each per-database command script to THID_qsub.sh, one call per target
# database.
# NOTE(review): the name suggests the wrapper submits the script to a batch
# queue - confirm against THID_qsub.sh.
for target in tidybug midori porter taxalogue; do
  ./THID_qsub.sh "THID_command_subsamples_vs_${target}.sh"
done



# Run the project's run_count_thid.sh wrapper.
# NOTE(review): presumably runs count_THID.rb over the subsample search
# results - confirm against the script.
./run_count_thid.sh



# Run both combine scripts on the current directory ("." is their only
# argument): first the sample results, then the reference-vs-reference ones.
for combiner in combine_THID_results.rb combine_ref_vs_ref_THID_results.rb; do
  ruby "$combiner" .
done
