Data Wrangling

# Look up the example input files that ship with the package.
input_files <- pkg.data[["input_files"]]

# Paths to the silva biom file and the silva tree file.
biom_file <- input_files[["biom_files"]][["silva"]]
tree_file <- input_files[["tree_files"]][["silva"]]

# Path to the Nephele metadata (the "two_groups" entry).
metadata_file <- input_files[["metadata"]][["two_groups"]]

# Custom phyloseq parsing function for silva annotations.
parse_func <- parse_taxonomy_silva_128

# Build the phyloseq object from the files and the parser selected above.
phy_obj <- create_phyloseq(
  biom_file = biom_file,
  tree_file = tree_file,
  metadata_file = metadata_file,
  parse_func = parse_func
)

# Convert the phyloseq object into a taxmap object in the "raw_format".
raw_metacoder <- as_MicrobiomeR_format(
  obj = phy_obj,
  format = "raw_format"
)

Taxmap Filtering

Metacoder is a highly useful package that comes with tons of features right out of the box. Functions such as metacoder::filter_ambiguous_taxa(), taxa::filter_taxa(), and taxa::filter_obs() for instance can almost always be used in your workflow.

# Taxmap filtering in a single pipeline, starting from the raw taxmap:
#   1. Remove Archaea together with its subtaxa (invert = TRUE turns the
#      match into a removal).
#   2. Ambiguous Annotation Filter - remove taxonomies with ambiguous names.
metacoder_obj <- raw_metacoder %>%
  taxa::filter_taxa(
    obj = .,
    taxon_names == "Archaea",
    subtaxa = TRUE,
    invert = TRUE
  ) %>%
  metacoder::filter_ambiguous_taxa(subtaxa = TRUE)

Basic Filtering

MicrobiomeR provides three functions that do some basic filtering; one of them, sample_id_filter(), is demonstrated below.

These functions all take the same parameters, most notably a transformation function (.f_transform), a filtering function (.f_filter), and a conditional function (.f_condition).

# Low Sample Filter - remove the low samples.
# As a general rule, run the sample filter before any other filter.
# The filtering function sums the values and the condition tests the result
# against 20 (presumably samples with a total of at least 20 are kept --
# confirm against the sample_id_filter documentation).
metacoder_obj <- sample_id_filter(
  obj = metacoder_obj,
  .f_filter = ~ sum(.),
  .f_condition = ~ . >= 20,
  validated = TRUE
)

Advanced Filtering

The advanced filtering functions available with MicrobiomeR do several things. They wrap the basic filtering functions mentioned above, they wrap common metacoder and taxa functions, and they mimic the tools found in the phyloseq package. Below I’ve mentioned some of them and how they relate to the phyloseq package:

# Advanced filters, applied in order to the same taxmap object.  Each step
# receives the result of the previous one through the `obj` argument.
metacoder_obj <- metacoder_obj %>%
  # Master Threshold Filter - add the otu_proportions table and then filter
  # OTUs based on a minimum percentage.
  otu_proportion_filter(
    obj = .,
    otu_percentage = 0.00001
  ) %>%
  # Taxon Prevalence Filter - add taxa_abundance and taxa_proportions and then
  # filter OTUs that do not appear more than a certain number of times in a
  # certain percentage of samples at the specified agglomerated rank.  This is
  # considered a supervised method, because it relies on intermediate
  # taxonomies to filter the data.
  # The default minimum abundance is 5 and the default sample percentage is
  # 0.5.  NOTE(review): this was previously glossed as "5%", but 0.5 as a
  # proportion is 50% -- confirm against the function's defaults.
  # Only the first (Phylum) call omits validated = TRUE.
  taxa_prevalence_filter(
    obj = .,
    rank = "Phylum"
  ) %>%
  taxa_prevalence_filter(
    obj = .,
    rank = "Class",
    validated = TRUE
  ) %>%
  taxa_prevalence_filter(
    obj = .,
    rank = "Order",
    validated = TRUE
  ) %>%
  # OTU Prevalence Filter - filter OTUs that do not appear more than a certain
  # number of times in a certain percentage of samples.  This is considered an
  # unsupervised method, because it relies only on the leaf OTU ids to filter
  # the data.
  otu_prevalence_filter(
    obj = .,
    validated = TRUE
  ) %>%
  # Coefficient of Variation Filter - filter OTUs based on the coefficient of
  # variation.
  cov_filter(
    obj = .,
    coefficient_of_variation = 3,
    validated = TRUE
  )

Other Filtering

As mentioned previously, taxmap filtering can be done in any way that fits your needs with the taxa and metacoder packages. However, MicrobiomeR also provides some utility functions for filtering/manipulating your observation data by hand. Observation data can be accessed within the taxmap object. Make sure you don’t manipulate existing data inside of your taxmap object unless you’re absolutely sure you know what you’re doing.

Transposing

# Pull the two abundance tables out of the taxmap object's data.
taxa_abund <- metacoder_obj$data$taxa_abundance
otu_abund <- metacoder_obj$data$otu_abundance

# Transpose using a single ID column (taxon_id); "samples" is passed as the
# header name.
transposer(taxa_abund, ids = "taxon_id", header_name = "samples")
#> # A tibble: 48 x 176
#>    samples     ac    af    ag    ah    ai    aj    ak    al    an    ao
#>    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 Sample~ 112582 53903  4217  4237    68 46540  1794    11  1524    51
#>  2 Sample~  88194 56426   480   750    23 30145    17     7    38     0
#>  3 Sample~  74933 39952   557  7138    21 26932    73     5    49     0
#>  4 Sample~  80796 41912   629  3234    14 34738   189    12     0     0
#>  5 Sample~ 116806 51677  1111  1207    13 62301    47    27   109    11
#>  6 Sample~  90122 33032   350     9     0 56274     0    11     0     0
#>  7 Sample~ 129778 61153  2346  2882    58 62494    12    24   338   101
#>  8 Sample~ 113377 45411   332  1369     3 65815   228    31    16     1
#>  9 Sample~ 134396 52332  3202  1560    44 76700    98    43   143    28
#> 10 Sample~ 112240 41003  4624   518    54 65317   276    13   300    63
#> # ... with 38 more rows, and 165 more variables: aq <dbl>, as <dbl>,
#> #   at <dbl>, av <dbl>, aw <dbl>, ay <dbl>, az <dbl>, ba <dbl>, bb <dbl>,
#> #   bc <dbl>, bd <dbl>, be <dbl>, bf <dbl>, bg <dbl>, bi <dbl>, bk <dbl>,
#> #   bl <dbl>, bm <dbl>, bo <dbl>, br <dbl>, bs <dbl>, bu <dbl>, bv <dbl>,
#> #   bx <dbl>, by <dbl>, bz <dbl>, cb <dbl>, cc <dbl>, ce <dbl>, cf <dbl>,
#> #   ch <dbl>, ci <dbl>, cj <dbl>, cl <dbl>, co <dbl>, cq <dbl>, cr <dbl>,
#> #   cs <dbl>, cu <dbl>, cv <dbl>, cz <dbl>, da <dbl>, dc <dbl>, dd <dbl>,
#> #   de <dbl>, df <dbl>, dh <dbl>, dj <dbl>, dl <dbl>, dn <dbl>, do <dbl>,
#> #   dq <dbl>, dr <dbl>, dt <dbl>, dx <dbl>, dy <dbl>, dz <dbl>, ea <dbl>,
#> #   ec <dbl>, ed <dbl>, ee <dbl>, ef <dbl>, ei <dbl>, ej <dbl>, eo <dbl>,
#> #   ep <dbl>, eq <dbl>, es <dbl>, et <dbl>, eu <dbl>, ex <dbl>, ey <dbl>,
#> #   fa <dbl>, fe <dbl>, ff <dbl>, fi <dbl>, fj <dbl>, fl <dbl>, fm <dbl>,
#> #   fn <dbl>, fp <dbl>, fq <dbl>, fr <dbl>, fs <dbl>, ft <dbl>, fu <dbl>,
#> #   fv <dbl>, fx <dbl>, fy <dbl>, fz <dbl>, ga <dbl>, gb <dbl>, gd <dbl>,
#> #   ge <dbl>, gf <dbl>, gg <dbl>, gh <dbl>, gj <dbl>, gk <dbl>, gl <dbl>,
#> #   ...

# Round trip with a single ID column (taxon_id): transpose first, then
# transpose the result back.
transposer(
  transposer(taxa_abund, "taxon_id", "samples"),
  "samples",
  "taxon_id"
)
#> # A tibble: 175 x 49
#>    taxon_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13 Sample_14
#>    <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
#>  1 ac         112582     88194     74933     80796    116806     90122
#>  2 af          53903     56426     39952     41912     51677     33032
#>  3 ag           4217       480       557       629      1111       350
#>  4 ah           4237       750      7138      3234      1207         9
#>  5 ai             68        23        21        14        13         0
#>  6 aj          46540     30145     26932     34738     62301     56274
#>  7 ak           1794        17        73       189        47         0
#>  8 al             11         7         5        12        27        11
#>  9 an           1524        38        49         0       109         0
#> 10 ao             51         0         0         0        11         0
#> # ... with 165 more rows, and 42 more variables: Sample_15 <dbl>,
#> #   Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>, Sample_19 <dbl>,
#> #   Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> #   Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>,
#> #   Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>, Sample_3 <dbl>,
#> #   Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>, Sample_36 <dbl>,
#> #   Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_45 <dbl>,
#> #   Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>, Sample_50 <dbl>,
#> #   Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> #   Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>, Sample_6 <dbl>,
#> #   Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>, Sample_8 <dbl>,
#> #   Sample_9 <dbl>

# Transpose a table that has two ID columns (taxon_id, otu_id).
# The resulting column headers combine the categorical data; they are parsed
# and split back into individual columns if the table is re-transposed.
# Only "otu_id" is named in the call; the printed headers join taxon_id and
# otu_id with "<_>".
transposer(otu_abund, "otu_id", "samples")
#> # A tibble: 48 x 1,667
#>    samples `ac<_>DQ797054.~ `af<_>EU510171.~ `af<_>New.Clean~
#>    <chr>              <dbl>            <dbl>            <dbl>
#>  1 Sample~              130                0              108
#>  2 Sample~                0                4              106
#>  3 Sample~                0               14              152
#>  4 Sample~                0               57               71
#>  5 Sample~              119                3               34
#>  6 Sample~              443                2                1
#>  7 Sample~               15                0               74
#>  8 Sample~               49               14               49
#>  9 Sample~                0                1               12
#> 10 Sample~               33                1               55
#> # ... with 38 more rows, and 1,663 more variables:
#> #   `af<_>New.CleanUp.ReferenceOTU1675` <dbl>,
#> #   `af<_>New.CleanUp.ReferenceOTU1778` <dbl>,
#> #   `af<_>New.CleanUp.ReferenceOTU2195` <dbl>,
#> #   `af<_>New.CleanUp.ReferenceOTU6283` <dbl>,
#> #   `af<_>New.ReferenceOTU1018` <dbl>, `af<_>New.ReferenceOTU1460` <dbl>,
#> #   `af<_>New.ReferenceOTU1898` <dbl>, `af<_>New.ReferenceOTU662` <dbl>,
#> #   `aq<_>EU462203.1.1270` <dbl>, `aq<_>FJ881207.1.1492` <dbl>,
#> #   `bd<_>GQ451183.1.1507` <dbl>, `bd<_>New.CleanUp.ReferenceOTU1` <dbl>,
#> #   `bd<_>New.ReferenceOTU307` <dbl>, `bv<_>AF371685.1.1445` <dbl>,
#> #   `bv<_>EU462407.1.1376` <dbl>, `bv<_>EU469250.1.1356` <dbl>,
#> #   `bv<_>EU777569.1.1267` <dbl>, `bv<_>GQ448792.1.1389` <dbl>,
#> #   `bv<_>HQ681868.1.1479` <dbl>, `bv<_>New.CleanUp.ReferenceOTU21` <dbl>,
#> #   `bv<_>New.CleanUp.ReferenceOTU82` <dbl>,
#> #   `bv<_>New.ReferenceOTU648` <dbl>, `cf<_>AF371949.1.1454` <dbl>,
#> #   `cf<_>New.CleanUp.ReferenceOTU19053` <dbl>,
#> #   `ch<_>New.CleanUp.ReferenceOTU579` <dbl>,
#> #   `cq<_>AB506276.1.1515` <dbl>, `cq<_>EU467330.1.1365` <dbl>,
#> #   `cq<_>JQ185323.1.1351` <dbl>, `cq<_>JQ187807.1.1343` <dbl>,
#> #   `cq<_>KC163104.1.1515` <dbl>, `cq<_>New.CleanUp.ReferenceOTU45` <dbl>,
#> #   `dc<_>FJ753766.1.1449` <dbl>, `dc<_>HQ716634.1.1482` <dbl>,
#> #   `dc<_>JQ191107.1.1383` <dbl>,
#> #   `dc<_>New.CleanUp.ReferenceOTU5376` <dbl>,
#> #   `de<_>AB506217.1.1514` <dbl>, `de<_>DQ794395.1.1386` <dbl>,
#> #   `de<_>DQ801522.1.1389` <dbl>, `de<_>DQ804804.1.1390` <dbl>,
#> #   `de<_>DQ806210.1.1390` <dbl>, `de<_>DQ806419.1.1375` <dbl>,
#> #   `de<_>DQ810016.1.1387` <dbl>, `de<_>EF399608.1.1495` <dbl>,
#> #   `de<_>EF640147.1.1517` <dbl>, `de<_>EF640148.1.1608` <dbl>,
#> #   `de<_>EU462246.1.1393` <dbl>, `de<_>EU462618.1.1379` <dbl>,
#> #   `de<_>EU472017.1.1394` <dbl>, `de<_>EU472558.1.1391` <dbl>,
#> #   `de<_>EU761737.1.1355` <dbl>, `de<_>EU762031.1.1361` <dbl>,
#> #   `de<_>EU768267.1.1370` <dbl>, `de<_>EU774369.1.1254` <dbl>,
#> #   `de<_>FJ367054.1.1360` <dbl>, `de<_>FJ368433.1.1378` <dbl>,
#> #   `de<_>FJ372296.1.1359` <dbl>, `de<_>FJ678233.1.1439` <dbl>,
#> #   `de<_>FJ678914.1.1380` <dbl>, `de<_>FJ681481.1.1448` <dbl>,
#> #   `de<_>FJ681993.1.1389` <dbl>, `de<_>FJ681994.1.1392` <dbl>,
#> #   `de<_>FJ684811.1.1391` <dbl>, `de<_>FJ880611.1.1492` <dbl>,
#> #   `de<_>GQ135595.1.1382` <dbl>, `de<_>HQ716148.1.1427` <dbl>,
#> #   `de<_>HQ716567.1.1448` <dbl>, `de<_>HQ775630.1.1451` <dbl>,
#> #   `de<_>HQ780641.1.1454` <dbl>, `de<_>HQ781391.1.1449` <dbl>,
#> #   `de<_>HQ785245.1.1449` <dbl>, `de<_>HQ790863.1.1455` <dbl>,
#> #   `de<_>HQ792930.1.1435` <dbl>, `de<_>HQ808140.1.1436` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU1867` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU2137` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU233` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU292` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU3186` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU4248` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU446` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU451` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU509` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU574` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU66` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU7980` <dbl>,
#> #   `de<_>New.CleanUp.ReferenceOTU92` <dbl>,
#> #   `de<_>New.ReferenceOTU477` <dbl>, `df<_>AB506215.1.1522` <dbl>,
#> #   `df<_>DQ796118.1.1378` <dbl>, `df<_>DQ798544.1.1272` <dbl>,
#> #   `df<_>DQ809209.1.1280` <dbl>, `df<_>EF401803.1.1493` <dbl>,
#> #   `df<_>EU461642.1.1389` <dbl>, `df<_>EU462752.1.1365` <dbl>,
#> #   `df<_>EU771433.1.1280` <dbl>, `df<_>EU772441.1.1390` <dbl>,
#> #   `df<_>EU776836.1.1286` <dbl>, `df<_>EU777365.1.1388` <dbl>,
#> #   `df<_>EU777419.1.1375` <dbl>, `df<_>EU778354.1.1387` <dbl>, ...

# Round trip with two ID columns (taxon_id, otu_id).
# Transposing categorical data loses the original column headers, so they
# must be supplied again (separated_categories) when transposing back.
transposer(
  transposer(otu_abund, "otu_id", "samples"),
  "samples",
  "otu_id",
  separated_categories = c("taxon_id", "otu_id")
)
#> # A tibble: 1,666 x 50
#>    taxon_id otu_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13
#>    <chr>    <chr>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
#>  1 ac       DQ797~      130         0         0         0       119
#>  2 af       EU510~        0         4        14        57         3
#>  3 af       New.C~      108       106       152        71        34
#>  4 af       New.C~       26         9        52         5         5
#>  5 af       New.C~        0         0        15        20        10
#>  6 af       New.C~        1         0        12        42         2
#>  7 af       New.C~       45        19        17        22         0
#>  8 af       New.R~       84        23         6         6         4
#>  9 af       New.R~        1         2        25        58         0
#> 10 af       New.R~       30        23        80        30         4
#> # ... with 1,656 more rows, and 43 more variables: Sample_14 <dbl>,
#> #   Sample_15 <dbl>, Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>,
#> #   Sample_19 <dbl>, Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>,
#> #   Sample_22 <dbl>, Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>,
#> #   Sample_26 <dbl>, Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>,
#> #   Sample_3 <dbl>, Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>,
#> #   Sample_36 <dbl>, Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>,
#> #   Sample_45 <dbl>, Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>,
#> #   Sample_50 <dbl>, Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>,
#> #   Sample_56 <dbl>, Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>,
#> #   Sample_6 <dbl>, Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>,
#> #   Sample_8 <dbl>, Sample_9 <dbl>

Transforming

# Convert to proportions/percentages by COLUMN: every value is divided by the
# sum supplied to the formula.
transformer(taxa_abund, func = ~ . / sum(.))
#> # A tibble: 175 x 49
#>    taxon_id Sample_1 Sample_2 Sample_3 Sample_6 Sample_9 Sample_10
#>    <chr>       <dbl>    <dbl>    <dbl>    <dbl>    <dbl>     <dbl>
#>  1 ac        1.73e-1  1.75e-1  1.71e-1  1.69e-1  1.71e-1 0.171    
#>  2 af        8.27e-2  6.51e-2  8.16e-2  6.78e-2  6.80e-2 0.109    
#>  3 ag        6.47e-3  2.31e-2  7.75e-4  5.58e-3  1.21e-3 0.000931 
#>  4 ah        6.50e-3  1.44e-3  9.05e-3  9.40e-4  2.18e-3 0.00146  
#>  5 ai        1.04e-4  2.70e-5  1.45e-4  1.46e-6  2.59e-5 0.0000446
#>  6 aj        7.14e-2  8.46e-2  7.64e-2  9.29e-2  9.87e-2 0.0585   
#>  7 ak        2.75e-3  1.47e-4  3.80e-4  9.21e-5  7.60e-5 0.0000330
#>  8 al        1.69e-5  8.29e-5  5.47e-5  1.46e-6  3.71e-6 0.0000136
#>  9 an        2.34e-3  2.28e-4  2.22e-4  7.31e-6  5.74e-5 0.0000737
#> 10 ao        7.83e-5  1.66e-5  9.02e-5  6.14e-4  2.72e-4 0        
#> # ... with 165 more rows, and 42 more variables: Sample_13 <dbl>,
#> #   Sample_14 <dbl>, Sample_17 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> #   Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>, Sample_30 <dbl>,
#> #   Sample_50 <dbl>, Sample_59 <dbl>, Sample_60 <dbl>, Sample_61 <dbl>,
#> #   Sample_7 <dbl>, Sample_27 <dbl>, Sample_45 <dbl>, Sample_5 <dbl>,
#> #   Sample_57 <dbl>, Sample_20 <dbl>, Sample_29 <dbl>, Sample_11 <dbl>,
#> #   Sample_12 <dbl>, Sample_15 <dbl>, Sample_16 <dbl>, Sample_18 <dbl>,
#> #   Sample_19 <dbl>, Sample_23 <dbl>, Sample_28 <dbl>, Sample_31 <dbl>,
#> #   Sample_35 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_46 <dbl>,
#> #   Sample_47 <dbl>, Sample_51 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> #   Sample_58 <dbl>, Sample_8 <dbl>, Sample_4 <dbl>, Sample_52 <dbl>,
#> #   Sample_36 <dbl>

# The same transformation can be applied by ROW.
# The function transposes and re-transposes the table to do this, so the
# transposing information (ids, header name, categories) must be supplied.
transformer(
  taxa_abund,
  by = "row",
  func = ~ . / sum(.),
  ids = "taxon_id",
  header_name = "samples",
  separated_categories = "taxon_id"
)
#> # A tibble: 175 x 49
#>    taxon_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13 Sample_14
#>    <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
#>  1 ac        0.0241    0.0189    0.0160    0.0173    0.0250  0.0193   
#>  2 af        0.0279    0.0292    0.0206    0.0217    0.0267  0.0171   
#>  3 ag        0.0325    0.00370   0.00430   0.00485   0.00857 0.00270  
#>  4 ah        0.0326    0.00577   0.0549    0.0249    0.00929 0.0000692
#>  5 ai        0.00688   0.00233   0.00213   0.00142   0.00132 0        
#>  6 aj        0.0191    0.0124    0.0111    0.0143    0.0256  0.0231   
#>  7 ak        0.232     0.00220   0.00945   0.0245    0.00608 0        
#>  8 al        0.00441   0.00281   0.00201   0.00482   0.0108  0.00441  
#>  9 an        0.285     0.00711   0.00917   0         0.0204  0        
#> 10 ao        0.00928   0         0         0         0.00200 0        
#> # ... with 165 more rows, and 42 more variables: Sample_15 <dbl>,
#> #   Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>, Sample_19 <dbl>,
#> #   Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> #   Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>,
#> #   Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>, Sample_3 <dbl>,
#> #   Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>, Sample_36 <dbl>,
#> #   Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_45 <dbl>,
#> #   Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>, Sample_50 <dbl>,
#> #   Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> #   Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>, Sample_6 <dbl>,
#> #   Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>, Sample_8 <dbl>,
#> #   Sample_9 <dbl>

Excel-Like VLookup

This function was borrowed from an external source (the reference link is missing from this copy of the document).

# Convert the taxmap object to the "analyzed_format".
metacoder_obj <- as_MicrobiomeR_format(
  obj = metacoder_obj,
  format = "analyzed_format"
)

# Agglomerate the taxmap at the Phylum and Class ranks.
phylum_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Phylum")
class_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Class")

# Pull the stats_tax_data observation table out of each agglomerated taxmap.
phylum_data <- phylum_mo$data$stats_tax_data
class_data <- class_mo$data$stats_tax_data

# Cross-reference each Phylum in "phylum_data" against "class_data" and
# return the "wilcox_p_value" found in class_data.
class_p_value <- vlookup(
  lookup_vector = phylum_data$Phylum,
  df = class_data,
  match_var = "Phylum",
  return_var = "wilcox_p_value"
)

# Add the looked-up values to phylum_data as a new column, then show the
# columns of interest.
new_data <- dplyr::mutate(phylum_data, class_p_value = class_p_value)
new_data[
  c("taxon_id", "Phylum", "Class", "class_p_value", "wilcox_p_value")
]
#> # A tibble: 13 x 5
#>    taxon_id Phylum           Class class_p_value wilcox_p_value
#>    <chr>    <fct>            <fct>         <dbl>          <dbl>
#>  1 ac       <NA>             <NA>      NaN       NaN           
#>  2 af       Firmicutes       <NA>        3.40e-5   0.0000340   
#>  3 ag       Proteobacteria   <NA>        3.69e-1   0.369       
#>  4 ah       Spirochaetae     <NA>        6.65e-2   0.0665      
#>  5 ai       Verrucomicrobia  <NA>        8.12e-3   0.00812     
#>  6 aj       Bacteroidetes    <NA>        3.78e-4   0.000378    
#>  7 ak       Fibrobacteres    <NA>        3.02e-2   0.0302      
#>  8 al       Actinobacteria   <NA>        9.24e-8   0.0000000924
#>  9 an       Cyanobacteria    <NA>        6.69e-2   0.0669      
#> 10 ao       Elusimicrobia    <NA>        3.94e-1   0.394       
#> 11 aq       Saccharibacteria <NA>       NA         0.000895    
#> 12 as       Planctomycetes   <NA>        1.48e-3   0.00148     
#> 13 at       Tenericutes      <NA>        5.46e-1   0.546