PR2 version 5.0.0
Update pr2_traits to 9 ranks

Author

Daniel Vaulot

Published

March 14, 2023

Aim

Update pr2_traits for use of 9 ranks taxonomy

Initialization

# Run the PR2 initialization script (R/PR2_init.R) without echoing its code
source(here::here("R", "PR2_init.R"), echo = FALSE)

Set up files

  # Date stamp (YYYY-MM-DD) of the current run, stored in pr2.env
  pr2.env$date <- format(Sys.time(), "%Y-%m-%d")

  # Output directory for this update step, resolved from the project root
  dir_pr2_update <- here::here("5.0", "H - pr2_traits 9 levels")

  # Create the directory only when it is missing, so that re-running the
  # script does not raise an "already exists" warning; recursive = TRUE also
  # creates the parent folder when it is absent. Genuine creation failures
  # (e.g. permissions) still produce a warning.
  if (!dir.exists(dir_pr2_update)) {
    dir.create(dir_pr2_update, recursive = TRUE)
  }
  
  # Build the path of a file (or sub-directory) located in the update directory.
  #
  # file_name: character vector of names relative to `dir_pr2_update`.
  # Returns a character vector of paths, joined to `dir_pr2_update` with "/".
  full_path <- function(file_name) {
    # file.path() is the base R path constructor and always uses "/" as the
    # separator, matching the previous hand-built string
    file.path(dir_pr2_update, file_name)
  }

  # Dated Excel file name for the taxonomy, placed in the update directory
  file_pr2_taxonomy <- full_path(
    str_c("pr2_taxonomy_", pr2.env$date, ".xlsx")
  )

  # Sub-directory for the taxonomy output; warnings (e.g. the directory
  # already exists) are suppressed
  dir.create(full_path("taxo"), showWarnings = FALSE)

Read pr2_taxonomy 8/9 levels and pr2_traits from database

# Connect with the project helpers db_info() / db_connect() (defined outside
# this section), using the "pr2_google" settings
pr2_db <- db_info("pr2_google")
pr2_db_con <- db_connect(pr2_db)

# Taxonomy entries that are not flagged as removed (taxo_removed_version is NA)
pr2_taxo <- pr2_db_con %>%
  tbl("pr2_taxonomy_4.14") %>%
  filter(is.na(taxo_removed_version)) %>%
  collect()

# Complete pr2_traits table
pr2_traits <- pr2_db_con %>%
  tbl("pr2_traits") %>%
  collect()

# Both tables are now local, so the connection can be released
db_disconnect(pr2_db_con)

Create taxonomy lists for pr2 8 and 9 levels and compare

  • Go from wide to long for both 8 and 9 levels
  • Join the 2 lists
  • Filter those that are different
# Suffix carried by the 8-level rank columns of pr2_taxo
ending <- "_8"

# Long form of the 8-level taxonomy: one row per distinct (rank, taxon name)
pr2_taxo_8 <- pr2_taxo %>%
  select(kingdom_8:species_8) %>%
  # Strip the suffix so the rank columns are named kingdom ... species
  rename_with(
    function(col_name) str_replace(col_name, ending, ""),
    contains(ending)
  ) %>%
  pivot_longer(
    cols = kingdom:species,
    names_to = "taxon_level_8",
    values_to = "taxon_name"
  ) %>%
  distinct()

# Suffix carried by the 9-level rank columns of pr2_taxo
ending <- "_9"

# Long form of the 9-level taxonomy: one row per distinct (rank, taxon name)
pr2_taxo_9 <- pr2_taxo %>%
  select(domain_9:species_9) %>%
  # Strip the suffix so the rank columns are named domain ... species
  rename_with(
    function(col_name) str_replace(col_name, ending, ""),
    contains(ending)
  ) %>%
  pivot_longer(
    cols = domain:species,
    names_to = "taxon_level_9",
    values_to = "taxon_name"
  ) %>%
  distinct()

# Attach to every 8-level (rank, name) pair the rank that the same taxon name
# has in the 9-level taxonomy. The key is spelled out instead of relying on a
# natural join: taxon_name is the only column the two tables share, so the
# result is unchanged, but the "Joining with `by = ...`" message is gone and
# the key cannot silently change if columns are added later.
pr2_taxo_8_vs_9 <- pr2_taxo_8 %>%
  left_join(pr2_taxo_9, by = join_by(taxon_name))

# Taxon names whose rank differs between the two taxonomies.
# NOTE(review): names without a 9-level match have taxon_level_9 = NA; the
# comparison then yields NA and filter() drops those rows. Add
# `| is.na(taxon_level_9)` if unmatched names should be listed as well.
pr2_taxo_8_vs_9_different <- pr2_taxo_8_vs_9 %>%
  filter(taxon_level_8 != taxon_level_9)

Join the 8 vs 9 list with pr2_traits

  • Only keep the ones that are different
  • Save as Excel file
  • Excel file is edited manually and used to update the pr2_traits table
    • taxon_name values have been edited to match the new names
    • Edited: 167
    • Added: 4
# Join key shared by pr2_traits and the 8-vs-9 comparison table
by <- join_by(taxon_name)

# Trait rows whose taxon_name has no 9-level match, or whose taxon_level
# differs from the rank found in the 9-level taxonomy
pr2_traits_updated <- pr2_traits %>%
  left_join(pr2_taxo_8_vs_9, by = by) %>%
  filter(is.na(taxon_level_9) | taxon_level != taxon_level_9)

# Write the rows to review to an Excel file in the update directory
rio::export(pr2_traits_updated, file = full_path("pr2_traits_5.0.xlsx"))