# Setup: attach dplyr (pipe, filter, select, left_join) and load the project
# helper file.
# Deliberately no rm(list = ls()) here: clearing the global environment
# destroys the caller's objects when this script is sourced, and it does not
# produce a clean session anyway (attached packages and options persist).
# Run the script in a fresh R session instead.
library(dplyr)
# NOTE(review): rlib_doc.R is expected to define the non-dplyr helpers called
# further down (meta_info_to_query(), trim_space()) -- confirm.
source('../code/rlib_doc.R')
# Data dictionary CSV downloaded from the UK Biobank showcase URL.
dict <- read.csv("http://biobank.ctsu.ox.ac.uk/~bbdatan/Data_Dictionary_Showcase.csv")

# Trait table; its UKBB.code and Trait columns are used below.
qtrait_sub <- read.csv('../external_data/martin_et_al_2019ng_table_s6.csv')

# Extra field requested on top of the trait codes; labelled 'ethnicity' below.
ethnicityid <- 21000

# All field IDs to pull out of the dictionary.
wanted_ids <- c(qtrait_sub$UKBB.code, ethnicityid)

# UKBB.code -> Trait lookup: the table's traits plus one row for ethnicity.
trait_lookup <- rbind(
  qtrait_sub %>% select(UKBB.code, Trait),
  data.frame(UKBB.code = ethnicityid, Trait = 'ethnicity')
)

# Keep the dictionary rows for the wanted fields and attach the trait label
# to each of them.
myextract <- dict %>%
  filter(FieldID %in% wanted_ids) %>%
  left_join(trait_lookup, by = c('FieldID' = 'UKBB.code'))

# Build the query entries from the trait labels, field IDs and the
# dictionary's Instances / Array columns.
# NOTE(review): meta_info_to_query() is defined outside this file; the code
# below treats its result as a named list -- confirm.
mydata <- meta_info_to_query(
  myextract$Trait,
  myextract$FieldID,
  myextract$Instances,
  myextract$Array
)
# Existing query definition; the selected entries are added under its `data`
# key below.
current_yaml <- yaml::read_yaml('../misc/first_query.yaml')

# Column names from the `column_name` column of the '|'-separated fields.txt,
# passed through trim_space() (helper defined outside this file).
good_fields <- trim_space(
  read.delim2('../external_data/fields.txt', header = TRUE, sep = '|')$column_name
)

# Keep only the query entries whose column name(s) all appear in good_fields.
# all() plus the length guard yields exactly one TRUE/FALSE per entry, so an
# entry holding zero or several column names cannot trigger the
# "condition has length > 1" / "argument is of length zero" errors of a bare
# if () test.
# NOTE(review): entries are presumably single column names -- confirm against
# meta_info_to_query().
is_available <- vapply(
  mydata,
  function(columns) length(columns) > 0 && all(columns %in% good_fields),
  logical(1)
)
mydata <- mydata[is_available]

# Add (or overwrite) one `data` entry per remaining query field.
for (n in names(mydata)) {
  current_yaml$data[[n]] <- mydata[[n]]
}

yaml::write_yaml(current_yaml, '../output/query_martin_et_al_qtraits.yaml')
# The fields available in the current ukbREST instance on nucleus
# are saved to an RDS file here.
# Write good_fields (the column names read from fields.txt earlier in this
# script) to an RDS file under ../output.
saveRDS(
  object = good_fields,
  file = '../output/fields_on_nucleus.rds'
)