library(dplyr)
library(data.table)
library(pander)
library(ggplot2)
# Global report options: set pander's 'table.split.table' option to Inf and
# use the black-and-white ggplot2 theme (base font size 12) as the default.
panderOptions('table.split.table', Inf)
theme_set(theme_bw(base_size = 12))

# Pull in the shared plotting theme from a gist pinned to a fixed revision.
# NOTE(review): `th` is presumably defined by this script - confirm.
source('https://gist.githubusercontent.com/liangyy/43912b3ecab5d10c89f9d4b2669871c9/raw/8151c6fe70e3d4ee43d9ce340ecc0eb65172e616/my_ggplot_theme.R')

# Draw the panel border in the same colour as the axis lines of `th`.
th$panel.border = element_rect(colour = th$axis.line$colour)

# Named colour palette, one entry per sample label, passed to
# scale_color_manual() in the plots that colour by `sample`.
color_mixer = c(
  'African' = '#E74C3C',
  'British_test' = '#28B463',
  'British_valid' = '#D4AC0D',
  'Chinese' = '#3498DB',
  'Indian' = '#9B59B6',
  'British_insample' = '#AAB7B8'
)
# Convert labels of the form 'pval_<number>' into their numeric value.
#
# Args:
#   x: character vector (or an object coercible to character) whose elements
#      carry a 'pval_' prefix, e.g. 'pval_0.05'.
#
# Returns:
#   A numeric vector of the same length as `x`. Elements that are not a valid
#   number once the prefix is removed become NA (with the usual coercion
#   warning from as.numeric()); NA inputs stay NA.
my_parser = function(x) {
  # Base sub() removes the first literal 'pval_' occurrence, which is all the
  # previous stringr::str_remove() call did, so stringr (never loaded by this
  # script) no longer has to be installed.
  as.numeric(sub('pval_', '', x, fixed = TRUE))
}
Here we implement an elastic-net-based PTRS predictor along the regularization path. We train the PTRS using self-reported T2D status and HbA1c concentration. For the binary trait (T2D), we encode cases as 1 and controls as 0 and fit the elastic net with an MSE objective.
# Load the elastic net PTRS performance table of every (trait, prediction
# model) pair and stack the tables per trait. The result `df` is a list with
# one data frame per trait, keyed by the trait name.
pred_models = c('gtex', 'mesa')
traits = c('t2d', 'hba1c')
df = list()
for (tt in traits) {
  dfi = list()
  for (pred_expr in pred_models) {
    tmp = paste0('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_', tt, '_ptrs_', pred_expr, '_british.performance.csv')
    # Spell out FALSE: `F` is an ordinary variable that any sourced script
    # could have reassigned, whereas FALSE is a reserved word.
    tmp = read.csv(tmp, stringsAsFactors = FALSE)
    # Rows whose `pred_expr_source` is 'train' are labelled 'EUR', every other
    # row 'AFHI'; a missing `pred_expr_source` stays NA.
    tmp$pop = ifelse(tmp$pred_expr_source == 'train', 'EUR', 'AFHI')
    # Drop rows whose first column is NA, then build the facet label `type`
    # as '<prediction model>\n<population label>'.
    dfi[[length(dfi) + 1]] = tmp[!is.na(tmp[, 1]), ] %>%
      mutate(type = paste0(pred_expr, '\n', pop))
  }
  df[[tt]] = do.call(rbind, dfi)
}
# HbA1c: partial R2 against log(lambda), one path per sample, with facet rows
# given by `type` and facet columns by `alpha` (independent y scales per row).
ggplot(df$hba1c) +
  geom_path(aes(x = log(lambda), y = partial_r2, color = sample)) +
  facet_grid(type ~ alpha, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom') +
  scale_color_manual(values = color_mixer) +
  ggtitle('EN PTRS HbA1c')
# T2D: ROC AUC against log(lambda), one path per sample, with facet rows
# given by `type` and facet columns by `alpha` (independent y scales per row).
ggplot(df$t2d) +
  geom_path(aes(x = log(lambda), y = roc_auc, color = sample)) +
  facet_grid(type ~ alpha, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom') +
  scale_color_manual(values = color_mixer) +
  ggtitle('EN PTRS T2D')
# Best performance per (sample, type, trait), excluding the
# 'British_insample' sample: maximum partial R2 for HbA1c and maximum ROC AUC
# for T2D, stacked into one table with the column `best_perf`.
#
# Missing metric values are ignored when taking the maximum, so a single NA
# no longer turns the whole group's best performance into NA. A group whose
# metric is entirely missing still yields NA (not -Inf).
df_best = rbind(
  df$hba1c %>%
    filter(sample != 'British_insample') %>%
    group_by(sample, type, trait) %>%
    summarize(
      best_perf = if (all(is.na(partial_r2))) NA_real_ else max(partial_r2, na.rm = TRUE)
    ) %>%
    ungroup(),
  df$t2d %>%
    filter(sample != 'British_insample') %>%
    group_by(sample, type, trait) %>%
    summarize(
      best_perf = if (all(is.na(roc_auc))) NA_real_ else max(roc_auc, na.rm = TRUE)
    ) %>%
    ungroup()
)
# Dodged bars of the best performance per sample, filled by `type`, with one
# panel per trait (independent y scales).
ggplot(df_best) +
  geom_bar(
    aes(x = sample, y = best_perf, fill = type),
    stat = 'identity',
    position = 'dodge'
  ) +
  facet_wrap(~trait, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom')