library(dplyr)
library(data.table)
library(pander)
library(ggplot2)
# Do not split wide pander tables across several blocks.
panderOptions('table.split.table', Inf)

# Default ggplot2 theme for every figure in this document.
theme_set(theme_bw(base_size=12))

# The sourced script is expected to define the theme object `th`; the panel
# border is then drawn in the same colour as the axis lines.
source('https://gist.githubusercontent.com/liangyy/43912b3ecab5d10c89f9d4b2669871c9/raw/8151c6fe70e3d4ee43d9ce340ecc0eb65172e616/my_ggplot_theme.R')
th$panel.border = element_rect(colour = th$axis.line$colour)

# Named colour palette, one hex colour per sample label.
color_mixer = c(
  'African' = '#E74C3C',
  'British_test' = '#28B463',
  'British_valid' = '#D4AC0D',
  'Chinese' = '#3498DB',
  'Indian' = '#9B59B6',
  'British_insample' = '#AAB7B8'
)
# Convert labels such as 'pval_0.05' to numbers.
#
# x: vector of labels (coerced to character by sub()).
# Returns a numeric vector of the same length as x: the first literal
# occurrence of 'pval_' is removed from each element and the remainder is
# passed to as.numeric(). Elements that are not numeric after the removal
# become NA (with the usual coercion warning).
#
# Base sub() with fixed = TRUE is used so the helper does not require the
# stringr package; 'pval_' contains no regex metacharacters, so literal
# matching gives the same result as the regex match.
my_parser = function(x) {
  as.numeric(sub('pval_', '', x, fixed = TRUE))
}

1 Overview

Here we implement an elastic-net-based PTRS predictor along the regularization path. We train the PTRS using self-reported T2D status and HbA1c concentration. For the binary trait (T2D), we train the elastic net with cases encoded as 1 and controls as 0, and fit an MSE objective.

2 Load data

pred_models = c('gtex', 'mesa')
traits = c('t2d', 'hba1c')

# Read the elastic net PTRS performance table for one trait / prediction-model
# pair and annotate it.
#
# trait:     trait tag used in the file name (one of `traits`).
# pred_expr: prediction-model tag used in the file name (one of `pred_models`).
# Returns the table as a data.frame with two extra columns:
#   pop  - 'EUR' where pred_expr_source is 'train', 'AFHI' for any other
#          value (NA stays NA);
#   type - pred_expr and pop joined by a newline (used as a facet label).
# Rows whose first column is NA are dropped.
load_en_performance = function(trait, pred_expr) {
  perf_file = paste0('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_', trait, '_ptrs_', pred_expr, '_british.performance.csv')
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  perf = read.csv(perf_file, stringsAsFactors = FALSE)
  perf$pop = ifelse(perf$pred_expr_source == 'train', 'EUR', 'AFHI')
  perf[!is.na(perf[, 1]), ] %>% mutate(type = paste0(pred_expr, '\n', pop))
}

# df is a list with one element per trait; each element stacks the tables of
# all prediction models for that trait.
df = lapply(traits, function(tt) {
  per_model = lapply(pred_models, function(pred_expr) load_en_performance(tt, pred_expr))
  do.call(rbind, per_model)
})
names(df) = traits

3 EN PTRS along regularization path (HbA1c)

# HbA1c: partial_r2 against log(lambda), one coloured path per sample.
# Panels are laid out with `type` in rows and `alpha` in columns; the y axis
# is free across rows.
ggplot(df$hba1c) +
  geom_path(aes(x = log(lambda), y = partial_r2, color = sample)) +
  facet_grid(type~alpha, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom') +
  scale_color_manual(values = color_mixer) +
  ggtitle('EN PTRS HbA1c')

4 EN PTRS along regularization path (T2D)

# T2D: roc_auc against log(lambda), one coloured path per sample.
# Panels are laid out with `type` in rows and `alpha` in columns; the y axis
# is free across rows.
ggplot(df$t2d) +
  geom_path(aes(x = log(lambda), y = roc_auc, color = sample)) +
  facet_grid(type~alpha, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom') +
  scale_color_manual(values = color_mixer) +
  ggtitle('EN PTRS T2D')

5 Best performance along regularization path

# Largest partial_r2 per (sample, type, trait) for HbA1c, leaving out the
# 'British_insample' rows.
best_hba1c = df$hba1c %>%
  filter(sample != 'British_insample') %>%
  group_by(sample, type, trait) %>%
  summarize(best_perf = max(partial_r2)) %>%
  ungroup()

# Largest roc_auc per (sample, type, trait) for T2D, same exclusion.
best_t2d = df$t2d %>%
  filter(sample != 'British_insample') %>%
  group_by(sample, type, trait) %>%
  summarize(best_perf = max(roc_auc)) %>%
  ungroup()

# Stack both traits; best_perf holds partial_r2 for HbA1c rows and roc_auc for
# T2D rows, so the two facets below are on different scales.
df_best = rbind(best_hba1c, best_t2d)

# Dodged bars of the best value per sample, filled by type, one facet per
# trait with its own y axis.
ggplot(df_best) +
  geom_col(aes(x = sample, y = best_perf, fill = type), position = 'dodge') +
  facet_wrap(~trait, scales = 'free_y') +
  th +
  theme(legend.position = 'bottom')