library(ggplot2)
library(ggrepel)
library(dplyr)
library(pander)
options(stringsAsFactors = F)
panderOptions('table.split.table', Inf)
source('../code/rlib_doc.R')
source('https://gist.githubusercontent.com/liangyy/43912b3ecab5d10c89f9d4b2669871c9/raw/3ca651cfa53ffccb8422f432561138a46e93710f/my_ggplot_theme.R')
theme_set(theme_bw(base_size = 15))
o = get_meta_for_supp()
df_color_category = o$df_color_category
color_mixer = o$color_mixer
pop_color_mixer = o$pop_color_mixer
# type_shape = o$type_shape
type_shape = c('PTRS' = 15, 'PRS' = 19)
score_color_code = o$score_color_code
non_eur_pops = c('African', 'Chinese', 'Indian', 'Caribbean')
pop_map = data.frame(
pop = c('African', 'Chinese', 'Indian', 'Caribbean', 'British_test'),
pop2 = c('AFR', 'E.ASN', 'S.ASN', 'CAR', 'EUR')
)
pop_map2 = data.frame(
pop = c('African', 'Chinese', 'Indian', 'Caribbean', 'British_test', 'British_validation', 'British_valid'),
pop2 = c('AFR', 'E.ASN', 'S.ASN', 'CAR', 'EUR test', 'EUR ref.', 'EUR ref.')
)
pop_color_mixer2 = c('AFR' = "#E74C3C", "British_test" = "#28B463", "British_validation" = "#D4AC0D",
'E.ASN' = "#3498DB", 'S.ASN' = "#9B59B6", 'CAR' = "#FFFF00")
score_color_code = c('PRS' = "#999999", 'PTRS (GTEx EUR)' = "#E69F00", 'PTRS (MESA EUR)' = "#E69F00", 'PTRS (MESA AFHI)' = "#56B4E9", 'PTRS (MESA ALL)' = "#F633FF")
df_h2 = load_hsq('~/Desktop/tmp/ptrs-ukb_misc/hsq.data_split.British-test-1.txt')
bad_traits = NULL # df_h2$trait[ is.na(df_h2$h_sq) | df_h2$h_sq < 0.05 ]
bad_traits_pve = NULL # df_h2$trait[ is.na(df_h2$h_sq) | df_h2$h_sq < 0.05 ]
df_pve = load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail_martin_et_al_traits_x_ctimp_Whole_Blood_x_British-test-1.tsv') %>% mutate(pop = 'British_test')
df_pve = df_pve[ ! df_pve$trait %in% bad_traits_pve, ]
df_pve10 = load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail-multi-tissue-tissue_svd_x_martin_et_al_traits_x_ctimp_x_British-test-1.tsv')
df_pve10 = df_pve10[ ! df_pve10$trait %in% bad_traits_pve, ]
df_pve_ma = load_hsq('~/Desktop/tmp/ptrs-ukb_misc/old/reml_from_hail_martin_et_al_traits_x_ALL_x_British-test-1.tsv') %>% mutate(pop = 'British_test')
df_pve_ma = df_pve_ma[ ! df_pve_ma$trait %in% bad_traits_pve, ]
ratio_fe_wb = calc_regu(df_h2, df_pve)
ratio_fe_ma = calc_regu(df_h2, df_pve_ma)
ratio_fe_10 = calc_regu(df_h2, df_pve10)
rbind(
as.data.frame(ratio_fe_ma$fe, col.names = c('ratio_mean', 'ratio_se')) %>% mutate(tissue = 'MESA (ALL) Monocyte'),
as.data.frame(ratio_fe_wb$fe, col.names = c('ratio_mean', 'ratio_se')) %>% mutate(tissue = 'GTEx EUR Whole Blood'),
as.data.frame(ratio_fe_10$fe, col.names = c('ratio_mean', 'ratio_se')) %>% mutate(tissue = 'GTEx EUR Top 10 Tissues')
) %>%
rename(regulability_mean = ratio_mean, regulability_se = ratio_se) %>%
pander
regulability_mean | regulability_se | tissue |
---|---|---|
0.2176 | 0.03079 | MESA (ALL) Monocyte |
0.2285 | 0.02939 | GTEx EUR Whole Blood |
0.3554 | 0.04688 | GTEx EUR Top 10 Tissues |
unstable_traits = df_h2$trait[ is.na(df_h2$h_sq) | df_h2$h_sq < 0.05 ]
rbind(
ratio_fe_wb$raw %>% mutate(tissue = 'GTEx EUR Whole blood'),
ratio_fe_ma$raw %>% mutate(tissue = 'MESA EUR Monocyte'),
ratio_fe_10$raw %>% mutate(tissue = 'GTEx EUR Top 10 tissues')) %>%
mutate(tissue = factor(tissue, levels = c('MESA EUR Monocyte', 'GTEx EUR Whole blood', 'GTEx EUR Top 10 tissues'))) %>%
filter(! trait %in% unstable_traits) %>%
ggplot() +
geom_violin(aes(x = tissue, y = ratio_mean * 100)) +
geom_boxplot(aes(x = tissue, y = ratio_mean * 100), width = .2) + th +
ylab('% of heritability explained')
Takeaway: Proportion of heritability explained by predicted transcrptome is shown for MESA ALL and MESA EUR models and GTEx 10 tissues combined (using European individuals).
ratio_min = ratio_fe_wb$fe$m - 1.96 * ratio_fe_wb$fe$se
ratio_max = ratio_fe_wb$fe$m + 1.96 * ratio_fe_wb$fe$se
df_merge = inner_join(
df_h2, df_pve %>% filter(pop == 'British_test'),
by = c('trait'),
suffix = c('.h2', '.pve')
)
df_merge %>% left_join(df_color_category, by = 'trait') %>%
ggplot(aes(color = group)) +
geom_abline(slope = ratio_min, intercept = 0) +
geom_abline(slope = ratio_max, intercept = 0) +
geom_errorbar(aes(x = h_sq.h2, ymin = h_sq.pve - 1.96 * h_sq_se.pve, ymax = h_sq.pve + 1.96 * h_sq_se.pve)) +
geom_errorbarh(aes(xmin = h_sq.h2 - 1.96 * h_sq_se.h2, xmax = h_sq.h2 + 1.96 * h_sq_se.h2, y = h_sq.pve)) +
geom_point(aes(x = h_sq.h2, y = h_sq.pve)) + th +
scale_color_manual(values = color_mixer) + coord_equal() +
theme(
legend.position = c(.15, .8),
legend.text = element_text(size = 12),
legend.title = element_blank(),
legend.background = element_rect(fill = alpha('white', 0))
) +
xlab('Estimated heritability') +
ylab('PVE of predicted transcriptome \n in GTEx whole blood')
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
Takeaway: Comparing heritability vs PTRS PVE (using European individuals).
df_cau = rbind(
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/new_split/reml_from_hail_martin_et_al_traits_x_CAU_x_African.tsv') %>%
mutate(population = 'AFR'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail_martin_et_al_traits_x_CAU_x_British-test-1.tsv') %>%
mutate(population = 'EUR'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/new_split/reml_from_hail_martin_et_al_traits_x_CAU_x_Indian.tsv') %>%
mutate(population = 'S.ASN'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail_martin_et_al_traits_x_CAU_x_Chinese.tsv') %>%
mutate(population = 'E.ASN')
)
df_afhi = rbind(
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/new_split/reml_from_hail_martin_et_al_traits_x_AFHI_x_African.tsv') %>%
mutate(population = 'AFR'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail_martin_et_al_traits_x_AFHI_x_British-test-1.tsv') %>%
mutate(population = 'EUR'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/new_split/reml_from_hail_martin_et_al_traits_x_AFHI_x_Indian.tsv') %>%
mutate(population = 'S.ASN'),
load_hsq('~/Desktop/tmp/ptrs-ukb_misc/reml_from_hail_martin_et_al_traits_x_AFHI_x_Chinese.tsv') %>%
mutate(population = 'E.ASN')
)
df_mesa = rbind(
df_afhi %>% mutate(model = 'AFHI'), df_cau %>% mutate(model = 'CAU')
)
df_mesa = df_mesa[ ! df_mesa$trait %in% bad_traits, ]
afr = df_mesa[ df_mesa$population == 'AFR', ]
# t.test(afr$h_sq[afr$model == 'AFHI'], afr$h_sq[afr$model == 'CAU'], paired = T)
tmp_m = df_mesa %>% reshape2::dcast(trait + population ~ model, value.var = 'h_sq')
tmp_se = df_mesa %>% reshape2::dcast(trait + population ~ model, value.var = 'h_sq_se')
diff = inner_join(tmp_m, tmp_se, by = c('trait', 'population'), suffix = c('.mean', '.se'))
diff = diff %>% mutate(diff = AFHI.mean - CAU.mean, diff_se = sqrt(AFHI.se ^ 2 + CAU.se ^ 2))
diff_sum = diff %>% filter(!(is.na(AFHI.se) | is.na(CAU.se))) %>%
group_by(population) %>% do(as.data.frame(meta_fixed(.$diff, .$diff_se)))
diff_sum = diff %>% filter(!(is.na(AFHI.se) | is.na(CAU.se))) %>%
group_by(population) %>% do(test_afhi_vs_cau(.$AFHI.mean, .$CAU.mean))
# diff_sum %>% mutate(pop2 = order_pop(population)) %>% ggplot() +
# geom_hline(yintercept = 0, linetype = 2) +
# geom_errorbar(aes(x = pop2, ymin = m - 1.96 * se, ymax = m + 1.96 * se), width = 0.1) +
# geom_point(aes(x = pop2, y = m), size = 5, color = 'gray') +
# ylab('Change in PVE \n (MESA AFHI - MESA EUR)') +
# xlab('Target population') + th
diff_sum %>% mutate(pop2 = order_pop(population)) %>% ggplot() +
geom_hline(yintercept = 0, linetype = 2) +
geom_errorbar(aes(x = pop2, ymin = ci_low, ymax = ci_high), width = 0.1) +
geom_point(aes(x = pop2, y = delta_pve), size = 5, color = 'gray') +
ylab('Change in PVE \n (MESA AFHI - MESA EUR)') +
xlab('Target population') + th
diff_sum %>% pander(caption = 'MESA AFHI PVE vs. MESA EUR PVE')
population | delta_pve | ci_low | ci_high | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|
AFR | 0.01128 | -0.0004882 | 0.02305 | 104 | 0.0654 |
E.ASN | 0.01687 | -0.01051 | 0.04424 | 34 | 0.2031 |
EUR | 0.003171 | -0.005731 | 0.01207 | 56 | 0.4973 |
S.ASN | 0.003004 | -0.005168 | 0.01118 | 74 | 0.4543 |
Takeaway: The PTRS PVE difference between MESA AFHI and EUR models.
diff %>% mutate(pop2 = order_pop(population)) %>% left_join(df_color_category, by = 'trait') %>% ggplot() +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = CAU.mean, y = AFHI.mean, color = group)) +
facet_wrap(~pop2, ncol = 5) + coord_equal() +
scale_color_manual(values = color_mixer) +
th2 +
theme(legend.position = 'none') +
xlab('PVE of predicted transcriptome \n in MESA EUR monocyte') +
ylab('PVE of predicted transcriptome \n in MESA AFHI monocyte') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Takeaway: As a supplementary for the PTRS PVE difference between MESA AFHI and EUR models, we show the PVE of MESA AFHI and EUR models directly.
df1 = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_ptrs_gtex_british_updated_indivs_lambda.performance.csv') %>% filter(!trait %in% bad_traits)
df1pt = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/pt_ptrs_gtex_british_updated_indivs_lambda.performance.csv') %>% filter(!trait %in% bad_traits)
df2_cau = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_ptrs_mesa_british_updated_lambda.performance.csv', desired_tag = 'train') %>% filter(!trait %in% bad_traits)
df2pt_cau = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/pt_ptrs_mesa_british_updated_lambda.performance.csv', desired_tag = 'train') %>% filter(!trait %in% bad_traits)
df2_afhi = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_ptrs_mesa_british_updated_lambda.performance.csv', desired_tag = 'against') %>% filter(!trait %in% bad_traits)
df2pt_afhi = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/pt_ptrs_mesa_british_updated_lambda.performance.csv', desired_tag = 'against') %>% filter(!trait %in% bad_traits)
df3 = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/elastic_net_ptrs_mesa_all_british_updated_lambda.performance.csv') %>% filter(!trait %in% bad_traits)
df3pt = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/pt_ptrs_mesa_all_british_updated_lambda.performance.csv') %>% filter(!trait %in% bad_traits)
df4 = load_best('~/Desktop/tmp/ptrs-tf/from_nucleus/prs_british_updated_indivs_lambda.performance.csv') %>% filter(!trait %in% bad_traits)
df4$sample[ df4$sample == 'British_validation' ] = 'British_valid'
prs = inner_join(df4 %>% filter(sample == 'British_test'), df_h2, by = 'trait')
ptrs = inner_join(df1 %>% filter(sample == 'British_test'), df_pve %>% filter(pop == 'British_test') %>% select(-pop), by = 'trait')
rbind(prs %>% mutate(method = 'PRS'), ptrs %>% mutate(method = 'PTRS')) %>%
left_join(df_color_category, by = 'trait') %>%
ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
# geom_errorbar(aes(x = h_sq, ymin = r2_mean - 1.96 * r2_sd, ymax = r2_mean + 1.96 * r2_sd)) +
geom_errorbarh(aes(xmin = h_sq - 1.96 * h_sq_se, xmax = h_sq + 1.96 * h_sq_se, y = r2_max)) +
geom_point(aes(x = h_sq, y = r2_max, shape = method), size = 3) +
th +
scale_color_manual(values = color_mixer) +
scale_shape_manual(values = type_shape) +
coord_equal() +
theme(legend.position = 'top', legend.title = element_blank()) +
xlab('Heritability or PTRS PVE') +
ylab(expression(tilde(R)^2 * 'of PRS or PTRS (GTEx EUR)'))
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
Takeaway: Comparing heritability vs PRS R2 and PTRS PVE vs PTRS R2 (GTEx EUR) using European individuals.
# DO IT LATER
# ptrs = inner_join(df1, df_pve, by = c('trait', 'sample' = 'pop'))
# ptrs %>%
# filter(sample %in% non_eur_pops) %>%
# left_join(pop_map, by = c('sample' = 'pop')) %>%
# mutate(pop2 = order_pop(pop2)) %>%
# left_join(df_color_category, by = 'trait') %>%
# ggplot(aes(color = pop2)) +
# geom_abline(slope = 1, intercept = 0) +
# # geom_errorbar(aes(x = h_sq, ymin = r2_mean - 1.96 * r2_sd, ymax = r2_mean + 1.96 * r2_sd)) +
# # geom_errorbarh(aes(xmin = h_sq - 1.96 * h_sq_se, xmax = h_sq + 1.96 * h_sq_se, y = r2_mean)) +
# geom_point(aes(x = h_sq, y = r2_max), size = 3) +
# th +
# scale_color_manual(values = pop_color_mixer2) +
# coord_equal() +
# theme(legend.position = 'top', legend.title = element_blank()) +
# xlab('PTRS PVE') +
# ylab(expression(tilde(R)^2 * 'of PTRS (GTEx EUR)'))
prs = inner_join(df4 %>% filter(sample == 'British_test'), df_h2, by = 'trait')
ptrs = inner_join(df2_cau %>% filter(sample == 'British_test'), df_pve_ma %>% filter(pop == 'British_test') %>% select(-pop), by = 'trait')
rbind(prs %>% mutate(method = 'PRS'), ptrs %>% mutate(method = 'PTRS')) %>%
left_join(df_color_category, by = 'trait') %>%
filter(method != 'PRS') %>%
ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
# geom_errorbar(aes(x = h_sq, ymin = r2_mean - 1.96 * r2_sd, ymax = r2_mean + 1.96 * r2_sd)) +
geom_errorbarh(aes(xmin = h_sq - 1.96 * h_sq_se, xmax = h_sq + 1.96 * h_sq_se, y = r2_max)) +
geom_point(aes(x = h_sq, y = r2_max, shape = method), size = 3) +
th +
scale_color_manual(values = color_mixer) +
scale_shape_manual(values = type_shape) +
coord_equal() +
theme(legend.position = 'top', legend.title = element_blank()) +
xlab('PTRS PVE') +
ylab(expression(tilde(R)^2 * 'of PTRS (MESA EUR)'))
## Warning: Removed 3 rows containing missing values (geom_errorbarh).
Takeaway: Comparing PTRS PVE vs PTRS R2 (MESA EUR) using European individuals.
kk = inner_join(
df1 %>% filter(sample == 'British_test'),
df4 %>% filter(sample == 'British_test'),
by = 'trait', suffix = c('.ptrs', '.prs')
)
kk %>% left_join(df_color_category, by = 'trait') %>% ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.prs, y = r2_max.ptrs)) +
# geom_errorbar(aes(x = r2_mean.prs, ymin = r2_mean.ptrs - 1.96 * r2_sd.ptrs, ymax = r2_mean.ptrs + 1.96 * r2_sd.ptrs)) +
# geom_errorbarh(aes(xmin = r2_max.prs - 1.96 * r2_sd.prs, xmax = r2_max.prs + 1.96 * r2_sd.prs, y = r2_mean.ptrs)) +
geom_label_repel(aes(x = r2_max.prs, y = r2_max.ptrs, label = trait)) + th +
xlab('Prediction accuracy of PRS') +
ylab('Prediction accuracy of PTRS') +
scale_color_manual(values = color_mixer) + coord_equal() + theme(legend.position = 'top', legend.title = element_blank()) + guides(color = guide_legend(nrow = 2, byrow = TRUE))
Takeaway: R2 comparison: PRS vs PTRS (GTEx EUR) using European individuals.
kk = inner_join(
df1 %>% filter(sample %in% c('British_test', non_eur_pops)),
df4 %>% filter(sample %in% c('British_test', non_eur_pops)),
by = c('trait', 'sample'), suffix = c('.ptrs', '.prs')
)
kk %>% left_join(df_color_category, by = 'trait') %>% ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.prs, y = r2_max.ptrs)) +
# geom_errorbar(aes(x = r2_mean.prs, ymin = r2_mean.ptrs - 1.96 * r2_sd.ptrs, ymax = r2_mean.ptrs + 1.96 * r2_sd.ptrs)) +
# geom_errorbarh(aes(xmin = r2_mean.prs - 1.96 * r2_sd.prs, xmax = r2_mean.prs + 1.96 * r2_sd.prs, y = r2_mean.ptrs)) +
# geom_label_repel(aes(x = r2_mean.prs, y = r2_mean.ptrs, label = trait)) + th +
xlab('Prediction accuracy of PRS') +
ylab('Prediction accuracy of \n PTRS (GTEx EUR)') +
scale_color_manual(values = color_mixer) +
# coord_equal() +
theme(legend.position = 'top', legend.title = element_blank()) + guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
facet_wrap(~sample, ncol = 5, scales = 'free') + th2 + theme(legend.position = 'none')
Takeaway: As a supplementary of PRS vs PTRS R2 comparison, we also show non-European results.
kk = inner_join(
df1 %>% filter(sample == 'British_test'),
df1pt %>% filter(sample == 'British_test'),
by = 'trait', suffix = c('.en', '.pt')
)
kk %>% left_join(df_color_category, by = 'trait') %>% ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.pt, y = r2_max.en)) +
geom_label_repel(aes(x = r2_max.pt, y = r2_max.en, label = trait)) + th +
xlab('Prediction accuracy of PTRS (P+T)') +
ylab('Prediction accuracy of PTRS (EN)') +
scale_color_manual(values = color_mixer) + coord_equal() + theme(legend.position = 'top', legend.title = element_blank()) + guides(color = guide_legend(nrow = 2, byrow = TRUE))
Takeaway: Comparing GTEx EUR PTRS R2 for PTRS (EN) and PTRS (P+T) using European individuals.
kk = inner_join(
df3 %>% filter(sample == 'British_test'),
df3pt %>% filter(sample == 'British_test'),
by = 'trait', suffix = c('.en', '.pt')
)
kk %>% left_join(df_color_category, by = 'trait') %>% ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.pt, y = r2_max.en)) +
geom_label_repel(aes(x = r2_max.pt, y = r2_max.en, label = trait)) + th +
xlab('Prediction accuracy of PTRS (P+T)') +
ylab('Prediction accuracy of PTRS (EN)') +
scale_color_manual(values = color_mixer) + coord_equal() + theme(legend.position = 'top', legend.title = element_blank()) + guides(color = guide_legend(nrow = 2, byrow = TRUE))
Takeaway: As a supplementary, we also compare MESA ALL PTRS R2 for PTRS (EN) and PTRS (P+T) using European individuals.
tmp = inner_join(
df2_cau %>% filter(sample %in% c(non_eur_pops, 'British_test')) %>% mutate(model = 'MESA EUR'),
df2_afhi %>% filter(sample %in% c(non_eur_pops, 'British_test')) %>% mutate(model = 'MESA AFHI'),
by = c('sample', 'trait'), suffix = c('.eur', '.afhi')
)
tmp %>% left_join(df_color_category, by = 'trait') %>% left_join(pop_map, by = c('sample' = 'pop')) %>%
mutate(pop2 = order_pop(pop2)) %>%
ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.eur, y = r2_max.afhi)) +
facet_wrap(~pop2, scales = 'free', ncol = 5) +
coord_cartesian() + th2 +
scale_color_manual(values = color_mixer) + theme(legend.position = 'none') +
ylab(expression(tilde(R)^2 * 'of PTRS (MESA AFHI)')) +
xlab(expression(tilde(R)^2 * 'of PTRS (MESA CAU)')) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
diff_sum = tmp %>% group_by(sample) %>% do(test_afhi_vs_cau(.$r2_max.afhi, .$r2_max.eur))
diff_sum %>% left_join(pop_map, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>% ggplot() +
geom_hline(yintercept = 0, linetype = 2) +
geom_errorbar(aes(x = pop2, ymin = ci_low, ymax = ci_high), width = 0.1) +
geom_point(aes(x = pop2, y = delta_pve), size = 5, color = 'gray') +
ylab('Change in accuracy \n (MESA AFHI - MESA EUR)') +
xlab('Target population') + th
diff_sum %>% pander('Predictive performance MESA AFHI vs MESA EUR')
sample | delta_pve | ci_low | ci_high | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|
African | 0.0004768 | -0.0006085 | 0.001562 | 86 | 0.6777 |
British_test | -0.00718 | -0.01038 | -0.003977 | 0 | 1.526e-05 |
Chinese | -0.002348 | -0.005267 | 0.0005703 | 44 | 0.1324 |
Indian | -0.003565 | -0.00464 | -0.00249 | 1 | 3.052e-05 |
Takeaway: Comparing PTRS R2 for MESA EUR and MESA AFHI.
tmp = inner_join(
df2pt_cau %>% filter(sample %in% c(non_eur_pops, 'British_test')) %>% mutate(model = 'MESA EUR'),
df2pt_afhi %>% filter(sample %in% c(non_eur_pops, 'British_test')) %>% mutate(model = 'MESA AFHI'),
by = c('sample', 'trait'), suffix = c('.eur', '.afhi')
)
tmp %>% left_join(df_color_category, by = 'trait') %>% left_join(pop_map, by = c('sample' = 'pop')) %>%
mutate(pop2 = order_pop(pop2)) %>%
ggplot(aes(color = group)) +
geom_abline(slope = 1, intercept = 0) +
geom_point(aes(x = r2_max.eur, y = r2_max.afhi)) +
facet_wrap(~pop2, scales = 'free', ncol = 5) +
coord_cartesian() + th2 +
scale_color_manual(values = color_mixer) + theme(legend.position = 'none') +
ylab(expression(tilde(R)^2 * 'of PTRS (MESA AFHI)')) +
xlab(expression(tilde(R)^2 * 'of PTRS (MESA CAU)')) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
diff_sum = tmp %>% group_by(sample) %>% do(test_afhi_vs_cau(.$r2_max.afhi, .$r2_max.eur))
diff_sum %>% left_join(pop_map, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>% ggplot() +
geom_hline(yintercept = 0, linetype = 2) +
geom_errorbar(aes(x = pop2, ymin = ci_low, ymax = ci_high), width = 0.1) +
geom_point(aes(x = pop2, y = delta_pve), size = 5, color = 'gray') +
ylab('Change in accuracy \n (MESA AFHI - MESA EUR)') +
xlab('Target population') + th
diff_sum %>% pander('Predictive performance MESA AFHI vs MESA EUR')
sample | delta_pve | ci_low | ci_high | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|
African | -0.0005255 | -0.001392 | 0.0003406 | 49 | 0.2069 |
British_test | -0.006546 | -0.008901 | -0.004191 | 0 | 1.526e-05 |
Chinese | -0.003234 | -0.005262 | -0.001207 | 20 | 0.005569 |
Indian | -0.004591 | -0.006102 | -0.003079 | 0 | 1.526e-05 |
Takeaway: As a supplementary, we also compare PTRS (P+T) R2 for MESA EUR and MESA AFHI.
df1p = calc_port_max(df1)
df1ptp = calc_port_max(df1pt)
ref_mesa = df2_cau %>% filter(sample == 'British_valid')
df2p_cau = calc_port_max(df2_cau)
df2p_afhi = calc_port_max(df2_afhi, ref_mesa)
df2ptp_cau = calc_port_max(df2pt_cau)
df2ptp_afhi = calc_port_max(df2pt_afhi, ref_mesa)
df3p = calc_port_max(df3)
df3ptp = calc_port_max(df3pt)
df4p = calc_port_max(df4)
tmp = rbind(
df1p %>% mutate(method = 'PTRS (GTEx EUR)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
tmp %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
tmp %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
group_by(sample) %>%
do(test_a_vs_b(.$PRS, .$`PTRS (GTEx EUR)`)) %>%
pander('Portability: GTEx EUR whole blood')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | -0.09734 | -0.1647 | -0.02995 | 0.007447 | 24 | 0.01099 |
British_test | -0.1061 | -0.336 | 0.1238 | 0.3423 | 70 | 0.7819 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.05609 | -0.1889 | 0.3011 | 0.634 | 97 | 0.3529 |
Indian | 0.02279 | -0.1603 | 0.2059 | 0.7953 | 94 | 0.4307 |
Takeaway: GTEx EUR PTRS portability
tmp = rbind(
df1ptp %>% mutate(method = 'PTRS (GTEx EUR)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
tmp %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
tmp %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
group_by(sample) %>%
do(test_a_vs_b(.$PRS, .$`PTRS (GTEx EUR)`)) %>%
pander('Portability: GTEx EUR whole blood')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | -0.09622 | -0.1591 | -0.03331 | 0.0051 | 27 | 0.01743 |
British_test | -0.07947 | -0.2143 | 0.05541 | 0.2296 | 61 | 0.4874 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.1429 | -0.05519 | 0.3409 | 0.1458 | 113 | 0.08865 |
Indian | 0.04915 | -0.08561 | 0.1839 | 0.4507 | 98 | 0.3289 |
Takeaway: GTEx EUR PTRS (P+T) portability
tmp = rbind(
df3p %>% mutate(method = 'PTRS (MESA ALL)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
# tmp %>%
# left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
# ggplot() +
# geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
# geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
# theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
# tmp %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA ALL)`)) %>%
# pander('Portability: MESA ALL')
opops = c('Caribbean', 'African')
outlier = tmp %>% filter(sample %in% opops, portability > 0.5)
tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
group_by(sample) %>%
do(test_a_vs_b(.$PRS, .$`PTRS (MESA ALL)`)) %>%
pander('Portability: MESA ALL (remove outlier)')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | -0.03247 | -0.0946 | 0.02967 | 0.2829 | 55 | 0.5282 |
British_test | -0.03034 | -0.1432 | 0.08251 | 0.5766 | 75 | 0.9632 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.1888 | -0.02222 | 0.3998 | 0.07608 | 120 | 0.03954 |
Indian | 0.07757 | -0.008981 | 0.1641 | 0.07562 | 112 | 0.09837 |
tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank()) +
theme(axis.title.x = element_blank())
Takeaway: MESA ALL PTRS portability (removed outliers)
tmp = rbind(
df3ptp %>% mutate(method = 'PTRS (MESA ALL)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
tmp %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
tmp %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA ALL)`)) %>%
pander('Portability: MESA ALL')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | 0.005371 | -0.05576 | 0.0665 | 0.8546 | 88 | 0.6112 |
British_test | 0.02336 | -0.07121 | 0.1179 | 0.6076 | 88 | 0.6112 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.185 | -0.02036 | 0.3903 | 0.07427 | 118 | 0.05054 |
Indian | 0.1246 | -0.00587 | 0.255 | 0.05994 | 124 | 0.02322 |
# opops = c('Caribbean', 'African')
# outlier = tmp %>% filter(sample %in% opops, portability > 0.5)
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
# group_by(sample) %>%
# do(test_a_vs_b(.$PRS, .$`PTRS (MESA ALL)`)) %>%
# pander('Portability: MESA ALL (remove outlier)')
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
# ggplot() +
# geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
# geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
# theme(legend.position = c(0.8, 0.8), legend.title = element_blank()) +
# theme(axis.title.x = element_blank())
Takeaway: MESA ALL PTRS (P+T) portability
tmp = rbind(
df2p_cau %>% mutate(method = 'PTRS (MESA EUR)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
tmp %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
tmp %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`)) %>%
pander('Portability: MESA EUR')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | -0.02218 | -0.07387 | 0.02951 | 0.3765 | 58 | 0.4038 |
British_test | -0.005655 | -0.1703 | 0.159 | 0.9429 | 84 | 0.7467 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.08961 | -0.1082 | 0.2874 | 0.3511 | 102 | 0.2435 |
Indian | 0.1303 | -0.04619 | 0.3069 | 0.1371 | 112 | 0.09837 |
opops = c('Caribbean', 'African')
# outlier = tmp %>% filter(sample %in% opops, portability > 0.5)
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
# group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`)) %>%
# pander('Portability: MESA EUR (remove outlier)')
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
# ggplot() +
# geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
# geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
# theme(legend.position = c(0.8, 0.8), legend.title = element_blank()) +
# theme(axis.title.x = element_blank())
Takeaway: MESA EUR PTRS portability
tmp = rbind(
df2ptp_cau %>% mutate(method = 'PTRS (MESA EUR)'),
df4p %>% mutate(method = 'PRS')
) %>% filter(sample != 'British_insample')
tmp %>%
left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
ggplot() +
geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
theme(legend.position = c(0.8, 0.8), legend.title = element_blank())
tmp %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`)) %>%
pander('Portability: MESA EUR')
## Warning in wilcox.test.default(a, b, paired = TRUE): cannot compute exact
## p-value with zeroes
sample | delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval |
---|---|---|---|---|---|---|
African | -0.01252 | -0.07119 | 0.04615 | 0.657 | 77 | 1 |
British_test | 0.05953 | -0.06504 | 0.1841 | 0.3261 | 94 | 0.4307 |
British_valid | 0 | NA | NA | NA | 0 | NA |
Chinese | 0.04772 | -0.1249 | 0.2203 | 0.5659 | 91 | 0.5171 |
Indian | 0.1367 | -0.02054 | 0.2939 | 0.08395 | 121 | 0.03479 |
opops = c('Caribbean', 'African')
# outlier = tmp %>% filter(sample %in% opops, portability > 0.5)
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
# group_by(sample) %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`)) %>%
# pander('Portability: MESA EUR (remove outlier)')
# tmp %>% filter((!sample %in% opops) | (!trait %in% outlier$trait)) %>%
# left_join(pop_map2, by = c('sample' = 'pop')) %>% mutate(pop2 = order_pop(pop2)) %>%
# ggplot() +
# geom_violin(aes(x = pop2, y = portability, color = method), position = position_dodge(0.65)) +
# geom_boxplot(aes(x = pop2, y = portability, color = method), width = 0.1, position = position_dodge(0.65)) + th + scale_color_manual(values = score_color_code) +
# theme(legend.position = c(0.8, 0.8), legend.title = element_blank()) +
# theme(axis.title.x = element_blank())
Takeaway: MESA EUR PTRS (P+T) portability
tmp2 = rbind(
df2p_afhi %>% mutate(method = 'PTRS (MESA AFHI)') %>% filter(sample != 'British_insample', sample != 'British_test'),
df4p %>% mutate(method = 'PRS'),
df2p_cau %>% mutate(method = 'PTRS (MESA EUR)')
)
outlier = tmp %>% filter(sample == 'African', portability > 0.5)
# tmp2 %>% filter(sample == 'African') %>%
# mutate(method = order_method(method)) %>%
# ggplot() +
# geom_violin(aes(x = sample, y = portability, color = method), position = position_dodge(0.5)) +
# geom_boxplot(aes(x = sample, y = portability, color = method), width = 0.1, position = position_dodge(0.5)) +
# scale_color_manual(values = score_color_code) + th
tmp2 %>% filter(sample == 'African' & (!trait %in% outlier$trait)) %>%
mutate(method = order_method(method)) %>%
ggplot() +
geom_violin(aes(x = sample, y = portability, color = method), position = position_dodge(0.5)) +
geom_boxplot(aes(x = sample, y = portability, color = method), width = 0.1, position = position_dodge(0.5)) +
scale_color_manual(values = score_color_code) + th
res1 = tmp2 %>% filter((sample == 'African') & (!trait %in% outlier$trait)) %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`))
rownames(res1) = NULL
res2 = tmp2 %>% filter((sample == 'African') & (!trait %in% outlier$trait)) %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
do(test_a_vs_b(.$`PTRS (MESA EUR)`, .$`PTRS (MESA AFHI)`))
rownames(res2) = NULL
rbind(
res1 %>% mutate(type = 'PRS vs MESA EUR'),
res2 %>% mutate(type = 'MESA EUR vs MESA AFHI')
) %>% pander
delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval | type |
---|---|---|---|---|---|---|
-0.02218 | -0.07387 | 0.02951 | 0.3765 | 58 | 0.4038 | PRS vs MESA EUR |
-0.03581 | -0.09475 | 0.02313 | 0.2161 | 59 | 0.4307 | MESA EUR vs MESA AFHI |
# tmp2 %>% filter((sample != 'African') | (!trait %in% outlier$trait)) %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`))
# tmp2 %>% filter((sample != 'African') | (!trait %in% outlier$trait)) %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% do(test_a_vs_b(.$`PTRS (MESA EUR)`, .$`PTRS (MESA AFHI)`))
Takeaway: Comparing the portabilities of MESA EUR and AFHI PTRS in AFR individuals.
tmp2 = rbind(
df2ptp_afhi %>% mutate(method = 'PTRS (MESA AFHI)') %>% filter(sample != 'British_insample', sample != 'British_test'),
df4p %>% mutate(method = 'PRS'),
df2ptp_cau %>% mutate(method = 'PTRS (MESA EUR)')
)
outlier = tmp %>% filter(sample == 'African', portability > 0.5)
# tmp2 %>% filter(sample == 'African') %>%
# mutate(method = order_method(method)) %>%
# ggplot() +
# geom_violin(aes(x = sample, y = portability, color = method), position = position_dodge(0.5)) +
# geom_boxplot(aes(x = sample, y = portability, color = method), width = 0.1, position = position_dodge(0.5)) +
# scale_color_manual(values = score_color_code) + th
tmp2 %>% filter(sample == 'African' & (!trait %in% outlier$trait)) %>%
mutate(method = order_method(method)) %>%
ggplot() +
geom_violin(aes(x = sample, y = portability, color = method), position = position_dodge(0.5)) +
geom_boxplot(aes(x = sample, y = portability, color = method), width = 0.1, position = position_dodge(0.5)) +
scale_color_manual(values = score_color_code) + th
res1 = tmp2 %>% filter((sample == 'African') & (!trait %in% outlier$trait)) %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`))
rownames(res1) = NULL
res2 = tmp2 %>% filter((sample == 'African') & (!trait %in% outlier$trait)) %>%
reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>%
do(test_a_vs_b(.$`PTRS (MESA EUR)`, .$`PTRS (MESA AFHI)`))
rownames(res2) = NULL
rbind(
res1 %>% mutate(type = 'PRS vs MESA EUR'),
res2 %>% mutate(type = 'MESA EUR vs MESA AFHI')
) %>% pander
delta | ci_low | ci_high | pval | wilcox_stat | wilcox_pval | type |
---|---|---|---|---|---|---|
-0.01252 | -0.07119 | 0.04615 | 0.657 | 77 | 1 | PRS vs MESA EUR |
0.01566 | -0.03042 | 0.06174 | 0.4816 | 108 | 0.1454 | MESA EUR vs MESA AFHI |
# tmp2 %>% filter((sample != 'African') | (!trait %in% outlier$trait)) %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% do(test_a_vs_b(.$PRS, .$`PTRS (MESA EUR)`))
# tmp2 %>% filter((sample != 'African') | (!trait %in% outlier$trait)) %>% reshape2::dcast(trait + sample ~ method, value.var = 'portability') %>% do(test_a_vs_b(.$`PTRS (MESA EUR)`, .$`PTRS (MESA AFHI)`))
Takeaway: As a supplementary, we also compare the PTRS (P+T) portabilities of MESA EUR and AFHI PTRS in AFR individuals.