library(ggplot2)
library(dplyr)
library(pander)
options(stringsAsFactors = F)
source('../code/rlib_doc.R')
source('https://gist.githubusercontent.com/liangyy/43912b3ecab5d10c89f9d4b2669871c9/raw/8151c6fe70e3d4ee43d9ce340ecc0eb65172e616/my_ggplot_theme.R')
th$panel.border = element_rect(colour = th$axis.line$colour)
theme_set(theme_bw(base_size = 10))
The predicted expression is inverse normalized gene by gene. The phenotype measure and covariates are in their original scales.
Generated by ../submission_scripts/gcta_regulability/
df_trait = read.delim2('../external_data/martin_et_al_2019ng_table_s6_trait_description.tsv', sep = '\t', header = T)
df_info = read.csv('../external_data/martin_et_al_2019ng_table_s6.csv') %>% mutate(trait = tolower(Trait))
traits = tolower(df_trait$short)
df_trait$trait = traits
class(df_info$UKBB.code) = 'character'
result_dir = '/Users/yanyul/Desktop/tmp/gcta_regu'
pops = c('African', 'British-test-1', 'Chinese', 'Indian')
est_list = list()
lrt_list = list()
for(t in traits) {
for(p in pops) {
filename = paste0(result_dir, '/', 'reml_from_gcta_martin_et_al_traits_x_ctimp_Whole_Blood_x_', p, '_x_', t, '.hsq')
df = read.delim2(filename)
class(df$Variance) = 'numeric'
class(df$SE) = 'numeric'
est_list[[length(est_list) + 1]] = df[1:4, ] %>% mutate(population = p, trait = t)
lrt_list[[length(lrt_list) + 1]] = df[5:10, ] %>% mutate(population = p, trait = t)
}
}
est = do.call(rbind, est_list)
lrt = do.call(rbind, lrt_list)
est = est %>% inner_join(df_trait %>% select(trait, long), by = 'trait')
lrt = lrt %>% inner_join(df_trait %>% select(trait, long), by = 'trait')
hail_list = list()
for(p in pops) {
filename = paste0(result_dir, '/', 'reml_from_hail_martin_et_al_traits_x_ctimp_Whole_Blood_x_', p, '.tsv')
df = read.delim2(filename)
hail_list[[length(hail_list) + 1]] = df %>% mutate(population = p)
}
hail = do.call(rbind, hail_list)
class(hail$h_sq_se) = 'numeric'
class(hail$h_sq) = 'numeric'
hail = hail %>% inner_join(df_trait %>% select(trait, long), by = 'trait')
df_ldsc = read.delim2('~/Downloads/ukb31063_h2_topline.02Oct2019.tsv.gz') %>% mutate(ukbb_code = unlist(lapply(strsplit(phenotype, '_'), function(x){x[1]}))) %>% filter(ukbb_code %in% df_info$UKBB.code)
class(df_ldsc$h2_observed) = 'numeric'
class(df_ldsc$h2_observed_se) = 'numeric'
df_ldsc = df_ldsc %>% inner_join(df_info %>% select(UKBB.code, trait), by = c('ukbb_code' = 'UKBB.code'))
lrt %>% filter(Source == 'n') %>% select(Variance, population) %>% distinct() %>% pander
Variance | population |
---|---|
2835 | African |
5000 | British-test-1 |
1326 | Chinese |
4789 | Indian |
V(P) means the phenotypic variation. So it will be phenotype-specific.
est %>% filter(Source == 'Vp') %>% ggplot() + th + geom_point(aes(x = population, y = sqrt(Variance))) +
# scale_y_log10() +
facet_wrap(~long, ncol = 3, scales = 'free_y') +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Phenotypic variation explained by predicted expression.
est %>% filter(Source == 'V(G)') %>% ggplot() + th + geom_point(aes(x = population, y = sqrt(Variance))) +
# scale_y_log10() +
facet_wrap(~long, ncol = 3, scales = 'free_y') +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Proportion of phenotypic variation explained by predicted expression.
d = .8
est %>% filter(Source == 'V(G)/Vp') %>%
ggplot() + th +
geom_bar(aes(x = trait, y = Variance, fill = population), stat = 'identity', , position = position_dodge(d)) +
geom_errorbar(aes(x = trait, group = population, ymax = Variance + 1.95 * SE, ymin = Variance - 1.96 * SE), position = position_dodge(d), width = .2) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
theme(legend.position = 'bottom') +
ggtitle('GCTA runs')
d = .8
hail %>%
ggplot() + th +
geom_bar(aes(x = trait, y = h_sq, fill = population), stat = 'identity', position = position_dodge(d)) +
geom_errorbar(aes(x = trait, group = population, ymax = h_sq + 1.95 * h_sq_se, ymin = h_sq - 1.96 * h_sq_se), position = position_dodge(d), width = .2) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
theme(legend.position = 'bottom') +
ggtitle('Hail runs')
## Warning: Removed 8 rows containing missing values (geom_bar).
## Warning: Removed 8 rows containing missing values (geom_errorbar).
hail$hail = hail$h_sq
hail$hail[is.na(hail$hail)] = -0.1
est %>% filter(Source == 'V(G)/Vp') %>% rename(gcta = Variance) %>% inner_join(hail, by = c('trait', 'population')) %>%
ggplot() + th +
geom_abline(slope = c(1), intercept = 0, linetype = 2, color = 'gray') +
geom_hline(yintercept = 0, linetype = 2, color = 'gray') +
geom_vline(xintercept = 0, linetype = 2, color = 'gray') +
geom_point(aes(y = gcta, x = hail), alpha = .5, size = 2) +
geom_errorbarh(aes(y = gcta, xmin = hail - h_sq_se * 1.96, xmax = hail + h_sq_se * 1.96), alpha = .5) +
geom_errorbar(aes(ymin = gcta - SE * 1.96, ymax = gcta + SE * 1.96, x = hail), alpha = .5) +
coord_equal(ratio = 1) +
facet_wrap(~population, ncol = 2)
## Warning: Removed 8 rows containing missing values (geom_errorbarh).
df_ldsc %>%
ggplot() + th +
geom_bar(aes(x = trait, y = h2_observed), stat = 'identity') +
geom_errorbar(aes(x = trait, ymax = h2_observed + 1.96 * h2_observed_se, ymin = h2_observed - 1.96 * h2_observed_se), width = .1) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
est %>% filter(Source == 'V(G)/Vp') %>% rename(regu = Variance) %>% inner_join(df_ldsc, by = 'trait') %>%
ggplot() + th +
geom_abline(slope = c(.2, 1), intercept = 0, linetype = 2, color = 'gray') +
geom_hline(yintercept = 0, linetype = 2, color = 'gray') +
geom_vline(xintercept = 0, linetype = 2, color = 'gray') +
geom_point(aes(y = regu, x = h2_observed), alpha = .5, size = 2) +
geom_errorbarh(aes(y = regu, xmin = h2_observed - h2_observed_se * 1.96, xmax = h2_observed + h2_observed_se * 1.96), alpha = .5) +
geom_errorbar(aes(ymin = regu - SE * 1.96, ymax = regu + SE * 1.96, x = h2_observed), alpha = .5) +
coord_equal(ratio = 1) +
facet_wrap(~population, ncol = 2)
saveRDS(hail, '../analysis_output/regulability_ctimp.rds')
saveRDS(df_ldsc, '../analysis_output/hsq_neale_lab.rds')