1 Load results

Generated by ../notebook/first_attempt_on_hail.ipynb.

df = fread("zcat < ~/Desktop/first_attempt_on_hail.output.tsv.gz | sed 's#\\[##g' | sed 's#\\]##g' | tr ',' '\t' | tail -n +2", header = F, stringsAsFactors = F, data.table = F)
header = fread('zcat < ~/Desktop/first_attempt_on_hail.output.tsv.gz | head', header = T, data.table = F, stringsAsFactors = F)
cname = colnames(header)
traits = c('bmi', 'height')
single_cols = c(1,3,4)
new_cols = c()
for(i in 1 : length(cname)) {
  if(i %in% single_cols) {
    new_cols = c(new_cols, cname[i])
    next
  }
  for(j in traits) {
    new_cols = c(new_cols, paste0(cname[i], '.', j))
  }
}
colnames(df) = new_cols

2 Load Neale’s lab sum stats

source('https://gist.githubusercontent.com/liangyy/605437935751bb77b1739666b18517bf/raw/85b1e83d1cbf7c8b5671030e927000342ed16f97/my_qqplot_by_group.R')
# my_qqplot_by_group(df$p_value, group = 1)
df_neale = fread("zcat < ~/Desktop/50_raw.gwas.imputed_v3.both_sexes.tsv.bgz|grep '^22:'", header = F, stringsAsFactors = F)
df_neale_maf_gt_0.01 = df_neale %>% filter(V3 > 0.01)
df_neale_maf_gt_0.01 = df_neale_maf_gt_0.01 %>% mutate(var_id = parse_neale_snp(V1))

3 Join and plot

join = inner_join(df, df_neale_maf_gt_0.01, by = c('locus' = 'var_id'))
class(join$p_value.height) = 'numeric' 
class(join$p_value.bmi) = 'numeric' 
subsample = subsample_for_vis(rep(1, nrow(join)), nmax = 50000)
vis = join[subsample, ]

my_qqplot_by_group(c(vis$p_value.height, vis$V11), group = c(rep('nmyrun', nrow(vis)), rep('neale', nrow(vis))))

vis %>% ggplot() + geom_point(aes(x = p_value.height, y = V11), alpha = 0.5) + scale_x_log10() + scale_y_log10() + geom_abline(slope = 1, intercept = 0)

hist(join$V11)

hist(join$p_value.height)

hist(join$p_value.bmi)