# Turn the free-text `human` column of the predictions table into two numeric
# columns. The regex captures: [, 2] the text before " (", [, 3] the text
# between "(" and "/", and [, 4] the text between "/" and ")".
t_predictions <- dplyr::rename(t_predictions, allele_name = haplotype)
# Match once and reuse the result, so both columns are guaranteed to come
# from the same pattern.
human_parts <- stringr::str_match(
  string = t_predictions$human,
  pattern = "(.*) \\((.*)/(.*)\\)"
)
# First capture is divided by 100 (presumably a percentage -> fraction;
# TODO confirm against the input file).
t_predictions$f_tmh <- as.numeric(human_parts[, 2]) / 100.0
# Third capture is stored as `n` (the number after the slash).
t_predictions$n <- as.numeric(human_parts[, 4])
t_predictions$human <- NULL
# Guard: only these three columns may remain, in this order.
testthat::expect_equal(
  names(t_predictions),
  c("allele_name", "f_tmh", "n")
)
# Merge the tibbles: stack observed (IEDB) and predicted rows, tagging each
# row with its origin.
t_iedb$type <- "observed"
t_predictions$type <- "predicted"
ts <- dplyr::bind_rows(t_iedb, t_predictions)
ts$type <- as.factor(ts$type)
# Assign an MHC class per allele: first from the bbbq allele lists, then from
# the HLA name pattern (later assignments overwrite earlier ones).
ts$mhc_class <- NA_character_
ts$mhc_class[ts$allele_name %in% bbbq::get_mhc1_allele_names()] <- "I"
ts$mhc_class[ts$allele_name %in% bbbq::get_mhc2_allele_names()] <- "II"
ts$mhc_class[stringr::str_detect(ts$allele_name, "HLA-[AB]")] <- "I"
ts$mhc_class[stringr::str_detect(ts$allele_name, "HLA-D")] <- "II"
# Only keep MHC-I, as MHC-II has no overlap.
# `%in%` never returns NA, so alleles whose class is still NA are dropped
# instead of being turned into all-NA rows (which `== "I"` would produce).
ts <- ts[ts$mhc_class %in% "I", ]
# Dodged bar chart of the TMH-derived epitope fraction per allele, one bar per
# `type`. Disabled in this version: count labels (geom_text on `n`) and
# faceting by `mhc_class`.
p <- ggplot2::ggplot(
  data = ts,
  mapping = ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.5)
  ) +
  ggplot2::scale_x_discrete(name = "MHC allele name") +
  bbbq::get_bbbq_theme() +
  # Large base font; allele names rotated to stay readable.
  ggplot2::theme(
    text = ggplot2::element_text(size = 24),
    axis.text.x = ggplot2::element_text(angle = 90)
  )
p
# Disabled allele-name clean-up, kept for reference:
# ts$allele_name <- bbbq::simplify_haplotype_names(ts$allele_name)
# Remove unknown alleles
# ts <- ts[!is.na(ts$allele_name), ]
# ts$allele_name <- paste0(ts$mhc_class, "_", ts$allele_name)
# readr::write_lines(x = ts$allele_name, "~/allele_names.txt")
ts

# Read the coincidence table once; everything below is derived from it.
t_coincidence_all <- readr::read_csv(
  coincidence_filename,
  show_col_types = FALSE
)
t_coincidence_all
# Inspect the rows for the human target. `dplyr::filter` is namespace-
# qualified so it can never resolve to `stats::filter`.
t_coincidence_all %>% dplyr::filter(target == "human")
# Keep only the human rows and the 99% interval bounds per MHC class.
t_coincidence <- t_coincidence_all %>%
  dplyr::filter(target == "human") %>%
  dplyr::select(mhc_class, conf_99_low, conf_99_high)
t_coincidence
# Merge the tibbles: stack observed (IEDB) and predicted rows, tagging each
# row with its origin.
t_iedb$type <- "observed"
t_predictions$type <- "predicted"
ts <- dplyr::bind_rows(t_iedb, t_predictions)
ts$type <- as.factor(ts$type)
# Assign an MHC class per allele: first from the bbbq allele lists, then from
# the HLA name pattern (later assignments overwrite earlier ones).
ts$mhc_class <- NA_character_
ts$mhc_class[ts$allele_name %in% bbbq::get_mhc1_allele_names()] <- "I"
ts$mhc_class[ts$allele_name %in% bbbq::get_mhc2_allele_names()] <- "II"
ts$mhc_class[stringr::str_detect(ts$allele_name, "HLA-[AB]")] <- "I"
ts$mhc_class[stringr::str_detect(ts$allele_name, "HLA-D")] <- "II"
# Only keep MHC-I, as MHC-II has no overlap.
# `%in%` never returns NA, so alleles whose class is still NA are dropped
# instead of being turned into all-NA rows (which `== "I"` would produce).
ts <- ts[ts$mhc_class %in% "I", ]
ts
# Disabled allele-name clean-up, kept for reference:
# ts$allele_name <- bbbq::simplify_haplotype_names(ts$allele_name)
# Remove unknown alleles
# ts <- ts[!is.na(ts$allele_name), ]
# ts$allele_name <- paste0(ts$mhc_class, "_", ts$allele_name)
# readr::write_lines(x = ts$allele_name, "~/allele_names.txt")

# Preview the per-allele table joined with the coincidence interval of its
# MHC class. `by` must be a column *name* (a string); a bare `mhc_class` is
# looked up as an object and fails with "object 'mhc_class' not found".
merge(ts, t_coincidence, by = "mhc_class")
# Dodged bar chart of the TMH-derived epitope fraction per allele, with the
# lower 99% bound of the MHC-I coincidence interval as a horizontal line.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # The bound is a value on the y axis, so it needs geom_hline(yintercept);
  # geom_vline does not understand x/y aesthetics. `ts` holds MHC-I only,
  # hence only the class I row is used.
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
# Inspect the MHC-I row of the coincidence table.
dplyr::filter(t_coincidence, mhc_class == "I")
# Dodged bar chart of the TMH-derived epitope fraction per allele, with the
# lower 99% bound of the MHC-I coincidence interval as a horizontal line.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # `$` binds tighter than `%>%`, so `x %>% filter(...)$col` does not extract
  # from the filtered table. Calling the function directly and then applying
  # `$` avoids the precedence trap. The line is horizontal: yintercept.
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
# Inspect the MHC-I row and its lower 99% bound (the unbalanced-parenthesis
# attempts that made the file unparseable are removed).
dplyr::filter(t_coincidence, mhc_class == "I")
dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
# Dodged bar chart of the TMH-derived epitope fraction per allele, with the
# lower 99% bound of the MHC-I coincidence interval as a horizontal line.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # `intercept` is not a parameter of geom_vline, and the bound lives on the
  # y axis: use geom_hline(yintercept = ...).
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
# Dodged bar chart of the TMH-derived epitope fraction per allele, with the
# lower 99% bound of the MHC-I coincidence interval as a horizontal line.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # geom_hline takes `yintercept`; `xintercept` belongs to geom_vline.
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
# Dodged bar chart of the TMH-derived epitope fraction per allele, with the
# lower 99% bound of the MHC-I coincidence interval as a horizontal line.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # Namespace-qualified so `filter` can never resolve to `stats::filter`.
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
p
# Dodged bar chart of the TMH-derived epitope fraction per allele, with both
# 99% bounds of the MHC-I coincidence interval as red horizontal lines.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # Namespace-qualified so `filter` can never resolve to `stats::filter`.
  ggplot2::geom_hline(
    col = "red",
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  ggplot2::geom_hline(
    col = "red",
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_high
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
# Dodged bar chart of the TMH-derived epitope fraction per allele, with both
# 99% bounds of the MHC-I coincidence interval as red horizontal lines.
# Disabled in this version: count labels (geom_text on `n`) and faceting by
# `mhc_class`.
p <- ggplot2::ggplot(
  ts,
  ggplot2::aes(x = allele_name, y = f_tmh, fill = type)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(
    "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0.0, 0.5)
  ) +
  ggplot2::scale_x_discrete(
    "MHC allele name"
  ) +
  # Namespace-qualified so `filter` can never resolve to `stats::filter`.
  ggplot2::geom_hline(
    color = "red",
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low
  ) +
  ggplot2::geom_hline(
    color = "red",
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_high
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
p
# Inspect the upper 99% bound for MHC-I (the two attempts with unbalanced
# parentheses, which made the file unparseable, are replaced by one valid
# expression).
dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_high
t_coincidence
# Load the per-allele results; the file must exist in the working directory.
results_per_allele_filename <- "results_per_allele.csv"
testthat::expect_true(file.exists(results_per_allele_filename))
library(dplyr)
t <- readr::read_csv(file = results_per_allele_filename, show_col_types = FALSE)
t

# Coincidence intervals: read the full table, then keep the human rows with
# the 99% bounds per MHC class.
coincidence_filename <- "~/GitHubs/bbbq_1_smart/table_coincidence.csv"
testthat::expect_true(file.exists(coincidence_filename))
t_coincidence_all <- readr::read_csv(file = coincidence_filename, show_col_types = FALSE)
t_coincidence <- dplyr::select(
  dplyr::filter(t_coincidence_all, target == "human"),
  mhc_class, conf_99_low, conf_99_high
)

# Only keep MHC ligand
t <- t[t$dataset == "iedb_mhc_ligand", ]
# Express the MHC class as a roman numeral and store it as a factor.
t$mhc_class <- as.factor(as.character(as.roman(t$mhc_class)))
# Grey bar per MHC class showing the TMH-derived epitope fraction, with the
# `n` column printed above each bar.
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
# Grey bar per MHC class with the `n` column above each bar, plus the two 99%
# bounds of the MHC-I coincidence interval as red horizontal lines (the lines
# span the whole panel, i.e. both classes).
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_low,
    color = "red"
  ) +
  ggplot2::geom_hline(
    yintercept = dplyr::filter(t_coincidence, mhc_class == "I")$conf_99_high,
    color = "red"
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
p
# Reload the per-allele results; the file must exist in the working directory.
results_per_allele_filename <- "results_per_allele.csv"
testthat::expect_true(file.exists(results_per_allele_filename))
library(dplyr)
t <- readr::read_csv(file = results_per_allele_filename, show_col_types = FALSE)
t
# Only keep MHC ligand
t <- t[t$dataset == "iedb_mhc_ligand", ]
# Express the MHC class as a roman numeral and store it as a factor.
t$mhc_class <- as.factor(as.character(as.roman(t$mhc_class)))

# Coincidence interval: read the full table and inspect the human rows.
coincidence_filename <- "~/GitHubs/bbbq_1_smart/table_coincidence.csv"
testthat::expect_true(file.exists(coincidence_filename))
t_coincidence_all <- readr::read_csv(file = coincidence_filename, show_col_types = FALSE)
t_coincidence <- dplyr::select(
  dplyr::filter(t_coincidence_all, target == "human"),
  mhc_class, conf_99_low, conf_99_high
)
t_coincidence
t_coincidence_all
# Final selection also carries `f_tmh`, next to the 99% bounds.
t_coincidence <- dplyr::select(
  dplyr::filter(t_coincidence_all, target == "human"),
  mhc_class, conf_99_low, f_tmh, conf_99_high
)
# Grey bar per MHC class showing the TMH-derived epitope fraction, with the
# `n` column printed above each bar. Built up layer by layer.
p <- ggplot2::ggplot(data = t, mapping = ggplot2::aes(x = mhc_class, y = f_tmh))
p <- p + ggplot2::geom_col(fill = "#BBBBBB")
p <- p + ggplot2::scale_y_continuous(
  name = "Epitopes derived from TMH",
  labels = scales::percent,
  limits = c(0, 0.4)
)
p <- p + ggplot2::scale_x_discrete(name = "MHC class")
p <- p + ggplot2::geom_text(
  mapping = ggplot2::aes(label = n),
  vjust = -0.5,
  size = 8
)
p <- p + bbbq::get_bbbq_theme()
p <- p + ggplot2::theme(text = ggplot2::element_text(size = 24))
p
# Grey bar per MHC class for the values in `t`, overlaid with a second bar
# layer drawn from `t_coincidence$f_tmh`; the `n` column of `t` is printed
# above each bar.
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::geom_col(
    mapping = ggplot2::aes(x = mhc_class, y = f_tmh),
    data = t_coincidence
  ) +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
p
# Grey bar per MHC class for the values in `t`, overlaid with the
# `t_coincidence$f_tmh` bars and an error bar spanning the 99% bounds per
# class; the `n` column of `t` is printed above each bar.
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::geom_col(
    mapping = ggplot2::aes(x = mhc_class, y = f_tmh),
    data = t_coincidence
  ) +
  ggplot2::geom_errorbar(
    mapping = ggplot2::aes(
      x = mhc_class,
      ymin = conf_99_low,
      ymax = conf_99_high
    ),
    data = t_coincidence
  ) +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
p
# Grey bar per MHC class for the values in `t`, with an error bar spanning the
# 99% bounds from `t_coincidence` per class; the `n` column of `t` is printed
# above each bar. The overlay of `t_coincidence$f_tmh` bars is disabled here.
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::geom_errorbar(
    mapping = ggplot2::aes(
      x = mhc_class,
      ymin = conf_99_low,
      ymax = conf_99_high
    ),
    data = t_coincidence
  ) +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
p
# Grey bar per MHC class for the values in `t`, with a red error bar spanning
# the 99% bounds from `t_coincidence` per class; the `n` column of `t` is
# printed above each bar. The overlay of `t_coincidence$f_tmh` bars is
# disabled here.
p <- ggplot2::ggplot(
  data = t,
  mapping = ggplot2::aes(x = mhc_class, y = f_tmh)
) +
  ggplot2::geom_col(fill = "#BBBBBB") +
  ggplot2::geom_errorbar(
    mapping = ggplot2::aes(
      x = mhc_class,
      ymin = conf_99_low,
      ymax = conf_99_high
    ),
    data = t_coincidence,
    colour = "red"
  ) +
  ggplot2::scale_y_continuous(
    name = "Epitopes derived from TMH",
    labels = scales::percent,
    limits = c(0, 0.4)
  ) +
  ggplot2::scale_x_discrete(name = "MHC class") +
  ggplot2::geom_text(
    mapping = ggplot2::aes(label = n),
    vjust = -0.5,
    size = 8
  ) +
  bbbq::get_bbbq_theme() +
  ggplot2::theme(text = ggplot2::element_text(size = 24))
p
