here::i_am("figure_generation/cfDNA_summary.Rmd")
library(here)
library(tidyr)
library(purrr)
library(ggplot2)
library(tibble)
library(stringr)
library(dplyr)
library(readr)
library(forcats)
library(ggbeeswarm)
library(RColorBrewer)
dir.create(here("output"), showWarnings = FALSE)
This file plots some metrics for the cfDNA samples, without involving
the CUPiD predictions. First we read in the output of the Nextflow
pipeline, included in the git repo and called
cfDNA_qseaSummary.csv. This contains 211 samples in total,
with 143 patients with known tumour types, 27 non-cancer controls and 41
patients with CUP.
CUPiDcols <- read_rds(here("figure_inputs", "CUPiD_colour_palette.rds"))
qseaSummary <- read_csv(here("figure_inputs", "cfDNA_qseaSummary.csv"))
colsToUse <- qseaSummary %>%
pull(correct_class) %>%
na.omit() %>%
unique() %>%
sort() %>%
setdiff("CUP")
qseaSummary %>%
distinct(sample_name, correct_class) %>%
filter(correct_class != "CUP") %>%
ggplot(aes(y = fct_reorder(correct_class,correct_class, .fun = length), fill = correct_class)) +
geom_bar() +
scale_fill_manual(values = CUPiDcols[colsToUse]) +
theme_bw() +
geom_text(
aes(label = after_stat(count)),
stat = 'count',
nudge_x = 2,
size = 5
) +
labs(x = "Count",
y = "Class") +
xlim(c(0,38))
ggsave(here("output/cfDNA_n.pdf"), width = 4, height = 4)
qseaSummary %>%
filter(correct_class != "CUP") %>%
mutate(ichorTumourFraction = 100*ichorTumourFraction) %>%
distinct(sample_name, correct_class, ichorTumourFraction) %>%
ggplot(aes(y = fct_reorder(correct_class, ichorTumourFraction),
x = ichorTumourFraction,
col = correct_class)) +
geom_boxplot(outlier.shape = NA) +
geom_quasirandom() +
geom_vline(xintercept = 3, linetype = "dotted") +
scale_colour_manual(values = CUPiDcols[colsToUse]) +
scale_x_log10() +
labs(y = "Tumour Class",
x = "Estimated Tumour Fraction (%, log scale)",
title = "IchorCNA estimated tumour fraction",
subtitle = "Cutoff of 3% shown") +
theme_bw()
ggsave(here("output/ichorTumourFraction.pdf"), width = 5, height = 5)
newOrder <- qseaSummary %>%
distinct(sample_name, correct_class, ichorTumourFraction, valid_fragments, hyperStableEdgar, relH_MeCap = relH, relH_Input = input_relH) %>%
group_by(correct_class) %>%
summarise(median = median(relH_MeCap)) %>%
arrange(median) %>%
pull(correct_class)
qseaSummary %>%
filter(correct_class != "CUP") %>%
distinct(sample_name, correct_class, ichorTumourFraction, valid_fragments, hyperStableEdgar, relH_MeCap = relH, relH_Input = input_relH) %>%
pivot_longer(matches("relH"), names_to = "enrich", values_to = "relH", names_prefix = "relH_") %>%
mutate(enrich = ifelse(enrich == "MeCap","Enriched","Non-enriched")) %>%
mutate(enrich = factor(enrich, levels = c("Enriched","Non-enriched") ),
correct_class = factor(correct_class, levels = newOrder)) %>%
ggplot(aes(y = correct_class, x = relH, col = enrich)) +
geom_boxplot(outlier.shape = NA, position = "identity") +
geom_quasirandom() +
theme_bw() +
#cbctools::scale_color_cruk() +
labs(y = "Class",
x = "Relative Enrichment (relH)",
colour = "Fraction")
ggsave(here("output/cfDNA_relH.pdf"), width = 4, height = 4)
qseaSummary %>%
dplyr::select(sample_name, correct_class, age, sex, source, "cfDNA yield (ng/mL)" = cfDNA_yield, "DNA input (ng)" = DNA_input,
"ichorCNA Tumour Fraction" = ichorTumourFraction, "Hyper-stable Fraction" = hyperStableEdgar,
"Initial Reads" = raw_total_sequences, "Mapped Reads" = reads_mapped,
"Deduplicated Reads" = reads_post_dedup, "Total Fragments in QSEA" = total_fragments,
"Final Valid Fragments" = valid_fragments,
"enrichment score (relH)" = relH, "enrichment score (GoGe)" = GoGe,
"Initial Reads (non-enriched)" = input_raw_total_sequences, "Mapped Reads (non-enriched)" = input_reads_mapped,
"Deduplicated Reads (non-enriched)" = input_reads_post_dedup, "Total Fragments in QSEA (non-enriched)" = input_total_fragments,
"Final Valid Fragments (non-enriched)" = input_valid_fragments,
"enrichment score (relH), non-enriched" = input_relH,
"enrichment score (GoGe), non-enriched" = input_GoGe) %>%
write_csv(here("output/cfDNA_methylation_metrics.csv"))
CNVmatrix <- read_rds(here("figure_inputs/CNV_knownTypes.rds")) %>%
as_tibble() %>%
dplyr::select(-tidyselect::matches("width|strand")) %>%
dplyr::mutate(window = paste0(seqnames, ":",start, "-",end)) %>%
tibble::remove_rownames() %>%
tibble::column_to_rownames("window") %>%
dplyr::select(-seqnames, -start, -end) %>%
as.matrix() %>%
t()
colAnno <- CNVmatrix %>%
colnames() %>%
enframe() %>%
dplyr::select(value) %>%
mutate(chr = as.numeric(str_remove(value,":.*")) %% 2 ) %>%
column_to_rownames("value")
rowAnno <- qseaSummary %>%
filter(correct_class != "CUP", correct_class != "NCC") %>%
dplyr::select(sample_name, correct_class, tumourFraction = ichorTumourFraction)
avgTFs <- rowAnno %>%
group_by(correct_class) %>%
summarise(median = median(tumourFraction)) %>%
arrange(desc(median))
rowAnno <- rowAnno %>%
mutate(correct_class = factor(correct_class, levels = avgTFs$correct_class)) %>%
dplyr::arrange(correct_class, desc(tumourFraction)) %>%
column_to_rownames("sample_name")
CNVmatrix[rownames(rowAnno), ] %>%
pheatmap::pheatmap(cluster_rows = FALSE,
cluster_cols = FALSE,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_row = rowAnno,
breaks = seq(from = -1, to = 1, length.out = 8),
annotation_colors = list(chr = c("1" = "darkgrey", "2" = "lightgrey"),
correct_class = CUPiDcols[colsToUse]),
annotation_col = colAnno,
gaps_row = rowAnno %>% dplyr::pull(correct_class) %>% as.character() %>% rle() %>% {.$lengths} %>% cumsum(),
gaps_col = colAnno %>% dplyr::pull(chr) %>% diff() %>% {which(. != 0)},
color = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 7)),
filename = here("output/CNVplot.pdf"),
height = 8.5,
width = 9.5,
)
dir.create(here("source_data"), showWarnings = FALSE)
CNVmatrix %>%
as.data.frame() %>%
write_csv(here("source_data/CNVmatrix.csv"))
qseaSummary %>%
filter(correct_class == "CUP") %>%
distinct(sample_name, correct_class, ichorTumourFraction, valid_fragments, hyperStableEdgar, relH_MeCap = relH, relH_Input = input_relH) %>%
pivot_longer(matches("relH"), names_to = "enrich", values_to = "relH", names_prefix = "relH_") %>%
mutate(enrich = ifelse(enrich == "MeCap","Enriched","Non-enriched")) %>%
mutate(enrich = factor(enrich, levels = c("Enriched","Non-enriched") )) %>%
ggplot(aes(y = relH, x = enrich, col = enrich)) +
geom_boxplot(outlier.shape = NA, position = "identity") +
#geom_jitter(height = 0, width = 0.2) +
geom_quasirandom() +
theme_bw() +
#cbctools::scale_color_cruk() +
labs(x = "Fraction",
y = "Relative Enrichment (relH)",
colour = "Fraction")
ggsave(here("output/CUP_relH.pdf"), width = 4, height = 4)
qseaSummary %>%
filter(correct_class == "CUP") %>%
dplyr::select(sample_name, correct_class, age, sex, source, "cfDNA yield (ng/mL)" = cfDNA_yield, "DNA input (ng)" = DNA_input,
"ichorCNA Tumour Fraction" = ichorTumourFraction, "Hyper-stable Fraction" = hyperStableEdgar,
"Initial Reads" = raw_total_sequences, "Mapped Reads" = reads_mapped,
"Deduplicated Reads" = reads_post_dedup, "Total Fragments in QSEA" = total_fragments,
"Final Valid Fragments" = valid_fragments,
"enrichment score (relH)" = relH, "enrichment score (GoGe)" = GoGe,
"Initial Reads (non-enriched)" = input_raw_total_sequences, "Mapped Reads (non-enriched)" = input_reads_mapped,
"Deduplicated Reads (non-enriched)" = input_reads_post_dedup, "Total Fragments in QSEA (non-enriched)" = input_total_fragments,
"Final Valid Fragments (non-enriched)" = input_valid_fragments,
"enrichment score (relH), non-enriched" = input_relH,
"enrichment score (GoGe), non-enriched" = input_GoGe) %>%
write_csv(here("output/CUP_methylation_metrics.csv"))
NCC01summary <- read_csv(here("figure_inputs/NCC01_qseaSummary.csv"))
NCC01summary %>%
dplyr::select(sample_name, age, sex, source, "cfDNA yield (ng/mL)" = cfDNA_yield, "DNA input (ng)" = DNA_input,
"ichorCNA Tumour Fraction" = ichorTumourFraction, "Hyper-stable Fraction" = hyperStableEdgar,
"Initial Reads" = raw_total_sequences, "Mapped Reads" = reads_mapped,
"Deduplicated Reads" = reads_post_dedup, "Total Fragments in QSEA" = total_fragments,
"Final Valid Fragments" = valid_fragments,
"enrichment score (relH)" = relH, "enrichment score (GoGe)" = GoGe,
"Initial Reads (non-enriched)" = input_raw_total_sequences, "Mapped Reads (non-enriched)" = input_reads_mapped,
"Deduplicated Reads (non-enriched)" = input_reads_post_dedup, "Total Fragments in QSEA (non-enriched)" = input_total_fragments,
"Final Valid Fragments (non-enriched)" = input_valid_fragments,
"enrichment score (relH), non-enriched" = input_relH,
"enrichment score (GoGe), non-enriched" = input_GoGe) %>%
write_csv(here("output/NCC01_methylation_metrics.csv"))
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] GenomicRanges_1.52.0 GenomeInfoDb_1.36.2 IRanges_2.34.1
## [4] S4Vectors_0.38.1 BiocGenerics_0.46.0 RColorBrewer_1.1-3
## [7] ggbeeswarm_0.7.2 forcats_1.0.0 readr_2.1.4
## [10] dplyr_1.1.4 stringr_1.5.1 tibble_3.2.1
## [13] ggplot2_3.4.4 purrr_1.0.2 tidyr_1.3.1
## [16] here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.4 beeswarm_0.4.0 xfun_0.40
## [4] bslib_0.5.1 tzdb_0.4.0 vctrs_0.6.5
## [7] tools_4.3.1 bitops_1.0-7 generics_0.1.3
## [10] parallel_4.3.1 fansi_1.0.6 highr_0.10
## [13] pkgconfig_2.0.3 pheatmap_1.0.12 lifecycle_1.0.4
## [16] GenomeInfoDbData_1.2.10 compiler_4.3.1 farver_2.1.1
## [19] textshaping_0.3.6 munsell_0.5.0 vipor_0.4.5
## [22] htmltools_0.5.6 sass_0.4.7 RCurl_1.98-1.12
## [25] yaml_2.3.7 pillar_1.9.0 crayon_1.5.2
## [28] jquerylib_0.1.4 cachem_1.0.8 tidyselect_1.2.0
## [31] digest_0.6.33 stringi_1.8.3 labeling_0.4.3
## [34] rprojroot_2.0.3 fastmap_1.1.1 grid_4.3.1
## [37] colorspace_2.1-0 cli_3.6.2 magrittr_2.0.3
## [40] utf8_1.2.4 withr_3.0.0 scales_1.2.1
## [43] bit64_4.0.5 rmarkdown_2.24 XVector_0.40.0
## [46] bit_4.0.5 ragg_1.2.5 hms_1.1.3
## [49] evaluate_0.21 knitr_1.43 rlang_1.1.3
## [52] glue_1.7.0 rstudioapi_0.15.0 vroom_1.6.3
## [55] jsonlite_1.8.7 R6_2.5.1 zlibbioc_1.46.0
## [58] systemfonts_1.0.4