knitr::opts_knit$set(root.dir = '..', fig.width = 12, fig.height = 8, warning = FALSE, message = FALSE)
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
This document looks at what mutations are found within the CUP
cohort. The data for these has been put through a GRCh38 snakemake-based
pipeline (not included in this repo), and high confidence mutations are
determined as those which are called by both CLC and Mutect2. This file
must be ran after the CUPiD_cfDNA_application.Rmd file, as this
generates the output/CUPsummary.csv file that describes the
predictions made on the CUP samples.
if(!file.exists(here("output/CUPsummary.csv"))){
stop("Please run CUPiD_cfDNA_application.Rmd first, to generate the CUPiD predictions.")
}
CUPIDpredict <- read_csv(here("output/CUPsummary.csv"))
CUPclin <- read_csv(here("figure_inputs/CUPClin.csv")) %>%
left_join(CUPIDpredict %>% dplyr::select(sample_name, CUPiD_Prediction = call)) %>%
dplyr::rename(Tumor_Sample_Barcode = sample_name,
Primary_Diagnosis = diagnosed_class) %>%
mutate(Primary_Diagnosis = ifelse(Primary_Diagnosis == "DLBC","Lymphoma", Primary_Diagnosis))
combinedMAFdata <- read_csv(here("figure_inputs/CUP_highconfidence_mutations.csv")) %>%
filter(Variant_Classification %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site",
"Translation_Start_Site","Nonsense_Mutation", "Nonstop_Mutation",
"In_Frame_Del","In_Frame_Ins", "Missense_Mutation")) %>%
mutate(gnomAD_AF = replace_na(gnomAD_AF,0)) %>%
filter(gnomAD_AF <= 0.01) %>%
janitor::remove_empty(which = "cols") %>%
dplyr::select(-matches("gnomAD_[A-Z]*_AF|^[A-Z]*_AF$|^AF$", ignore.case = FALSE)) %>%
mutate(actionable = ifelse(HIGHEST_SENSITIVE_LEVEL %in% c("LEVEL_1","LEVEL_2","LEVEL_3A","LEVEL_3B"),"Actionable","Not")) %>%
dplyr::select(Hugo_Symbol, Tumor_Sample_Barcode, everything()) %>%
janitor::remove_constant()
combinedMAFdata %>%
janitor::remove_empty("cols", quiet = FALSE) %>%
dplyr::select(sample_name = Tumor_Sample_Barcode, everything()) %>%
write_csv(here("output/CUP_final_mutations.csv"))
combinedMAFdata %>%
group_by(Tumor_Sample_Barcode) %>%
summarise(medianAF = median(Tumour_AF)) %>%
left_join(CUPclin,.) %>%
mutate(medianAF = replace_na(medianAF, 0)) %>%
filter(Tumor_Sample_Barcode != "C006169") %>%
pull(medianAF) %>%
max()
## [1] 0.6129952
combinedMAFdata %>%
dplyr::distinct(Tumor_Sample_Barcode) %>%
nrow()
## [1] 33
combinedMAFdata %>%
filter(actionable == "Actionable") %>%
dplyr::distinct(Tumor_Sample_Barcode) %>%
nrow()
## [1] 7
combinedMAFdata %>%
filter(actionable == "Actionable") %>%
nrow()
## [1] 9
combinedMAFdata %>%
filter(actionable == "Actionable") %>%
dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol, HGVSc, HIGHEST_SENSITIVE_LEVEL, Tumour_AF, Normal_AF, actionable) %>%
arrange(HIGHEST_SENSITIVE_LEVEL) %>%
DT::datatable()
combinedMAFdata %>%
filter(str_detect(ONCOGENIC,"Onco")) %>%
nrow()
## [1] 60
Now we plot an Oncoplot for the mutations found, restricted to those mutations marked as (potentially) oncogenic by OncoKB. C006169 is removed due to potential genomic contamination by NGSCheckMate.
samplesWithMutations <- combinedMAFdata %>%
pull(Tumor_Sample_Barcode) %>%
unique()
### maftools doesn't like samples with no mutations in the maf object.
# So we can add a silent mutation to these samples, which will get ignored later
combMafDataWithExtraSilents <- combinedMAFdata %>%
filter(Variant_Classification == "Silent") %>%
dplyr::slice(1) %>%
dplyr::select(-Tumor_Sample_Barcode) %>%
left_join(CUPclin %>%
filter(Tumor_Sample_Barcode != "C006169") %>%
filter(!(Tumor_Sample_Barcode %in% samplesWithMutations)) %>%
dplyr::select(Tumor_Sample_Barcode) %>%
mutate(Variant_Classification = "Silent"),
multiple = "all") %>%
bind_rows(combinedMAFdata,.)
# don't know why complaining with a warning here, but works fine.
cup_maf <- read.maf(combMafDataWithExtraSilents,
verbose = T,
clinicalData = CUPclin %>% filter(Tumor_Sample_Barcode != "C006169"),
vc_nonSyn = c("Frame_Shift_Del", "Frame_Shift_Ins",
"Splice_Site","Translation_Start_Site","Nonsense_Mutation",
"Nonstop_Mutation", "In_Frame_Del","In_Frame_Ins", "Missense_Mutation"))
## -Validating
## -Summarizing
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## SYNE1
## -Processing clinical data
## -Finished in 0.035s elapsed (0.031s cpu)
## quartz_off_screen
## 2
avgVAFdata <- combinedMAFdata %>%
dplyr::rename(sample_name = Tumor_Sample_Barcode) %>%
group_by(sample_name) %>%
summarise(avgVAF = median(as.numeric(Tumour_AF), na.rm = TRUE),
avgVAF = replace_na(avgVAF,0))
ichorVsVAFdata <- read_csv(here("figure_inputs/cfDNA_qseaSummary.csv")) %>%
filter(correct_class == "CUP", sample_name != "C006169") %>%
distinct(sample_name, ichorTumourFraction) %>%
left_join(avgVAFdata) %>%
mutate(avgVAF = replace_na(avgVAF, 0 ),
ichorTumourFraction = 100*ichorTumourFraction,
avgVAF = avgVAF*100
)
ichorVsVAFdata %>%
ggpubr::ggscatter(x = "ichorTumourFraction",
y = "avgVAF",
xlab = "ichorCNA estimated tumour fraction (%)",
ylab = "Median Varient Allele Frequency (%)",
cor.coef = TRUE,
cor.coef.coord = 100*c(0.3, 0.05),
ggtheme = theme_bw(),
add = "reg.line",
add.params = list(color = "grey30", linetype = "dashed"),
conf.int = FALSE,
cor.coeff.args = list(p.accuracy = 1e-5))
ggsave(here("output/CUP_VAF_vs_ichor.pdf"), width = 4, height = 4)
combinedMAFdata %>%
mutate(actionable_drug = case_when(!is.na(LEVEL_1) ~ LEVEL_1,
!is.na(LEVEL_2) ~ LEVEL_2,
!is.na(LEVEL_3A) ~ LEVEL_3A,
!is.na(LEVEL_3B) ~ LEVEL_3B,
)) %>%
dplyr::select(ID = Tumor_Sample_Barcode, Hugo_Symbol, HGVSc, HGVSp_Short, Transcript_ID,
Variant_Classification, ONCOGENIC, HIGHEST_SENSITIVE_LEVEL, actionable,actionable_drug,
Tumour_depth, Normal_depth, Tumour_AF, Normal_AF,Tumour_alt, Chromosome, Start_Position, End_Position) %>%
write_csv(here("output/Mutation_table.csv"))
dir.create(here("source_data"), showWarnings = FALSE)
ichorVsVAFdata %>%
write_csv(here("source_data/CUP_ichor_vs_VAF.csv"))
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.30 ggpubr_0.6.0 janitor_2.2.0 maftools_2.16.0
## [5] forcats_1.0.0 readr_2.1.4 dplyr_1.1.4 stringr_1.5.1
## [9] tibble_3.2.1 ggplot2_3.4.4 purrr_1.0.2 tidyr_1.3.1
## [13] here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.4 xfun_0.40 bslib_0.5.1 htmlwidgets_1.6.2
## [5] rstatix_0.7.2 lattice_0.21-8 tzdb_0.4.0 crosstalk_1.2.0
## [9] vctrs_0.6.5 tools_4.3.1 generics_0.1.3 parallel_4.3.1
## [13] fansi_1.0.6 highr_0.10 pkgconfig_2.0.3 Matrix_1.6-1
## [17] data.table_1.14.8 RColorBrewer_1.1-3 lifecycle_1.0.4 farver_2.1.1
## [21] compiler_4.3.1 textshaping_0.3.6 munsell_0.5.0 carData_3.0-5
## [25] snakecase_0.11.1 htmltools_0.5.6 sass_0.4.7 yaml_2.3.7
## [29] pillar_1.9.0 car_3.1-2 crayon_1.5.2 jquerylib_0.1.4
## [33] ellipsis_0.3.2 cachem_1.0.8 abind_1.4-5 nlme_3.1-163
## [37] tidyselect_1.2.0 digest_0.6.33 stringi_1.8.3 labeling_0.4.3
## [41] splines_4.3.1 rprojroot_2.0.3 fastmap_1.1.1 grid_4.3.1
## [45] colorspace_2.1-0 cli_3.6.2 magrittr_2.0.3 survival_3.5-7
## [49] utf8_1.2.4 broom_1.0.5 withr_3.0.0 scales_1.2.1
## [53] backports_1.4.1 bit64_4.0.5 lubridate_1.9.2 timechange_0.2.0
## [57] rmarkdown_2.24 bit_4.0.5 ggsignif_0.6.4 ragg_1.2.5
## [61] hms_1.1.3 DNAcopy_1.74.1 evaluate_0.21 knitr_1.43
## [65] mgcv_1.9-0 rlang_1.1.3 glue_1.7.0 rstudioapi_0.15.0
## [69] vroom_1.6.3 jsonlite_1.8.7 R6_2.5.1 systemfonts_1.0.4