knitr::opts_knit$set(root.dir = '..', fig.width = 12, fig.height = 8, warning = FALSE, message = FALSE) 

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

Introduction

This document looks at what mutations are found within the CUP cohort. The data for these has been put through a GRCh38 snakemake-based pipeline (not included in this repo), and high confidence mutations are determined as those which are called by both CLC and Mutect2. This file must be ran after the CUPiD_cfDNA_application.Rmd file, as this generates the output/CUPsummary.csv file that describes the predictions made on the CUP samples.

Read in the CUPiD predictions and some clinically resolved diagnosis:

if(!file.exists(here("output/CUPsummary.csv"))){
  stop("Please run CUPiD_cfDNA_application.Rmd first, to generate the CUPiD predictions.")
}

CUPIDpredict <- read_csv(here("output/CUPsummary.csv"))

CUPclin <- read_csv(here("figure_inputs/CUPClin.csv")) %>%
    left_join(CUPIDpredict %>% dplyr::select(sample_name, CUPiD_Prediction = call)) %>%
    dplyr::rename(Tumor_Sample_Barcode = sample_name,
                  Primary_Diagnosis = diagnosed_class) %>%
  mutate(Primary_Diagnosis = ifelse(Primary_Diagnosis == "DLBC","Lymphoma", Primary_Diagnosis))

Read a csv file with the mutations found for each sample:

combinedMAFdata <- read_csv(here("figure_inputs/CUP_highconfidence_mutations.csv")) %>%
  filter(Variant_Classification %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", 
                                       "Translation_Start_Site","Nonsense_Mutation", "Nonstop_Mutation",
                                       "In_Frame_Del","In_Frame_Ins", "Missense_Mutation")) %>%
  mutate(gnomAD_AF = replace_na(gnomAD_AF,0)) %>%
  filter(gnomAD_AF <= 0.01) %>%
  janitor::remove_empty(which = "cols") %>%
  dplyr::select(-matches("gnomAD_[A-Z]*_AF|^[A-Z]*_AF$|^AF$", ignore.case = FALSE)) %>%
  mutate(actionable = ifelse(HIGHEST_SENSITIVE_LEVEL %in% c("LEVEL_1","LEVEL_2","LEVEL_3A","LEVEL_3B"),"Actionable","Not")) %>%
  dplyr::select(Hugo_Symbol, Tumor_Sample_Barcode, everything()) %>%
  janitor::remove_constant()
  
combinedMAFdata %>%
  janitor::remove_empty("cols", quiet = FALSE) %>%
  dplyr::select(sample_name = Tumor_Sample_Barcode, everything()) %>%
  write_csv(here("output/CUP_final_mutations.csv"))

Maximum VAF in any sample:

combinedMAFdata %>% 
  group_by(Tumor_Sample_Barcode) %>% 
  summarise(medianAF = median(Tumour_AF)) %>% 
  left_join(CUPclin,.) %>% 
  mutate(medianAF = replace_na(medianAF, 0)) %>% 
  filter(Tumor_Sample_Barcode != "C006169") %>% 
  pull(medianAF) %>% 
  max()
## [1] 0.6129952

Number of patients with mutations:

combinedMAFdata %>%
  dplyr::distinct(Tumor_Sample_Barcode) %>%
  nrow()
## [1] 33

Number of patients with actionable mutations (Level 3 and above):

combinedMAFdata %>%
  filter(actionable == "Actionable") %>%
  dplyr::distinct(Tumor_Sample_Barcode) %>%
  nrow()
## [1] 7

Number of actionable mutations:

combinedMAFdata %>%
  filter(actionable == "Actionable") %>%
  nrow()
## [1] 9

Table of actionable mutations:

combinedMAFdata %>% 
  filter(actionable == "Actionable") %>%
  dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol, HGVSc, HIGHEST_SENSITIVE_LEVEL, Tumour_AF, Normal_AF, actionable) %>%
  arrange(HIGHEST_SENSITIVE_LEVEL) %>%
  DT::datatable()

Number of oncogenic mutations:

combinedMAFdata %>%
    filter(str_detect(ONCOGENIC,"Onco")) %>%
    nrow()
## [1] 60

Plot an Oncoplot

Now we plot an Oncoplot for the mutations found, restricted to those mutations marked as (potentially) oncogenic by OncoKB. C006169 is removed due to potential genomic contamination by NGSCheckMate.

samplesWithMutations <- combinedMAFdata %>% 
  pull(Tumor_Sample_Barcode) %>% 
  unique()

###  maftools doesn't like samples with no mutations in the maf object. 
# So we can add a silent mutation to these samples, which will get ignored later
combMafDataWithExtraSilents <- combinedMAFdata %>% 
  filter(Variant_Classification == "Silent") %>% 
  dplyr::slice(1) %>%
  dplyr::select(-Tumor_Sample_Barcode) %>%
  left_join(CUPclin %>% 
              filter(Tumor_Sample_Barcode != "C006169") %>%
              filter(!(Tumor_Sample_Barcode %in% samplesWithMutations)) %>%
              dplyr::select(Tumor_Sample_Barcode) %>% 
              mutate(Variant_Classification = "Silent"), 
            multiple = "all") %>%
  bind_rows(combinedMAFdata,.)

# don't know why complaining with a warning here, but works fine. 
cup_maf <- read.maf(combMafDataWithExtraSilents, 
                    verbose = T, 
                    clinicalData = CUPclin %>% filter(Tumor_Sample_Barcode != "C006169"), 
                    vc_nonSyn = c("Frame_Shift_Del", "Frame_Shift_Ins",
                                  "Splice_Site","Translation_Start_Site","Nonsense_Mutation",
                                  "Nonstop_Mutation", "In_Frame_Del","In_Frame_Ins", "Missense_Mutation"))
## -Validating
## -Summarizing
## --Possible FLAGS among top ten genes:
##   TTN
##   MUC16
##   SYNE1
## -Processing clinical data
## -Finished in 0.035s elapsed (0.031s cpu)

Plot Oncoplot

## quartz_off_screen 
##                 2

Plot average VAF vs ichorCNA Tumour Fraction

avgVAFdata <- combinedMAFdata %>%
  dplyr::rename(sample_name = Tumor_Sample_Barcode) %>%
  group_by(sample_name) %>%
  summarise(avgVAF = median(as.numeric(Tumour_AF), na.rm = TRUE),
            avgVAF = replace_na(avgVAF,0))

ichorVsVAFdata <- read_csv(here("figure_inputs/cfDNA_qseaSummary.csv")) %>%
  filter(correct_class == "CUP", sample_name != "C006169") %>%
  distinct(sample_name, ichorTumourFraction) %>%
  left_join(avgVAFdata) %>%
  mutate(avgVAF = replace_na(avgVAF, 0 ),
         ichorTumourFraction = 100*ichorTumourFraction,
         avgVAF = avgVAF*100
         ) 

ichorVsVAFdata %>%
  ggpubr::ggscatter(x = "ichorTumourFraction",
                    y = "avgVAF",
                    xlab = "ichorCNA estimated tumour fraction (%)",
                    ylab = "Median Varient Allele Frequency (%)",
                    cor.coef = TRUE,
                    cor.coef.coord = 100*c(0.3, 0.05),
                    ggtheme = theme_bw(),
                    add = "reg.line",
                    add.params = list(color = "grey30", linetype = "dashed"),
                    conf.int = FALSE,
                    cor.coeff.args = list(p.accuracy = 1e-5))

ggsave(here("output/CUP_VAF_vs_ichor.pdf"), width = 4, height = 4)

Write supplementary data table for mutations, and a source data file

combinedMAFdata %>% 
  mutate(actionable_drug = case_when(!is.na(LEVEL_1) ~ LEVEL_1,
                          !is.na(LEVEL_2) ~ LEVEL_2,
                          !is.na(LEVEL_3A) ~ LEVEL_3A,
                          !is.na(LEVEL_3B) ~ LEVEL_3B,
                          )) %>%
  dplyr::select(ID = Tumor_Sample_Barcode, Hugo_Symbol, HGVSc, HGVSp_Short, Transcript_ID, 
                Variant_Classification, ONCOGENIC, HIGHEST_SENSITIVE_LEVEL, actionable,actionable_drug,
                Tumour_depth, Normal_depth, Tumour_AF, Normal_AF,Tumour_alt, Chromosome, Start_Position, End_Position) %>%
  write_csv(here("output/Mutation_table.csv"))

dir.create(here("source_data"), showWarnings = FALSE)
ichorVsVAFdata %>%
  write_csv(here("source_data/CUP_ichor_vs_VAF.csv"))
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.30         ggpubr_0.6.0    janitor_2.2.0   maftools_2.16.0
##  [5] forcats_1.0.0   readr_2.1.4     dplyr_1.1.4     stringr_1.5.1  
##  [9] tibble_3.2.1    ggplot2_3.4.4   purrr_1.0.2     tidyr_1.3.1    
## [13] here_1.0.1     
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.4       xfun_0.40          bslib_0.5.1        htmlwidgets_1.6.2 
##  [5] rstatix_0.7.2      lattice_0.21-8     tzdb_0.4.0         crosstalk_1.2.0   
##  [9] vctrs_0.6.5        tools_4.3.1        generics_0.1.3     parallel_4.3.1    
## [13] fansi_1.0.6        highr_0.10         pkgconfig_2.0.3    Matrix_1.6-1      
## [17] data.table_1.14.8  RColorBrewer_1.1-3 lifecycle_1.0.4    farver_2.1.1      
## [21] compiler_4.3.1     textshaping_0.3.6  munsell_0.5.0      carData_3.0-5     
## [25] snakecase_0.11.1   htmltools_0.5.6    sass_0.4.7         yaml_2.3.7        
## [29] pillar_1.9.0       car_3.1-2          crayon_1.5.2       jquerylib_0.1.4   
## [33] ellipsis_0.3.2     cachem_1.0.8       abind_1.4-5        nlme_3.1-163      
## [37] tidyselect_1.2.0   digest_0.6.33      stringi_1.8.3      labeling_0.4.3    
## [41] splines_4.3.1      rprojroot_2.0.3    fastmap_1.1.1      grid_4.3.1        
## [45] colorspace_2.1-0   cli_3.6.2          magrittr_2.0.3     survival_3.5-7    
## [49] utf8_1.2.4         broom_1.0.5        withr_3.0.0        scales_1.2.1      
## [53] backports_1.4.1    bit64_4.0.5        lubridate_1.9.2    timechange_0.2.0  
## [57] rmarkdown_2.24     bit_4.0.5          ggsignif_0.6.4     ragg_1.2.5        
## [61] hms_1.1.3          DNAcopy_1.74.1     evaluate_0.21      knitr_1.43        
## [65] mgcv_1.9-0         rlang_1.1.3        glue_1.7.0         rstudioapi_0.15.0 
## [69] vroom_1.6.3        jsonlite_1.8.7     R6_2.5.1           systemfonts_1.0.4