# Install gnomeR from GitHub. devtools is only installed when it is not
# already available, so re-running this snippet does not reinstall it.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("AxelitoMartin/gnomeR")
gnomeR requires the following packages: ComplexHeatmap, iClusterPlus, cluster (installed with gnomeR).
gnomeR is an R package that aims to process and analyze genetic data from cBioPortal. We include in this package the mutation, copy number alteration (CNA), fusion and clinical information of all publicly available data from cBioPortal.
# Preview the main columns of the mutation data `mut` as a tibble.
# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(mut) %>%
  select(
    Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification,
    Variant_Type, Reference_Allele, Tumor_Seq_Allele2
  )
#> # A tibble: 3,179 x 6
#> Tumor_Sample_Ba… Hugo_Symbol Variant_Classif… Variant_Type Reference_Allele
#> <fct> <fct> <fct> <fct> <fct>
#> 1 P-0000062-T01-I… TP53 Missense_Mutati… SNP G
#> 2 P-0000062-T01-I… EZH2 Missense_Mutati… SNP G
#> 3 P-0000062-T01-I… MDM2 Missense_Mutati… SNP A
#> 4 P-0000062-T01-I… IGF1R Missense_Mutati… SNP T
#> 5 P-0000062-T01-I… KEAP1 Missense_Mutati… SNP G
#> 6 P-0000062-T01-I… KDM5C Missense_Mutati… SNP G
#> 7 P-0000065-T01-I… KRAS Missense_Mutati… SNP C
#> 8 P-0000065-T01-I… TERT 5'Flank SNP G
#> 9 P-0000065-T01-I… MAP2K1 Missense_Mutati… SNP T
#> 10 P-0000065-T01-I… NCOR1 Nonsense_Mutati… SNP C
#> # … with 3,169 more rows, and 1 more variable: Tumor_Seq_Allele2 <fct>
# Preview the first 5 rows and 5 columns of the copy number data `cna`.
# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(cna[1:5, 1:5])
#> # A tibble: 5 x 5
#> Hugo_Symbol P.0001890.T01.I… P.0000569.T01.I… P.0000795.T01.I…
#> <fct> <int> <int> <int>
#> 1 ABL1 0 0 0
#> 2 ACVR1 0 0 0
#> 3 AKT1 0 0 0
#> 4 AKT2 0 0 0
#> 5 AKT3 0 0 0
#> # … with 1 more variable: P.0002060.T01.IM3 <int>
# Preview the sample, gene and fusion label columns of the `fusion` data.
# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(fusion) %>%
  select(Tumor_Sample_Barcode, Hugo_Symbol, Fusion)
#> # A tibble: 127 x 3
#> Tumor_Sample_Barcode Hugo_Symbol Fusion
#> <fct> <fct> <fct>
#> 1 P-0010011-T01-IM5 PAX8 PAX8-intragenic
#> 2 P-0010977-T01-IM5 TFE3 ASPSCR1-TFE3 fusion
#> 3 P-0010977-T01-IM5 ASPSCR1 ASPSCR1-TFE3 fusion
#> 4 P-0010398-T01-IM5 BRAF OSBPL9-BRAF fusion
#> 5 P-0010398-T01-IM5 OSBPL9 OSBPL9-BRAF fusion
#> 6 P-0010177-T01-IM5 ALK EML4-ALK fusion
#> 7 P-0010177-T01-IM5 EML4 EML4-ALK fusion
#> 8 P-0010604-T01-IM5 MLL3 MLL3-intragenic
#> 9 P-0010794-T01-IM5 ERG TMPRSS2-ERG fusion
#> 10 P-0010794-T01-IM5 TMPRSS2 TMPRSS2-ERG fusion
#> # … with 117 more rows
# Preview the first rows of the patient-level clinical data.
# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(head(clin.patients))
#> # A tibble: 6 x 6
#> X.Patient.Ident… Sex Patient.s.Vital… Smoking.History Overall.Surviva…
#> <fct> <fct> <fct> <fct> <fct>
#> 1 P-0000062 Male DECEASED Never 23.44
#> 2 P-0000065 Fema… DECEASED Prev/Curr Smok… 12.69
#> 3 P-0000080 Fema… DECEASED Never <NA>
#> 4 P-0000088 Male DECEASED Never 26.13
#> 5 P-0000096 Male DECEASED Prev/Curr Smok… 17.62
#> 6 P-0000113 Fema… ALIVE Prev/Curr Smok… 31.85
#> # … with 1 more variable: Overall.Survival.Status <fct>
# Preview the first rows of the sample-level clinical data.
# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(head(clin.sample))
#> # A tibble: 6 x 16
#> X.Patient.Ident… Sample.Identifi… Sample.Collecti… Specimen.Preser…
#> <fct> <fct> <fct> <fct>
#> 1 P-0002656 P-0002656-T01-I… Outside FFPE
#> 2 P-0001680 P-0001680-T02-I… Outside FFPE
#> 3 P-0001741 P-0001741-T01-I… Outside FFPE
#> 4 P-0002640 P-0002640-T01-I… Outside FFPE
#> 5 P-0002091 P-0002091-T01-I… In-House FFPE
#> 6 P-0001197 P-0001197-T01-I… Outside FFPE
#> # … with 12 more variables: Specimen.Type <fct>, DNA.Input <fct>,
#> # Sample.coverage <fct>, Tumor.Purity <fct>, Matched.Status <fct>,
#> # Sample.Type <fct>, Primary.Tumor.Site <fct>, Metastatic.Site <fct>,
#> # Sample.Class <fct>, Oncotree.Code <fct>, Cancer.Type <fct>,
#> # Cancer.Type.Detailed <fct>
MAF files are the standard file format for mutation information. Each line represents a single mutation mapped to a sample, a particular gene and a specific effect. All these fields are required in order to properly process the file. The IMPACT platform sequences a set of targeted oncogenic genes that are on cBioPortal.
We can create a binary matrix of genetic events from the files described above. If a patient has a mutation in gene X the entry will be marked with a 1, otherwise it will be a 0. This function has the following arguments:
This function will return a binary matrix of genetic events with patients as rows and genes as columns, along with a list of patients that weren’t found to have any events (if any).
# Use (up to) the first 200 distinct sample barcodes found in the MAF data.
# head() is used instead of `[1:200]` so that a cohort with fewer than 200
# samples does not get padded with NA identifiers.
patients <- head(as.character(unique(mut$Tumor_Sample_Barcode)), 200)

# Build the binary matrix of mutation events for these samples.
bin.mut <- binmat(
  patients = patients,
  maf = mut,
  mut.type = "SOMATIC",
  SNP.only = FALSE,
  include.silent = FALSE,
  specify.plat = TRUE
)

# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(bin.mut)
#> # A tibble: 200 x 275
#> TP53 EZH2 MDM2 IGF1R KEAP1 KDM5C KRAS TERT MAP2K1 NCOR1 DDR2 IDH1
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 1 1 1 1 0 0 0 0 0 0
#> 2 0 0 0 0 0 0 1 1 1 1 0 0
#> 3 1 0 0 0 0 0 0 0 0 0 1 1
#> 4 0 0 0 0 0 0 0 0 0 0 0 0
#> 5 1 0 0 0 0 0 0 0 0 0 0 0
#> 6 0 0 0 0 0 1 1 0 0 1 0 0
#> 7 1 0 0 0 0 0 1 0 0 0 0 0
#> 8 1 0 0 1 0 0 0 1 0 0 0 0
#> 9 0 0 0 0 0 0 0 0 0 0 0 0
#> 10 1 0 0 0 0 0 0 0 0 0 0 0
#> # … with 190 more rows, and 263 more variables: FIP1L1 <dbl>, EPHA5 <dbl>,
#> # RASA1 <dbl>, RAD50 <dbl>, ESR1 <dbl>, HGF <dbl>, MLL3 <dbl>, RET <dbl>,
#> # FLT1 <dbl>, BRCA2 <dbl>, RB1 <dbl>, PTPRT <dbl>, AR <dbl>, FOXL2 <dbl>,
#> # FLT4 <dbl>, PIK3CA <dbl>, EP300 <dbl>, FBXW7 <dbl>, MPL <dbl>, RIT1 <dbl>,
#> # NTRK1 <dbl>, FAT1 <dbl>, RICTOR <dbl>, TNFAIP3 <dbl>, ARID1B <dbl>,
#> # GNAQ <dbl>, SYK <dbl>, TSC1 <dbl>, NOTCH1 <dbl>, PAK1 <dbl>, MLL2 <dbl>,
#> # POLE <dbl>, TRAF7 <dbl>, NF1 <dbl>, GNA11 <dbl>, INSR <dbl>, AXL <dbl>,
#> # ERG <dbl>, ARID1A <dbl>, MLH1 <dbl>, EGFR <dbl>, TBX3 <dbl>, MDC1 <dbl>,
#> # RECQL4 <dbl>, SPEN <dbl>, NTRK2 <dbl>, STAG2 <dbl>, CREBBP <dbl>,
#> # AKT1 <dbl>, CTNNB1 <dbl>, PDPK1 <dbl>, CTCF <dbl>, CDH1 <dbl>,
#> # PPP2R1A <dbl>, CCND1 <dbl>, RUNX1 <dbl>, CARD11 <dbl>, ARID5B <dbl>,
#> # ERBB2 <dbl>, BCOR <dbl>, BRAF <dbl>, ATM <dbl>, MTOR <dbl>, STK40 <dbl>,
#> # SPOP <dbl>, RAF1 <dbl>, MAP3K13 <dbl>, BRCA1 <dbl>, CIC <dbl>,
#> # RPS6KA4 <dbl>, AKT3 <dbl>, NTRK3 <dbl>, ATRX <dbl>, PBRM1 <dbl>,
#> # ASXL2 <dbl>, SF3B1 <dbl>, PIK3CB <dbl>, TET2 <dbl>, CDKN2B <dbl>,
#> # MRE11A <dbl>, CBL <dbl>, TSHR <dbl>, BLM <dbl>, CBFB <dbl>, DOT1L <dbl>,
#> # RBM10 <dbl>, SMAD3 <dbl>, CDK8 <dbl>, GATA2 <dbl>, STK11 <dbl>,
#> # SMARCA4 <dbl>, PIK3C2G <dbl>, HIST1H3B <dbl>, MYOD1 <dbl>, APC <dbl>,
#> # EPHA3 <dbl>, ETV6 <dbl>, PTCH1 <dbl>, TET1 <dbl>, BMPR1A <dbl>, …
Similarly including fusions and CNAs:
# Same call as for mutations only, now also passing the fusion and CNA data
# so that those events are included in the binary matrix.
bin.mut <- binmat(
  patients = patients,
  maf = mut,
  mut.type = "SOMATIC",
  SNP.only = FALSE,
  include.silent = FALSE,
  fusion = fusion,
  cna = cna,
  specify.plat = TRUE
)

# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(bin.mut)
#> # A tibble: 200 x 452
#> TP53 EZH2 MDM2 IGF1R KEAP1 KDM5C KRAS TERT MAP2K1 NCOR1 DDR2 IDH1
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 1 1 1 1 0 0 0 0 0 0
#> 2 0 0 0 0 0 0 1 1 1 1 0 0
#> 3 1 0 0 0 0 0 0 0 0 0 1 1
#> 4 0 0 0 0 0 0 0 0 0 0 0 0
#> 5 1 0 0 0 0 0 0 0 0 0 0 0
#> 6 0 0 0 0 0 1 1 0 0 1 0 0
#> 7 1 0 0 0 0 0 1 0 0 0 0 0
#> 8 1 0 0 1 0 0 0 1 0 0 0 0
#> 9 0 0 0 0 0 0 0 0 0 0 0 0
#> 10 1 0 0 0 0 0 0 0 0 0 0 0
#> # … with 190 more rows, and 440 more variables: FIP1L1 <dbl>, EPHA5 <dbl>,
#> # RASA1 <dbl>, RAD50 <dbl>, ESR1 <dbl>, HGF <dbl>, MLL3 <dbl>, RET <dbl>,
#> # FLT1 <dbl>, BRCA2 <dbl>, RB1 <dbl>, PTPRT <dbl>, AR <dbl>, FOXL2 <dbl>,
#> # FLT4 <dbl>, PIK3CA <dbl>, EP300 <dbl>, FBXW7 <dbl>, MPL <dbl>, RIT1 <dbl>,
#> # NTRK1 <dbl>, FAT1 <dbl>, RICTOR <dbl>, TNFAIP3 <dbl>, ARID1B <dbl>,
#> # GNAQ <dbl>, SYK <dbl>, TSC1 <dbl>, NOTCH1 <dbl>, PAK1 <dbl>, MLL2 <dbl>,
#> # POLE <dbl>, TRAF7 <dbl>, NF1 <dbl>, GNA11 <dbl>, INSR <dbl>, AXL <dbl>,
#> # ERG <dbl>, ARID1A <dbl>, MLH1 <dbl>, EGFR <dbl>, TBX3 <dbl>, MDC1 <dbl>,
#> # RECQL4 <dbl>, SPEN <dbl>, NTRK2 <dbl>, STAG2 <dbl>, CREBBP <dbl>,
#> # AKT1 <dbl>, CTNNB1 <dbl>, PDPK1 <dbl>, CTCF <dbl>, CDH1 <dbl>,
#> # PPP2R1A <dbl>, CCND1 <dbl>, RUNX1 <dbl>, CARD11 <dbl>, ARID5B <dbl>,
#> # ERBB2 <dbl>, BCOR <dbl>, BRAF <dbl>, ATM <dbl>, MTOR <dbl>, STK40 <dbl>,
#> # SPOP <dbl>, RAF1 <dbl>, MAP3K13 <dbl>, BRCA1 <dbl>, CIC <dbl>,
#> # RPS6KA4 <dbl>, AKT3 <dbl>, NTRK3 <dbl>, ATRX <dbl>, PBRM1 <dbl>,
#> # ASXL2 <dbl>, SF3B1 <dbl>, PIK3CB <dbl>, TET2 <dbl>, CDKN2B <dbl>,
#> # MRE11A <dbl>, CBL <dbl>, TSHR <dbl>, BLM <dbl>, CBFB <dbl>, DOT1L <dbl>,
#> # RBM10 <dbl>, SMAD3 <dbl>, CDK8 <dbl>, GATA2 <dbl>, STK11 <dbl>,
#> # SMARCA4 <dbl>, PIK3C2G <dbl>, HIST1H3B <dbl>, MYOD1 <dbl>, APC <dbl>,
#> # EPHA3 <dbl>, ETV6 <dbl>, PTCH1 <dbl>, TET1 <dbl>, BMPR1A <dbl>, …
The `gen.summary` function allows us to test for potential differences in genetic event frequencies using Fisher’s exact test for unpaired data and the McNemar exact test for paired data. This function takes the following arguments:
# Outcome: the sample type of each selected sample, looked up in clin.sample
# by sample identifier (NA where a sample has no clinical record).
outcome <- as.character(
  clin.sample$Sample.Type[match(patients, clin.sample$Sample.Identifier)]
)
gen.dat <- bin.mut

# Compare event frequencies between outcome groups.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
bin.test <- gen.summary(
  gen.dat = gen.dat,
  outcome = outcome,
  filter = 0.05,
  paired = FALSE,
  cont = FALSE,
  rank = TRUE
)
kable(bin.test$fits, row.names = TRUE)
Feature | Overall | Metastasis(N=94) | Primary(N=106) | OddsRatio | Pvalue | FDR | Lower | Upper | |
---|---|---|---|---|---|---|---|---|---|
CREBBP | CREBBP | 5% | 8.51% | 1.89% | 0.21 | 4.82e-02 | 9.73e-01 | 0.02 | 1.08 |
FAT1 | FAT1 | 8% | 11.7% | 4.72% | 0.38 | 1.15e-01 | 9.73e-01 | 0.1 | 1.23 |
EGFR | EGFR | 8.5% | 11.7% | 5.66% | 0.45 | 1.37e-01 | 9.73e-01 | 0.13 | 1.41 |
BRAF | BRAF | 5% | 7.45% | 2.83% | 0.36 | 1.95e-01 | 9.73e-01 | 0.06 | 1.65 |
ARID1A | ARID1A | 7% | 9.57% | 4.72% | 0.47 | 2.67e-01 | 9.73e-01 | 0.12 | 1.63 |
ATRX | ATRX | 7.5% | 5.32% | 9.43% | 1.85 | 2.97e-01 | 9.73e-01 | 0.55 | 7.17 |
PTEN | PTEN | 5% | 3.19% | 6.6% | 2.14 | 3.40e-01 | 9.73e-01 | 0.47 | 13.19 |
CDKN2A | CDKN2A | 5.5% | 7.45% | 3.77% | 0.49 | 3.54e-01 | 9.73e-01 | 0.1 | 2 |
SMARCA4 | SMARCA4 | 5% | 6.38% | 3.77% | 0.58 | 5.21e-01 | 1.00e+00 | 0.12 | 2.52 |
MLL2 | MLL2 | 7% | 8.51% | 5.66% | 0.65 | 5.80e-01 | 1.00e+00 | 0.18 | 2.22 |
TP53 | TP53 | 47% | 48.94% | 45.28% | 0.86 | 6.71e-01 | 1.00e+00 | 0.48 | 1.56 |
TERT | TERT | 13.5% | 14.89% | 12.26% | 0.8 | 6.80e-01 | 1.00e+00 | 0.32 | 1.96 |
ATM | ATM | 5% | 4.26% | 5.66% | 1.35 | 7.52e-01 | 1.00e+00 | 0.31 | 6.71 |
MLL3 | MLL3 | 6.5% | 7.45% | 5.66% | 0.75 | 7.75e-01 | 1.00e+00 | 0.2 | 2.71 |
APC | APC | 8.5% | 7.45% | 9.43% | 1.29 | 8.00e-01 | 1.00e+00 | 0.42 | 4.19 |
KRAS | KRAS | 13.5% | 12.77% | 14.15% | 1.13 | 8.38e-01 | 1.00e+00 | 0.46 | 2.8 |
KEAP1 | KEAP1 | 6% | 6.38% | 5.66% | 0.88 | 1.00e+00 | 1.00e+00 | 0.23 | 3.42 |
RB1 | RB1 | 7% | 7.45% | 6.6% | 0.88 | 1.00e+00 | 1.00e+00 | 0.25 | 3.07 |
PIK3CA | PIK3CA | 11.5% | 11.7% | 11.32% | 0.96 | 1.00e+00 | 1.00e+00 | 0.37 | 2.55 |
NOTCH1 | NOTCH1 | 6.5% | 6.38% | 6.6% | 1.04 | 1.00e+00 | 1.00e+00 | 0.29 | 3.89 |
NF1 | NF1 | 7% | 7.45% | 6.6% | 0.88 | 1.00e+00 | 1.00e+00 | 0.25 | 3.07 |
STK11 | STK11 | 6% | 6.38% | 5.66% | 0.88 | 1.00e+00 | 1.00e+00 | 0.23 | 3.42 |
# Display the `forest.plot` element of the gen.summary() result.
bin.test$forest.plot
# Display the `vPlot` element of the result (presumably a volcano plot — confirm).
bin.test$vPlot
Similarly we show here an example with a simulated continuous outcome:
# Simulate a continuous outcome: one standard-normal draw per row of gen.dat.
# The seed is fixed so the example is reproducible.
set.seed(1)
outcome <- rnorm(n = nrow(gen.dat))

# Same analysis as above, with cont = TRUE for the continuous outcome.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
tab.out <- gen.summary(
  gen.dat = gen.dat,
  outcome = outcome,
  filter = 0.05,
  paired = FALSE,
  cont = TRUE,
  rank = TRUE
)
kable(tab.out$fits, row.names = TRUE)
Estimate | SD | Pvalue | MutationFreq | FDR | |
---|---|---|---|---|---|
CREBBP | 0.71 | 0.30 | 0.0187 | 0.05 | 4.11e-01 |
RB1 | -0.50 | 0.26 | 0.0498 | 0.07 | 5.24e-01 |
BRAF | 0.54 | 0.30 | 0.0714 | 0.05 | 5.24e-01 |
STK11 | -0.39 | 0.28 | 0.1560 | 0.06 | 7.00e-01 |
KEAP1 | -0.38 | 0.28 | 0.1710 | 0.06 | 7.00e-01 |
TERT | 0.25 | 0.19 | 0.1910 | 0.14 | 7.00e-01 |
FAT1 | 0.21 | 0.24 | 0.3930 | 0.08 | 9.24e-01 |
SMARCA4 | -0.24 | 0.30 | 0.4290 | 0.05 | 9.24e-01 |
APC | 0.16 | 0.24 | 0.5100 | 0.09 | 9.24e-01 |
NF1 | 0.16 | 0.26 | 0.5340 | 0.07 | 9.24e-01 |
EGFR | -0.14 | 0.24 | 0.5410 | 0.09 | 9.24e-01 |
ATM | -0.18 | 0.30 | 0.5560 | 0.05 | 9.24e-01 |
MLL3 | -0.15 | 0.27 | 0.5680 | 0.06 | 9.24e-01 |
ATRX | -0.11 | 0.25 | 0.6550 | 0.07 | 9.24e-01 |
PTEN | -0.13 | 0.30 | 0.6690 | 0.05 | 9.24e-01 |
CDKN2A | 0.12 | 0.29 | 0.6720 | 0.06 | 9.24e-01 |
KRAS | -0.07 | 0.19 | 0.7270 | 0.14 | 9.41e-01 |
PIK3CA | -0.05 | 0.21 | 0.7960 | 0.12 | 9.73e-01 |
TP53 | -0.02 | 0.13 | 0.9070 | 0.47 | 9.82e-01 |
ARID1A | -0.02 | 0.26 | 0.9500 | 0.07 | 9.82e-01 |
NOTCH1 | 0.01 | 0.27 | 0.9600 | 0.06 | 9.82e-01 |
MLL2 | -0.01 | 0.26 | 0.9820 | 0.07 | 9.82e-01 |
# Display the `vPlot` element of the continuous-outcome result
# (presumably a volcano plot — confirm).
tab.out$vPlot
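We further include `uni.cox` for univariate survival analysis if time-to-event data is available. This function takes as inputs the genetic data (`X`), the survival data (`surv.dat`), a frequency `filter`, and a survival formula (`surv.formula`) of the form `Surv(time,status)~.`. Note that delayed entry is allowed, using the form `Surv(time1,time2,status)~.`.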
# Build the survival data set for the selected samples.
# Sample barcodes are shortened with abbreviate(..., minlength = 9) to match
# them against the patient identifiers in clin.patients (presumably the first
# 9 characters of a barcode are the patient ID — confirm against the data).
surv.dat <- clin.patients %>%
  filter(
    X.Patient.Identifier %in%
      abbreviate(patients, strict = TRUE, minlength = 9)
  ) %>%
  select(
    X.Patient.Identifier, Overall.Survival..Months., Overall.Survival.Status
  ) %>%
  rename(
    DMPID = X.Patient.Identifier,
    time = Overall.Survival..Months.,
    status = Overall.Survival.Status
  ) %>%
  mutate(
    # Go through as.character() first: the column is a factor, and
    # as.numeric() on a factor would return the level codes.
    time = as.numeric(as.character(time)),
    # NOTE(review): only the exact label "LIVING" is coded as censored (0);
    # every other non-missing status is coded as an event (1) — confirm the
    # labels used in Overall.Survival.Status.
    status = ifelse(status == "LIVING", 0, 1)
  ) %>%
  filter(!is.na(time))

# Align the rows of the binary matrix with the rows of surv.dat.
X <- bin.mut[
  match(
    surv.dat$DMPID,
    abbreviate(rownames(bin.mut), strict = TRUE, minlength = 9)
  ),
]

surv.test <- uni.cox(
  X = X,
  surv.dat = surv.dat,
  surv.formula = Surv(time, status) ~ .,
  filter = 0.05
)
kable(surv.test$tab, row.names = TRUE)
Feature | Coefficient | HR | Pvalue | FDR | MutationFrequency | |
---|---|---|---|---|---|---|
EGFR | EGFR | 0.84 | 2.32 | 0.00821 | 0.214 | 0.10 |
MLL | MLL | 0.94 | 2.57 | 0.01960 | 0.227 | 0.06 |
RB1 | RB1 | 0.72 | 2.06 | 0.03480 | 0.227 | 0.09 |
CREBBP | CREBBP | 0.90 | 2.46 | 0.03490 | 0.227 | 0.06 |
MLL2 | MLL2 | 0.56 | 1.76 | 0.08590 | 0.341 | 0.10 |
TP53 | TP53 | 0.40 | 1.49 | 0.08780 | 0.341 | 0.44 |
ATRX | ATRX | -1.21 | 0.30 | 0.09180 | 0.341 | 0.07 |
NF1 | NF1 | 0.62 | 1.87 | 0.14800 | 0.455 | 0.06 |
KRAS | KRAS | 0.48 | 1.62 | 0.15800 | 0.455 | 0.13 |
CDH1 | CDH1 | -0.97 | 0.38 | 0.17500 | 0.456 | 0.06 |
APC | APC | -0.65 | 0.52 | 0.27000 | 0.617 | 0.07 |
KEAP1 | KEAP1 | 0.42 | 1.52 | 0.29100 | 0.617 | 0.08 |
PTEN | PTEN | -0.60 | 0.55 | 0.30900 | 0.617 | 0.06 |
TERT | TERT | 0.30 | 1.35 | 0.34800 | 0.646 | 0.14 |
EPHA5 | EPHA5 | -0.27 | 0.77 | 0.60300 | 0.996 | 0.06 |
PIK3CA | PIK3CA | 0.16 | 1.17 | 0.67100 | 0.996 | 0.11 |
NOTCH1 | NOTCH1 | -0.21 | 0.81 | 0.67900 | 0.996 | 0.07 |
SMARCA4 | SMARCA4 | -0.18 | 0.83 | 0.72100 | 0.996 | 0.06 |
ARID1A | ARID1A | 0.12 | 1.12 | 0.78600 | 0.996 | 0.08 |
POLE | POLE | -0.12 | 0.89 | 0.84100 | 0.996 | 0.05 |
FAT1 | FAT1 | -0.07 | 0.94 | 0.86700 | 0.996 | 0.11 |
MLL3 | MLL3 | -0.06 | 0.94 | 0.89400 | 0.996 | 0.06 |
STK11 | STK11 | 0.02 | 1.02 | 0.96300 | 0.996 | 0.09 |
PTPRT | PTPRT | -0.01 | 0.99 | 0.97800 | 0.996 | 0.05 |
ATM | ATM | 0.01 | 1.01 | 0.97800 | 0.996 | 0.07 |
BRCA2 | BRCA2 | -18.16 | 0.00 | 0.99600 | 0.996 | 0.05 |
# Display the `p` element of the uni.cox() result (presumably a summary plot — confirm).
surv.test$p
# Display the first two entries of the `KM` list
# (presumably Kaplan-Meier plots — confirm).
surv.test$KM[[1]]
surv.test$KM[[2]]
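OncoPrints are a convenient way to study co-mutation patterns in our cohort through the `plot_oncoPrint` function. It takes as argument a binary matrix of genetic events, such as the one created by the `binmat` function, and optionally a table of clinical variables. We show here an example with the most common genes.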
# Pick the 15 most frequently altered genes, counting only columns whose names
# do not carry a ".Del", ".Amp" or ".fus" tag (presumably the CNA and fusion
# columns added by binmat — confirm).
#
# A logical mask from grepl() is used instead of `-grep()`: when no column
# matches, `-grep()` yields an empty index and would select zero columns.
# The dot is escaped so it matches a literal "." rather than any character.
is.mut.col <- !grepl("\\.(Del|Amp|fus)", colnames(bin.mut))
event.counts <- colSums(bin.mut[, is.mut.col, drop = FALSE])
# head() avoids NA names when fewer than 15 genes are available.
keep_genes <- names(head(sort(event.counts, decreasing = TRUE), 15))

# Keep every column starting with one of the selected gene names, so the
# tagged columns of those genes are included as well.
gen.dat <- bin.mut %>%
  select(starts_with(keep_genes))
plot_oncoPrint(gen.dat)
Similarly we include here an example adding patients’ clinical variables:
# Shorten the sample names of gen.dat with abbreviate(..., minlength = 9) so
# they can be matched to patient identifiers (presumably the patient ID is the
# first 9 characters of a sample name — confirm).
sample.patient.ids <- abbreviate(
  rownames(gen.dat),
  strict = TRUE, minlength = 9
)

# One clinical record per patient, restricted to the variables to display.
patient.rows <- match(sample.patient.ids, clin.patients$X.Patient.Identifier)
clin.patients.dat <- clin.patients[patient.rows, ] %>%
  rename(DMPID = X.Patient.Identifier, Smoker = Smoking.History) %>%
  select(DMPID, Sex, Smoker) %>%
  filter(!is.na(DMPID)) %>%
  distinct(DMPID, .keep_all = TRUE)

# Reorder the genetic data to follow the clinical table, then give both
# objects the same row names (the patient identifiers).
sample.rows <- match(clin.patients.dat$DMPID, sample.patient.ids)
gen.dat <- gen.dat[sample.rows, ]
clin.patients.dat <- tibble::column_to_rownames(clin.patients.dat, 'DMPID')
rownames(gen.dat) <- rownames(clin.patients.dat)

plot_oncoPrint(gen.dat = gen.dat, clin.dat = clin.patients.dat)
FACETs is a fully integrated stand-alone pipeline that includes sequencing BAM file post-processing, joint segmentation of total- and allele-specific read counts, and integer copy number calls corrected for tumor purity, ploidy and clonal heterogeneity, with comprehensive output and integrated visualization. We integrate the output of this tool into our package to visualise the copy number alteration events in our cohort. The segmentation file is now integrated into cBioPortal and we include it in our package. The FACETs output can be visualised using the `facets.heatmap` function, which takes as input a segmentation file (`seg`) and a vector of sample identifiers (`patients`).
This function returns a heatmap and the merged segmentation dataset used to create it:
# Samples among `patients` whose recorded tumor purity is above 30.
# Tumor.Purity is converted through as.character() first because it is a
# factor; values that cannot be parsed become NA and are dropped by filter().
patients.seg <- clin.sample %>%
  filter(
    Sample.Identifier %in% patients,
    as.numeric(as.character(Tumor.Purity)) > 30
  ) %>%
  select(Sample.Identifier) %>%
  unlist() %>%
  as.character()

# Use at most the first 100 samples. head() replaces `[0:100]`: R vectors are
# 1-indexed (a 0 index is silently dropped), and indexing past the end would
# pad the vector with NA.
facet <- facets.heatmap(seg = seg, patients = head(patients.seg, 100))
facet$p

# tibble::as_tibble() replaces as.tbl(), which is deprecated since dplyr 1.0.0.
tibble::as_tibble(facet$out.cn)
#> # A tibble: 100 x 644
#> `chr1.2488138-1… `chr1.11167550-… `chr1.14192672-… `chr1.15296019-…
#> <dbl> <dbl> <dbl> <dbl>
#> 1 0.0326 0.0326 0.0326 0.0326
#> 2 -0.0198 -0.0198 -0.0198 -0.0198
#> 3 0.0939 0.0939 0.0939 0.0939
#> 4 -0.155 0.0092 0.0092 0.0092
#> 5 -0.029 -0.029 -0.029 -0.029
#> 6 0.0431 0.0431 0.0431 0.0431
#> 7 0.113 0.113 0.113 0.113
#> 8 -0.408 -0.0501 -0.0501 -0.0501
#> 9 -0.0555 -0.0555 -0.0555 -0.0555
#> 10 -0.0391 -0.0391 -0.0391 -0.0391
#> # … with 90 more rows, and 640 more variables: `chr1.16199471-16235896` <dbl>,
#> # `chr1.16235896-17345415` <dbl>, `chr1.17345415-22587878` <dbl>,
#> # `chr1.22587878-30535114` <dbl>, `chr1.30535114-35013586` <dbl>,
#> # `chr1.35013586-35468429` <dbl>, `chr1.35468429-36411737` <dbl>,
#> # `chr1.36411737-45795044` <dbl>, `chr1.45795044-51434461` <dbl>,
#> # `chr1.51434461-59564373` <dbl>, `chr1.59564373-65300293` <dbl>,
#> # `chr1.65300293-65301840` <dbl>, `chr1.65301840-66042078` <dbl>,
#> # `chr1.66042078-70742063` <dbl>, `chr1.70742063-82939537` <dbl>,
#> # `chr1.82939537-96258925` <dbl>, `chr1.96258925-99589424` <dbl>,
#> # `chr1.99589424-108079945` <dbl>, `chr1.108079945-113234456` <dbl>,
#> # `chr1.113234456-117273534` <dbl>, `chr1.117273534-118145461` <dbl>,
#> # `chr1.118145461-118223984` <dbl>, `chr1.118223984-120199189` <dbl>,
#> # `chr1.120199189-120491113` <dbl>, `chr1.120491113-120572570` <dbl>,
#> # `chr1.120572570-120611984` <dbl>, `chr1.120611984-147282446` <dbl>,
#> # `chr1.147282446-152330945` <dbl>, `chr1.152330945-155870294` <dbl>,
#> # `chr1.155870294-156785626` <dbl>, `chr1.156785626-161284206` <dbl>,
#> # `chr1.161284206-162688895` <dbl>, `chr1.162688895-169022903` <dbl>,
#> # `chr1.169022903-175914298` <dbl>, `chr1.175914298-176175911` <dbl>,
#> # `chr1.176175911-180810233` <dbl>, `chr1.180810233-185520600` <dbl>,
#> # `chr1.185520600-193094295` <dbl>, `chr1.193094295-202188641` <dbl>,
#> # `chr1.202188641-204494686` <dbl>, `chr1.204494686-206646614` <dbl>,
#> # `chr1.206646614-206942027` <dbl>, `chr1.206942027-209632563` <dbl>,
#> # `chr1.209632563-219991341` <dbl>, `chr1.219991341-226064745` <dbl>,
#> # `chr1.226064745-226549202` <dbl>, `chr1.226549202-226589998` <dbl>,
#> # `chr1.226589998-234272887` <dbl>, `chr1.234272887-243663067` <dbl>,
#> # `chr1.243663067-245977996` <dbl>, `chr2.4717089-25100738` <dbl>,
#> # `chr2.25100738-28539877` <dbl>, `chr2.28539877-29416439` <dbl>,
#> # `chr2.29416439-34654129` <dbl>, `chr2.34654129-38467258` <dbl>,
#> # `chr2.38467258-47596683` <dbl>, `chr2.47596683-47600656` <dbl>,
#> # `chr2.47600656-50178151` <dbl>, `chr2.50178151-60170875` <dbl>,
#> # `chr2.60170875-61706028` <dbl>, `chr2.61706028-61711156` <dbl>,
#> # `chr2.61711156-72126420` <dbl>, `chr2.72126420-96919700` <dbl>,
#> # `chr2.96919700-99136565` <dbl>, `chr2.99136565-99149899` <dbl>,
#> # `chr2.99149899-99373720` <dbl>, `chr2.99373720-120048729` <dbl>,
#> # `chr2.120048729-125424612` <dbl>, `chr2.125424612-158739584` <dbl>,
#> # `chr2.158739584-173914775` <dbl>, `chr2.173914775-178129282` <dbl>,
#> # `chr2.178129282-185198425` <dbl>, `chr2.185198425-189541688` <dbl>,
#> # `chr2.189541688-193538820` <dbl>, `chr2.193538820-193562208` <dbl>,
#> # `chr2.193562208-198257106` <dbl>, `chr2.198257106-198299710` <dbl>,
#> # `chr2.198299710-202123030` <dbl>, `chr2.202123030-203723989` <dbl>,
#> # `chr2.203723989-204836598` <dbl>, `chr2.204836598-212248563` <dbl>,
#> # `chr2.212248563-216768839` <dbl>, `chr2.216768839-217736131` <dbl>,
#> # `chr2.217736131-242800953` <dbl>, `chr3.1449872-12955982` <dbl>,
#> # `chr3.12955982-24205251` <dbl>, `chr3.24205251-30733002` <dbl>,
#> # `chr3.30733002-33432307` <dbl>, `chr3.33432307-37035097` <dbl>,
#> # `chr3.37035097-37038155` <dbl>, `chr3.37038155-38180336` <dbl>,
#> # `chr3.38180336-41265566` <dbl>, `chr3.41265566-43177020` <dbl>,
#> # `chr3.43177020-47058664` <dbl>, `chr3.47058664-59789176` <dbl>,
#> # `chr3.59789176-69788801` <dbl>, `chr3.69788801-71015124` <dbl>,
#> # `chr3.71015124-71019922` <dbl>, `chr3.71019922-71064752` <dbl>,
#> # `chr3.71064752-71247137` <dbl>, …