1 H3K36me3 ChIP-seq data

Briefly, H3K36me3 ChIP-seq data were processed as follow : reads were cleaned and trimmed using fastq-mcf from the ea-utils suite v1.1.2 to remove adapters, low quality bases and reads, and discard reads shorter than 25 bp after filtering. Reads were then aligned to the human reference genome (hg19) with bowtie v1.1.1 using best matches parameters (bowtie -v 2 -m 1 –best –strata). Alignment files were further processed with samtools v1.2 and PicardTools v1.130 to flag PCR and optical duplicates and remove alignments located in Encode blacklisted regions. Fragment size was estimated in silico for each library using spp v1.10.1. Enriched regions were identified for each replicate independently with MACS v2.1.0 with non-IPed genomic DNA as a control (macs2 callpeak –nomodel –shiftsize –shift-control –gsize hs -p 1e-1). These relaxed peak lists were then processed through the irreproducible discovery rate (IDR) pipeline to generate an optimal and reproducible set of peaks for each histone modification type and each time point.

1.1 Preprocessing of the data

1.1.1 Import peaks

Using DiffBind, we first generate a consensus set of peaks, merging all the peaks defined with the IDR for 3DA, 4OHT and DMSO.

library("DiffBind")
library("DESeq2")
library("biomaRt")
library("GenomicFeatures")
library("org.Hs.eg.db")

#--> Read experimental design
setwd("~/Documents/Post-Doc/B-Projects/JESUS_K36ME3/")
path_to_samples <- "./data/ChIP-seq/Plan_Expe_ChIP_DiffBind_"
sample_sheet <-  read.csv(paste(path_to_samples, "K36ME3", ".csv", sep = ""), sep = ";")

#--> Create the global DBA object
expe <- dba(sampleSheet = sample_sheet, skipLines = 0, bRemoveM=TRUE)
expe_consensus <- dba.peakset(expe, consensus = -DBA_REPLICATE)
expe_ol <- dba.overlap(expe_consensus,  expe_consensus$masks$Consensus)

1.1.2 Explore commonalities and specificities

#--> Get the data
DMSO <- dba.peakset(expe, peaks = expe$masks$DMSO, consensus=-DBA_REPLICATE, bRetrieve = TRUE)
DA3 <- dba.peakset(expe, peaks = expe$masks$`3DA`, consensus=-DBA_REPLICATE, bRetrieve = TRUE)
OHT4 <- dba.peakset(expe, peaks = expe$masks$`4OHT`, consensus=-DBA_REPLICATE, bRetrieve = TRUE)

#--> Load libraries
library(ChIPpeakAnno)
library(eulerr)
library(colorRamps)

#--> Format data
ol <- findOverlapsOfPeaks(DMSO, DA3, OHT4)
venn_data <- ol$venn_cnt[,"Counts"][-1]
names(venn_data) <- c("OHT4", "DA3", "DA3&OHT4", "DMSO", "DMSO&OHT4", "DMSO&DA3", "DMSO&DA3&OHT4")
venn_p <- euler(venn_data)

#--> Number of common / specific peaks
print(ol$venn_cnt)

##      DMSO DA3 OHT4 Counts count.DMSO count.DA3 count.OHT4
## [1,]    0   0    0      0          0         0          0
## [2,]    0   0    1   5693          0         0       5693
## [3,]    0   1    0   2370          0      2370          0
## [4,]    0   1    1   4262          0      4591       4536
## [5,]    1   0    0   3024       3024         0          0
## [6,]    1   0    1   1535       1598         0       1610
## [7,]    1   1    0   1158       1207      1219          0
## [8,]    1   1    1  19531      26586     24779      23370
## attr(,"class")
## [1] "VennCounts"

#--> Draw Euler plot
plot(venn_p)

Figure 1: Comparison of H3K36me3 peaks between DMSO, 4OHT and 3DA treated cells

As depicted on the Euler above, while 19531 reproducible peaks are common to the 3 conditions, 2024 are specific to DMSO, 2370 to 3DA, and 5693 to 4OHT.

1.2 TSS centric analysis

Before starting this analysis, we need to retrieve the coordinates of TSS on a data-base such as UCSC or Ensembl. Here, we use the Ensembl database GRCh37 corresponding to the genome build hg19. We next consider the window +/- 10kb on each side of the TSS and split it in 1000 non-overlapping windows.

1.2.1 Preparing the data

#--> Load the biomeRt and the biovizBase packages
library(biomaRt)
library(biovizBase)

#--> Connexion to BioMart hg19 (aka GRCh37)
ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL",
                   host="grch37.ensembl.org",
                   path="/biomart/martservice",
                   dataset="hsapiens_gene_ensembl")

#--> Retrive the coordinates of TSSs
tss <- getBM(mart = ensembl,
             attributes = c("chromosome_name", "transcript_start", "transcript_end", "hgnc_symbol", "strand"))

#--> Format the chromosome name column (to match the chromosome names in alignement files)
set.seed(1234)
tss <- tss[grep("^[1-9]{1,2}$", tss$chromosome_name),]

#--> Creating a GRange containing the information related to TSSs
# And add a +/- 1kb window
tss_gr <- GRanges(seqnames = paste0("chr", tss$chromosome_name),
                  ranges = IRanges(start = tss$transcript_start - 10000,
                                   end = tss$transcript_end + 10000,
                                   names = tss$hgnc_symbol),
                  strand = tss$strand)


#--> For each TSS +/- 5kb, tile the window in 100bp bins
tss_gr_plus  <- tss_gr[which(strand(tss_gr) == "+")]
tss_gr_plus_split <- tile(tss_gr_plus, n = 1000)
tss_gr_plus_split <- flatGrl(tss_gr_plus_split)
tss_gr_minus  <- tss_gr[which(strand(tss_gr) == "-")]
tss_gr_minus_split <- tile(tss_gr_minus, n = 1000)
tss_gr_minus_split <- flatGrl(tss_gr_minus_split)

Now that the object containing information related to TSSs is formated, we will first list the BigWig files we are interested in, and count the reads overlapping each tiles consituting each TSS window.

1.2.2 Compute coverages across TSS

library(edgeR)
library(ggplot2)
library(IRanges)

#--> List input averages and normalized BigWig files
bw <- list.files(path = "./data/ChIP-seq/", pattern = "*_ALL_DEDUP_NOBLACKLIST.bw", full = TRUE)[c(1:3)]

#--> Count signal in bins
counts_plus  <- matrix(NA, nrow = length(tss_gr_plus_split), ncol = 3)
counts_minus <- matrix(NA, nrow = length(tss_gr_minus_split), ncol = 3)

chrs <- c(paste("chr", seq(1, 22), sep = ""))

for(i in c(1:length(bw))) {
  
  print(i)
  
  coverage <- rtracklayer::import(bw[i], as = 'RleList')

    for(chr in chrs) {

      counts_plus[which(seqnames(tss_gr_plus_split) == chr) , i] <-
        mean(Views(coverage[[which(names(coverage) == chr)]], 
                   ranges(tss_gr_plus_split[seqnames(tss_gr_plus_split) == chr])))
        
        counts_minus[which(seqnames(tss_gr_minus_split) == chr) , i] <-
        mean(Views(coverage[[which(names(coverage) == chr)]],
                   ranges(tss_gr_minus_split[seqnames(tss_gr_minus_split) == chr])))
        
    }
}

#--> Norm factors
norm <- c(51217909, 50988684, 50578426)

counts_plus  <- cpm(counts_plus, lib.size = norm)
counts_minus <- cpm(counts_minus, lib.size = norm)


#--> Format outputs
count_3DA_R1  <- rbind(t(matrix(counts_plus[,1], nrow = 1000)),
                    t(matrix(counts_minus[,1], nrow = 1000))[,rev(1:1000)])
count_4OHT_R1  <- rbind(t(matrix(counts_plus[,2], nrow = 1000)),
                    t(matrix(counts_minus[,2], nrow = 1000))[,rev(1:1000)])    
count_DMSO_R1  <-  rbind(t(matrix(counts_plus[,3], nrow = 1000)),
                       t(matrix(counts_minus[,3], nrow = 1000))[,rev(1:1000)])

1.2.3 Build the metaprofiles

#--> Format outputs
count_all_1 <- list(count_3DA_R1, count_4OHT_R1, count_DMSO_R1)

count_all_2 <- lapply(count_all_1, function(x) colMeans(x, na.rm = TRUE))

count_all_3 <- data.frame(Pos = c(rep(rep(seq(from = -10000, to = 9999, by = 20),1),3)),
                          Condition = rep(c("DA3_1", "OHT4_1", "DMSO_1"), each = 1000),
                          Reads = unlist(count_all_2))

#--> Plot profiles
ggplot(count_all_3, aes(x = Pos, y = Reads)) +
  geom_line(aes(color = Condition)) +
  scale_x_continuous(name = "Distance to TSS (bp)") +
  scale_y_continuous(name = "Reads") +
  theme_bw()

Figure 2: H3K36me3 metaprofiles around TSS

1.3 Generate the raw count matrix

To perform a differential analysis, we finally combined all those peaks in a consenus global set and computed the raw read coverage for each samples / conditions using DiffBind. During the process, all H3K36me3 ChIP-seq peaks seperated by gaps smaller than 10kb were merged.

#--> Create consensus peakset
consensus <- dba.peakset(expe, consensus = DBA_FACTOR)
peaks <- dba.peakset(consensus, consensus$masks$ALL, bRetrieve=TRUE)

#--> Count binding in consensus peaks 
count <- dba.count(expe,
                   peaks = reduce(peaks, min.gapwidth = 10000),
                   score = DBA_SCORE_TMM_MINUS_EFFECTIVE,
                   bLog = FALSE,
                   bParallel = TRUE)

#--> Outputing the binding matrix containing filtered row counts
b_matrix <- dba.peakset(count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)

1.4 Scaling and normalization

To consider differences in sequencing depth and immuno-precipitation efficiency, we applied :

a quantile normalization using EDASeq.
a debatching strategy identifying surrogate variables using RUVSeq.

#--> Prepare the data
library(EDASeq)
library(RUVSeq)
library(RColorBrewer)
data_RUVs <- matrix(as.numeric(unlist(round(b_matrix[,-c(1:3)]))), nrow = nrow(b_matrix[,-c(1:3)]))

#-->  Quantile normalization
set <- newSeqExpressionSet(counts = data_RUVs)
colnames(set) <- sample_sheet$SampleID
qn_counts <- betweenLaneNormalization(set, which = "full")

#-->  Debatching
differences <- matrix(data = c(1,2,3,4,5, 6), ncol = 2, byrow = TRUE)
set_RUVs <- RUVs(x = qn_counts, cIdx = row.names(set), k = 1, scIdx = differences)

1.5 Graphical QC

As depicted on the PCA plot below, when looking at the raw count matrix, the PC1 (71.5% of the variability) captures the variability introduce by the batch-of-origin of samples while the PC2 (17.2% of variability) captures the experimental variability. This suggest that the experiment is impaired by a non-neglectable batch effect (PC1 >> PC2) that we need to take into account.

Nevertheless, we can already notice that the biggest effect is driven by senescence induction (distance between DMSO and 4OHT on the PC2) while the effect of 3DA is rather mild (distance between 3DA and 4OHT on PC2). Moreover, the use of 3DA tend to push samples downward on the PC2 suggesting that the H3K36me3 profile of 3DA treated cells tend to be bend toward the one of DMSO growing cells.

#--> PCA on raw data
colors <- brewer.pal(6, "Set1")
x <- as.factor(sample_sheet$Condition)
PCA_Raw <- plotPCA(set, col=colors[x], cex=1.2,
                   main = "PCA on raw counts")

Figure 3: PCA on raw counts - H3K36me3

Despite the quantile normalization, PC1 still captures a huge proportion of variability arising from the batch-of-origin. Further processing is required.

#--> PCA on quantile normalized data
PCA_UQ <- plotPCA(qn_counts, col=colors[x], cex=1.2,
                  main = "PCA on quantile-normalized pseucounts")

Figure 4: PCA on quantile-normalized pseucounts - H3K36me3

After non-linear debatching, we managed to get rid of the batch effect. Our initial conlusions are still valid in this sub-space : distance on PC1 between DMSO and 4OHT samples is much bigger than the distance between 3DA and 4OHT cells.

#--> PCA on quantile normalized and debatched data
PCA_RUVs <- plotPCA(set_RUVs, col=colors[x], cex=1.2,
                    main = "PCA on quantile-normalized + RUVSeq pseudocounts")

Figure 5: PCA on quantile-normalized + RUVSeq pseudocounts - H3K36me3

1.6 Differential analysis

For the diffenrential analysis, we used edgeR along with a linear model allowing to test for differences in H3K36me3 between DMSO and 4OHT (effect of senescence induction), and between 3DA and 4OHT (effect of the treatment on senescent cells).

The peaks were finally annotated using bimoaRt and ChIPpeakAnno considering the hg19 assembly. Here, a peaks was flag with the name of a gene when included in the gene body of the gene (either TSS, TTS, exons, introns).

library(edgeR)

#--> Prepare the data
data_edger_chip <- data_RUVs
samples <- data.frame(sample_sheet$Condition)
design <- cbind(model.matrix(~ 0  + samples[,1]))
colnames(design) <- c("DA", "OHT", "DMSO")

#--> Build the DGEList object
data_edger_chip  <- DGEList(counts = data_edger_chip,
                            group = samples[,1],
                            genes = paste(b_matrix[,1],
                                          b_matrix[,2],
                                          b_matrix[,3], sep= "_"))

#--> Estimate dispersion and fit the model
data_edger_chip <- calcNormFactors(data_edger_chip, method = "none", lib.size = norm)
data_edger_chip <- estimateGLMRobustDisp(data_edger_chip, design)
fit_chip <- glmFit(data_edger_chip, design = design)

#--> DE peaks between 4OHT and DMSO
contrasts_4OHT_chip <- makeContrasts(Senescence =  OHT - DMSO, levels = design)
lrt_4OHT_chip <- glmLRT(fit_chip, contrast = contrasts_4OHT_chip)
top_4OHT_chip <- topTags(lrt_4OHT_chip, n = Inf)$table
colnames(top_4OHT_chip) <- paste("edgeR_ChIP_OHT",
                                 colnames(top_4OHT_chip), sep = ".")

#--> DE peaks between 4OHT and 3DA
contrasts_3DA_chip <- makeContrasts(Senescence =   DA - OHT, levels = design)
lrt_3DA_chip <- glmLRT(fit_chip, contrast = contrasts_3DA_chip)
top_3DA_chip <- topTags(lrt_3DA_chip, n = Inf)$table
colnames(top_3DA_chip) <- paste("edgeR_ChIP_3DA",
                               colnames(top_3DA_chip), sep = ".")

#--> Output
gr_edger_chip <- merge(top_4OHT_chip, top_3DA_chip, by = 1)
gr_edger_chip$chr <- strsplit2(gr_edger_chip[,1],"_")[,1]
gr_edger_chip$start <- strsplit2(gr_edger_chip[,1],"_")[,2]
gr_edger_chip$end <- strsplit2(gr_edger_chip[,1],"_")[,3]
gr_edger_chip <- GRanges(gr_edger_chip)

#--> Annotation
library(ChIPpeakAnno)
library(org.Hs.eg.db)

ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL",
                   host="grch37.ensembl.org",
                   path="/biomart/martservice",
                   dataset="hsapiens_gene_ensembl")

ensembl_gr <- getAnnotation(mart = ensembl)
gr_edger_chip <- annotatePeakInBatch(gr_edger_chip, AnnotationData = ensembl_gr,
                                     output = "overlapping")
gr_edger_chip <- addGeneIDs(gr_edger_chip, org.Hs.eg.db, IDs2Add=c("symbol", "entrez_id"), 
           feature_id_type="ensembl_gene_id", silence=TRUE, mart)

1.7 Visual inspection

library(ggplot2)
library(ggrepel)
library(clusterProfiler)

#--> Import the senescence signatures
gmt_senescence <- qusage::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/SenescenceSignature.gmt")

#--> Build MA plot for the senescence induction
MA_OHT <- ggplot() +
         geom_point(data = data.frame(gr_edger_chip),
                    aes(y = edgeR_ChIP_OHT.logFC,
                        x = edgeR_ChIP_OHT.logCPM)) +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  edgeR_ChIP_OHT.logFC < -log2(1.2) & 
                                  edgeR_ChIP_OHT.FDR < .1),
                    aes(y = edgeR_ChIP_OHT.logFC,
                        x = edgeR_ChIP_OHT.logCPM),
                    color = "firebrick1") +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  edgeR_ChIP_OHT.logFC > log2(1.2) &
                                  edgeR_ChIP_OHT.FDR < .1),
                    aes(y = edgeR_ChIP_OHT.logFC,
                        x = edgeR_ChIP_OHT.logCPM),
                    color = "chartreuse3") +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$SASP)),
                    aes(y = edgeR_ChIP_OHT.logFC,
                        x = edgeR_ChIP_OHT.logCPM),
                    color = "steelblue") +
         geom_label_repel(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$SASP) &
                                  edgeR_ChIP_OHT.logFC > 1 &
                                  edgeR_ChIP_OHT.FDR < .1),
               aes(y = edgeR_ChIP_OHT.logFC,
                   x = edgeR_ChIP_OHT.logCPM,
                   label = symbol)) +
         scale_x_continuous(name = "logCPM") +
         scale_y_continuous(name = "logFC 4OHT over DMSO") +
         ggtitle("Senescence induction : Impact on H3K36me3") +
         theme_bw()

#--> Build MA plot for 3DA treatment
MA_DA <- ggplot() +
         geom_point(data = data.frame(gr_edger_chip),
                    aes(y = edgeR_ChIP_3DA.logFC,
                        x = edgeR_ChIP_3DA.logCPM)) +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  edgeR_ChIP_3DA.logFC < -log2(1.2) & 
                                  edgeR_ChIP_3DA.FDR < .1),
                    aes(y = edgeR_ChIP_3DA.logFC,
                        x = edgeR_ChIP_3DA.logCPM),
                    color = "firebrick1") +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  edgeR_ChIP_3DA.logFC > log2(1.2) &
                                  edgeR_ChIP_3DA.FDR < .1),
                    aes(y = edgeR_ChIP_3DA.logFC,
                        x = edgeR_ChIP_3DA.logCPM),
                    color = "chartreuse3") +
         geom_point(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$SASP)),
                    aes(y = edgeR_ChIP_3DA.logFC,
                        x = edgeR_ChIP_3DA.logCPM),
                    color = "steelblue") +
         geom_label_repel(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$SASP) &
                                  abs(edgeR_ChIP_3DA.logFC) > .2 &
                                  edgeR_ChIP_3DA.FDR < .1),
               aes(y = edgeR_ChIP_3DA.logFC,
                   x = edgeR_ChIP_3DA.logCPM,
                   label = symbol)) +
         scale_x_continuous(name = "logCPM") +
         scale_y_continuous(name = "logFC 3DA over 4OHT") +
         ggtitle("3DA : Impact on H3K36me3") +
         theme_bw()

#--> Build FC vs FC plot
FC_FC <- ggplot() +
    geom_point(data = data.frame(gr_edger_chip),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_ChIP_OHT.logFC),
               color = "grey") +
    geom_smooth(method = "gam", data = data.frame(gr_edger_chip),
               aes(x = edgeR_ChIP_3DA.logFC, y = edgeR_ChIP_OHT.logFC)) +
    geom_point(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$`OIS FC1.5`)),
                    aes(x = edgeR_ChIP_3DA.logFC,
                        y = edgeR_ChIP_OHT.logFC),
                    color = "steelblue") +
     geom_label_repel(data = subset(data.frame(gr_edger_chip),
                                  is.element(data.frame(gr_edger_chip)$symbol, 
                                             gmt_senescence$`OIS FC1.5`) &
                                  edgeR_ChIP_OHT.FDR < .1 &
                                  edgeR_ChIP_3DA.FDR < .1),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_ChIP_OHT.logFC,
                   label = symbol)) +
    xlab("log2 fold change in H3K36me3 (3DA over DMSO)") +
    ylab("log2 fold change in H3K36me3 (OHT over 4OHT)")

library(gridExtra)
grid.arrange(MA_OHT, MA_DA,  nrow = 2)

Figure 6: Differential analysis for H3K36me3 ChIP-seq : MA-plots

On the two MA-plot above, peaks gaining H3K36me3 during senescence or after 3DA treatment are colored in green, while the one losing H3K36me3 are colored in red. The overploted blue points depict SASP genes (as defined in the list provided by Jesus Gil).

As expected, we see that most of the SASP genes gain H3K36me3 after OIS (which know to be correlated to gene expression level). Nevertheless, these genes are rather evenly distributed among regions gaining / losing H3K36me3 upon 3DA treatment.

FC_FC

Figure 7: Differential analysis for H3K36me3 ChIP-seq : FC vs FC plot

cor.test(gr_edger_chip$edgeR_ChIP_3DA.logFC,
         gr_edger_chip$edgeR_ChIP_OHT.logFC)

## 
##  Pearson's product-moment correlation
## 
## data:  gr_edger_chip$edgeR_ChIP_3DA.logFC and gr_edger_chip$edgeR_ChIP_OHT.logFC
## t = -54.583, df = 21738, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3588178 -0.3354358
## sample estimates:
##        cor 
## -0.3471808

Comparing H3K36me3 fold-changes during senescence induction and upon 3DA treatment reveales a mild (but significant) negative correlation (R2 = -0.35) : peaks gaining H3K36me3 after OIS tend to lose H3K36me3 upon 3DA treatment. But this rule doesn’t apply well to SASP-related peaks.

1.8 Functionnal exploration

To depict in greater details the impact of senescence induction and post-senescence 3DA treatment on H3K36me3 landscape, we performed GSEA ranking peaks according to the fold-change in H3K36me3 (either after OIS or 3DA treatment). We considered two gene-set collections : the Reactome collection provided by MSigDB, and the Senescence collection we built based on the litterature.

1.8.1 GSEA for senescence induction - REACTOME

library(fgsea)
library(qusage)

# Define the target 
target <- gr_edger_chip

# Import the GMT - REACTOME
gmt <- qusage::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/c2.cp.reactome.v6.0.symbols.gmt")

# Order peaks according to the logFC of OHT over DMSO
order_da <- sort(target$edgeR_ChIP_OHT.logFC)
names(order_da) <- target$symbol[order(target$edgeR_ChIP_OHT.logFC, decreasing = FALSE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Select Top NES (+/-)
topPathways<- rbind(fgsea_da[order(NES)[c(1:5)],],
                    fgsea_da[order(NES,decreasing = TRUE)[c(1:5)],])
topPathways <- topPathways[order(topPathways$NES, decreasing = TRUE), pathway]

# Plot
plotGseaTable(gmt[topPathways], order_da, fgsea_da)

Figure 8: Top up/down pathways for senenscence induction - REACTOME

plotEnrichment(gmt[["REACTOME_CELL_CYCLE"]], order_da)

Figure 9: Top up/down pathways for senenscence induction - REACTOME

The senescence induction tend to impact positively (gain in H3K36me3 in 4OHT samples) peaks connected to genes involved in cell-cell interaction, in Notch signaling, while impacting negatively (loss in H3K36me3 in 4OHT samples) peaks connected to genes involved in cell cycle.

1.8.2 GSEA for senescence induction - SENESCENCE

# Define the target 
target <- gr_edger_chip

# Import the GMT - REACTOME
gmt <- gmt_senescence

# Order peaks according to the logFC of OHT over DMSO
order_da <- sort(target$edgeR_ChIP_OHT.logFC, decreasing = TRUE)
names(order_da) <- target$symbol[order(target$edgeR_ChIP_OHT.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Plot
plotGseaTable(gmt, order_da, fgsea_da)

Figure 10: Top up/down pathways for senenscence induction - SENESCENCE

plotEnrichment(gmt[["SASP"]], order_da)

Figure 11: Top up/down pathways for senenscence induction - SENESCENCE

The senescence induction tend to impact positively (gain in H3K36me3 in 4OHT samples) peaks connected to genes involved in the SASP as well as genes known to to be up-regulated during OIS (OIS FC2 and OIS FC1.5).

1.8.3 GSEA for 3DA treatment - REACTOME

library(fgsea)

# Define the target 
target <- gr_edger_chip

# Import the GMT - REACTOME
gmt <- qusage::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/c2.cp.reactome.v6.0.symbols.gmt")

# Order peaks according to the logFC of 3DA over OHT
order_da <- sort(target$edgeR_ChIP_3DA.logFC, decreasing = TRUE)
names(order_da) <- target$symbol[order(target$edgeR_ChIP_3DA.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Select Top NES (+/-)
topPathways<- rbind(fgsea_da[order(NES)[c(1:5)],],
                    fgsea_da[order(NES,decreasing = TRUE)[c(1:5)],])
topPathways <- topPathways[order(topPathways$NES, decreasing = TRUE), pathway]

# Plot
plotGseaTable(gmt[topPathways], order_da, fgsea_da)

Figure 12: Top up/down pathways with 3DA treatment - REACTOME

plotEnrichment(gmt[["REACTOME_CELL_CYCLE"]], order_da)

Figure 13: Top up/down pathways with 3DA treatment - REACTOME

The 3DA treatment on senescent cells tend to impact positively (gain in H3K36me3 in 3DA samples) peaks connected to genes involved in the cell cycle, while impacting negatively (loss in H3K36me3 in 3DA samples) peaks connected to genes involved in RAS and ERK pathways.

1.8.4 GSEA for 3DA treatment - SENESCENCE

# Define the target 
target <- gr_edger_chip

# Import the GMT - REACTOME
gmt <- gmt_senescence

# Order peaks according to the logFC of 3DA over OHT
order_da <- sort(target$edgeR_ChIP_3DA.logFC, decreasing = TRUE)
names(order_da) <- target$symbol[order(target$edgeR_ChIP_3DA.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Plot
plotGseaTable(gmt, order_da, fgsea_da)

Figure 14: Top up/down pathways with 3DA treatment - SENESCENCE

plotEnrichment(gmt[["SASP"]], order_da)

Figure 15: Top up/down pathways with 3DA treatment - SENESCENCE

On the other hand, the 3DA treatment on senescent cells tend to also impact positively (gain in H3K36me3 in 3DA samples) peaks connected to genes involved in SASP, while impacting negatively (loss in H3K36me3 in 3DA samples) peaks connected to genes known to to be up-regulated during OIS (OIS FC2 and OIS FC1.5).

2 RNA-seq data

RNA-seq data was processed with STAR using standard parameters.

2.1 Generate count matrix

Using Rsubread, we first generate a gene-centric raw count matrix, considering a h19-formated annotation.

library(Rsubread)

#-->
pdata_rna <- read.table("./data/RNA-seq/JESUS_RNASEQ_PHENO.txt")

#--> List files
bam_rna <- list.files("./data/RNA-seq/", full.names = TRUE, pattern = "bam$")
count_rna <- featureCounts(files = bam_rna, annot.inbuilt = "hg19", nthreads = 6)
count_rna_2 <- count_rna$counts

colnames(count_rna_2) <- row.names(pdata_rna)
se_rna <- SummarizedExperiment(assays = count_rna_2, colData = pdata_rna)

2.2 Normalization

To maintain homogeneity throughout our analytical workflow, we applied the same normalization scheme here as the one used for the H3K36me3 ChIP-seq data. But overall, the batch effect is here less pronounced.

#-->  Quantile normalization
set <- newSeqExpressionSet(counts = count_rna_2)
colnames(set) <- row.names(pdata_rna)
qn_counts <- betweenLaneNormalization(set, which = "full")

#-->  Debatching
differences <- matrix(data = c(1:12), ncol = 4, byrow = TRUE)
set_RUVs <- RUVs(x = qn_counts, cIdx = row.names(set), k = 1, scIdx = differences)

colors <- brewer.pal(6, "Set1")
x <- as.factor(pdata_rna$Treatment)

set <- newSeqExpressionSet(counts = count_rna_2)
PCA_Raw <- plotPCA(set, col=colors[x], cex=1.2,
                   main = "PCA on raw counts")

Figure 16: PCA on raw counts - RNA-seq

PCA_QN <- plotPCA(qn_counts, col=colors[x], cex=1.2,
                   main = "PCA on raw counts")

PCA_QN <- plotPCA(set_RUVs , col=colors[x], cex=1.2,
                   main = "PCA on raw counts")

Figure 17: PCA on quantile-normalized + RUVSeq pseudocounts - RNA-seq

2.3 DE analysis

For the diffenrential analysis, we used edgeR along with a linear model allowing to test for differences in expression between DMSO and 4OHT (effect of senescence induction), and between 3DA and 4OHT (effect of the treatment on senescent cells).

#--> Prepare the data
data_edger_rna <-  normCounts(set_RUVs)
design <- cbind(model.matrix(~ 0  + pdata_rna$Treatment))
colnames(design) <- c("DA", "OHT", "DMSO")

#--> Build the DGEList object
data_edger_rna  <- DGEList(counts = data_edger_rna, group = pdata_rna$Treatment,
                       genes = row.names(data_edger_rna))

#-->  Filter lowly epxressed genes
filter_g <- filterByExpr(data_edger_rna, group=pdata_rna$Treatment, min.count = 1)
data_edger_rna <- data_edger_rna[filter_g,, keep.lib.sizes=FALSE]

#--> Estimate dispersion and fit the model
data_edger_rna <- calcNormFactors(data_edger_rna, method = "none")
data_edger_rna <- estimateGLMRobustDisp(data_edger_rna, design)
fit_rna <- glmFit(data_edger_rna, design = design)

#--> DE peaks between 4OHT and DMSO
contrasts_4OHT_rna <- makeContrasts(Senescence =  OHT - DMSO, levels = design)
lrt_4OHT_rna <- glmLRT(fit_rna, contrast = contrasts_4OHT_rna)
top_4OHT_rna <- topTags(lrt_4OHT_rna, n = Inf)$table
colnames(top_4OHT_rna) <- paste("edgeR_RNA_OHT", colnames(top_4OHT_rna), sep = ".")

#--> DE peaks between 4OHT and 3DA
contrasts_3DA_rna <- makeContrasts(Senescence =   DA - OHT, levels = design)
lrt_3DA_rna <- glmLRT(fit_rna, contrast = contrasts_3DA_rna)
top_3DA_rna <- topTags(lrt_3DA_rna, n = Inf)$table
colnames(top_3DA_rna) <- paste("edgeR_RNA_3DA", colnames(top_3DA_rna), sep = ".")


#--> Output
gr_edger_rna <- merge(top_4OHT_rna, top_3DA_rna, by = 1)

#--> Annotation
symbols <- AnnotationDbi::select(org.Hs.eg.db, 
                  keys = gr_edger_rna $edgeR_RNA_OHT.genes,
                  columns = c("ENTREZID", "SYMBOL"),
                  keytype = "ENTREZID")
gr_edger_rna <- merge(gr_edger_rna, symbols,
                      by.x = "edgeR_RNA_OHT.genes", by.y = "ENTREZID")

2.4 Visual inspection

#--> Build MA plot for the senescence induction
MA_OHT <- ggplot() +
         geom_point(data = data.frame(gr_edger_rna),
                    aes(y = edgeR_RNA_OHT.logFC,
                        x = edgeR_RNA_OHT.logCPM)) +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  edgeR_RNA_OHT.logFC < -log2(1.2) & 
                                  edgeR_RNA_OHT.FDR < .1),
                    aes(y = edgeR_RNA_OHT.logFC,
                        x = edgeR_RNA_OHT.logCPM),
                    color = "firebrick1") +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  edgeR_RNA_OHT.logFC > log2(1.2) &
                                  edgeR_RNA_OHT.FDR < .1),
                    aes(y = edgeR_RNA_OHT.logFC,
                        x = edgeR_RNA_OHT.logCPM),
                    color = "chartreuse3") +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP)),
                    aes(y = edgeR_RNA_OHT.logFC,
                        x = edgeR_RNA_OHT.logCPM),
                    color = "steelblue") +
         geom_label_repel(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP) &
                                  edgeR_RNA_OHT.logFC > 1 &
                                  edgeR_RNA_OHT.FDR < .1),
               aes(y = edgeR_RNA_OHT.logFC,
                   x = edgeR_RNA_OHT.logCPM,
                   label = SYMBOL)) +
         scale_x_continuous(name = "logCPM") +
         scale_y_continuous(name = "logFC 4OHT over DMSO") +
         ggtitle("Senescence induction : Impact on RNA") +
         theme_bw()

#--> Build MA plot for 3DA treatment
MA_DA <- ggplot() +
         geom_point(data = data.frame(gr_edger_rna),
                    aes(y = edgeR_RNA_3DA.logFC,
                        x = edgeR_RNA_3DA.logCPM)) +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  edgeR_RNA_3DA.logFC < -log2(1.2) & 
                                  edgeR_RNA_3DA.FDR < .1),
                    aes(y = edgeR_RNA_3DA.logFC,
                        x = edgeR_RNA_3DA.logCPM),
                    color = "firebrick1") +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  edgeR_RNA_3DA.logFC > log2(1.2) &
                                  edgeR_RNA_3DA.FDR < .1),
                    aes(y = edgeR_RNA_3DA.logFC,
                        x = edgeR_RNA_3DA.logCPM),
                    color = "chartreuse3") +
         geom_point(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP)),
                    aes(y = edgeR_RNA_3DA.logFC,
                        x = edgeR_RNA_3DA.logCPM),
                    color = "steelblue") +
         geom_label_repel(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP) &
                                  abs(edgeR_RNA_3DA.logFC) > .2 &
                                  edgeR_RNA_3DA.FDR < .1),
               aes(y = edgeR_RNA_3DA.logFC,
                   x = edgeR_RNA_3DA.logCPM,
                   label = SYMBOL)) +
         scale_x_continuous(name = "logCPM") +
         scale_y_continuous(name = "logFC 3DA over 4OHT") +
         ggtitle("3DA : Impact on RNA") +
         theme_bw()

#--> Build FC vs FC plot
FC_FC <- ggplot() +
    geom_point(data = data.frame(gr_edger_rna),
               aes(x = edgeR_RNA_3DA.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "grey") +
    geom_smooth(method = "gam", data = data.frame(gr_edger_rna),
               aes(x = edgeR_RNA_3DA.logFC, y = edgeR_RNA_OHT.logFC)) +
    geom_point(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP)),
                    aes(x = edgeR_RNA_3DA.logFC,
                        y = edgeR_RNA_OHT.logFC),
                    color = "steelblue") +
     geom_label_repel(data = subset(data.frame(gr_edger_rna),
                                  is.element(data.frame(gr_edger_rna)$SYMBOL, 
                                             gmt_senescence$SASP) &
                                  edgeR_RNA_OHT.FDR < .1 &
                                  edgeR_RNA_3DA.FDR < .1),
               aes(x = edgeR_RNA_3DA.logFC,
                   y = edgeR_RNA_OHT.logFC,
                   label = SYMBOL)) +
    xlab("log2 fold change in RNA (3DA over DMSO)") +
    ylab("log2 fold change in RNA (4OHT over 4OHT)")

library(gridExtra)
grid.arrange(MA_OHT, MA_DA, nrow = 2)

Figure 18: Differential analysis for RNA-seq : MA-plots

On the two MA-plot above, up-regulated genes during senescence or after 3DA treatment are colored in green, while the one down-regulated are colored in red. The overploted blue points depict SASP genes (as defined in the list provided by Jesus Gil).

As expected, we see that most of the SASP genes are up-regulated after OIS. Overall, these genes are mostly down-regulated genes upon 3DA treatment. The conclusion drown here for expression and H3K4me36 are also showing a “decoupling” between gene expression and H3K36me3 for SASP genes.

FC_FC

Figure 19: Differential analysis for RNA-seq : FC vs FC plot

cor.test(gr_edger_rna$edgeR_RNA_3DA.logFC,
         gr_edger_rna$edgeR_RNA_OHT.logFC)

## 
##  Pearson's product-moment correlation
## 
## data:  gr_edger_rna$edgeR_RNA_3DA.logFC and gr_edger_rna$edgeR_RNA_OHT.logFC
## t = -63.649, df = 16592, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4551459 -0.4306863
## sample estimates:
##        cor 
## -0.4429985

Comparing gene exoression fold-changes during senescence induction and upon 3DA treatment reveales a mild (but significant) negative correlation (R2 = -0.44) : genes upregulated after OIS tend to be down-regulated upon 3DA treatment.

2.5 Functionnal exploration

2.5.1 GSEA for senenscence induction - REACTOME

# Define the target 
target <- gr_edger_rna

# Import the GMT - REACTOME
gmt <-  qusage::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/c2.cp.reactome.v6.0.symbols.gmt")

# Order peaks according to the logFC of OHT over DMSO
order_da <- sort(target$edgeR_RNA_OHT.logFC, decreasing = TRUE)
names(order_da) <- target$SYMBOL[order(target$edgeR_RNA_OHT.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, BPPARAM = SerialParam(),  nperm = 1000)

# Select Top NES (+/-)
topPathways<- rbind(fgsea_da[order(NES)[c(1:5)],],
                    fgsea_da[order(NES,decreasing = TRUE)[c(1:5)],])
topPathways <- topPathways[order(topPathways$NES, decreasing = TRUE), pathway]


# Plot
plotGseaTable(gmt[topPathways], order_da, fgsea_da)

Figure 20: Top up/down pathways for senenscence induction - REACTOME

plotEnrichment(gmt[["REACTOME_CELL_CYCLE"]], order_da)

Figure 21: Top up/down pathways for senenscence induction - REACTOME

The senescence induction tend to impact positively (up-regulation in 4OHT samples) genes involved encoding for cytokines / chemokines, while impacting negatively (down-regulation in 4OHT samples) genes involved in cell cycle.

2.5.2 GSEA for senenscence induction - SENESCENCE

# Define the target 
target <- gr_edger_rna

# Import the GMT - REACTOME
gmt <- gmt_senescence

# Order peaks according to the logFC of OHT over DMSO
order_da <- sort(target$edgeR_RNA_OHT.logFC, decreasing = TRUE)
names(order_da) <- target$SYMBOL[order(target$edgeR_RNA_OHT.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Plot
plotGseaTable(gmt, order_da, fgsea_da)

Figure 22: Top up/down pathways for senenscence induction - SENESCENCE

plotEnrichment(gmt[["SASP"]], order_da)

Figure 23: Top up/down pathways for senenscence induction - SENESCENCE

The senescence induction tend to impact positively (up-regulation in 4OHT samples) genes involved in the SASP.

2.5.3 GSEA for 3DA treatment - REACTOME

library(fgsea)

# Define the target 
target <- gr_edger_rna

# Import the GMT - REACTOME
gmt <- qusage::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/c2.cp.reactome.v6.0.symbols.gmt")

# Order peaks according to the logFC of 3DA over OHT
order_da <- sort(target$edgeR_RNA_3DA.logF, decreasing = TRUE)
names(order_da) <- target$SYMBOL[order(target$edgeR_RNA_3DA.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Select Top NES (+/-)
topPathways<- rbind(fgsea_da[order(NES)[c(1:10)],],
                    fgsea_da[order(NES,decreasing = TRUE)[c(1:10)],])
topPathways <- topPathways[order(topPathways$NES, decreasing = TRUE), pathway]

# Plot
plotGseaTable(gmt[topPathways], order_da, fgsea_da)

Figure 24: Top up/down pathways with 3DA treatment - REACTOME

plotEnrichment(gmt[["REACTOME_CELL_CYCLE"]], order_da)

Figure 25: Top up/down pathways with 3DA treatment - REACTOME

The 3DA treatment on senescent cells tend to impact positively (up-regulation in 3DA samples) genes involved in the cell cycle, while impacting negatively (down-regulation in 3DA samples) genes encoding for cytokines / chemokines.

2.5.4 GSEA for 3DA treatment - SENESCENCE

# Define the target 
target <- gr_edger_rna

# Import the GMT - REACTOME
gmt <- gmt_senescence

# Order peaks according to the logFC of 3DA over OHT
order_da <- sort(target$edgeR_RNA_3DA.logFC, decreasing = TRUE)
names(order_da) <- target$SYMBOL[order(target$edgeR_RNA_3DA.logFC, decreasing = TRUE)]

# Run GSEA
fgsea_da <- fgsea(gmt, order_da, nperm = 1000, BPPARAM = SerialParam(), minSize = 20)

# Plot
plotGseaTable(gmt, order_da, fgsea_da)

Figure 26: Top up/down pathways with 3DA treatment - SENESCENCE

plotEnrichment(gmt[["SASP"]], order_da)

Figure 27: Top up/down pathways with 3DA treatment - SENESCENCE

The 3DA treatment on senescent cells tend to impact nagatively (down-regulation in 3DA samples) genes involved in the SASP.

3 Integration

To explore the possible coupling between H3K36me3 signal in gene body and gene expression, we integrated the two data-sets and explore correlation between :

Fold-change in H3K36me3 and gene expression after OIS.
Fold-change in H3K36me3 and gene expression after 3DA treatment on senescent cells.

3.1 Correlations between ChIP-seq and RNA-seq during OIS

library(ggplot2)
library(ggrepel)

chip_rna <- merge(gr_edger_chip, gr_edger_rna, by.x = "entrez_id", by.y = "edgeR_RNA_OHT.genes")

fc_fc_ois <-ggplot() +
    geom_point(data = chip_rna,
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "grey") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_OHT.logFC > log2(1.5) &
                             edgeR_RNA_OHT.logFC >log2(1.5)),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "chartreuse3") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_OHT.logFC < - log2(1.5) &
                             edgeR_RNA_OHT.logFC <  - log2(1.5)),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "firebrick") +
    geom_label_repel(data = subset(chip_rna, edgeR_ChIP_OHT.logFC < - 2 &
                                   edgeR_RNA_OHT.logFC <  - 4),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC,
                   label = symbol), cex = 3) +
    geom_label_repel(data = subset(chip_rna, edgeR_ChIP_OHT.logFC >  2 &
                                   edgeR_RNA_OHT.logFC >   4),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC,
                   label = symbol), cex = 3) +
    geom_hline(yintercept = c(log2(1.5), -log2(1.5))) +
    geom_vline(xintercept = c(log2(1.5), -log2(1.5))) +
    geom_smooth(method = "lm", data = chip_rna,
                aes(x = edgeR_ChIP_OHT.logFC,
                    y = edgeR_RNA_OHT.logFC)) +
    ggtitle("Senescence induction : correlation between FC in expression and H3K36me3") +
    xlab("log2 fold change in H3K36me3 (OHT vs DMSO)") +
    ylab("log2 fold change in expression (OHT vs DMSO)")

fc_fc_ois_sasp <- ggplot() +
    geom_point(data = chip_rna,
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "grey") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_OHT.logFC > log2(1.2) &
                             edgeR_RNA_OHT.logFC >log2(1.2)),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "chartreuse3") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_OHT.logFC < - log2(1.2) &
                             edgeR_RNA_OHT.logFC <  - log2(1.2)),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC),
               color = "firebrick") +
    geom_point(data = subset(chip_rna,
                             is.element(chip_rna$SYMBOL, 
                                        gmt_senescence$SASP)),
                aes(x = edgeR_ChIP_OHT.logFC,
                    y = edgeR_RNA_OHT.logFC),
                color = "steelblue") +
    geom_label_repel(data = subset(chip_rna,
                             is.element(chip_rna$SYMBOL, 
                                        gmt_senescence$SASP) &
                                  edgeR_ChIP_OHT.logFC > 1 &
                                  edgeR_RNA_OHT.logFC > 1),
               aes(x = edgeR_ChIP_OHT.logFC,
                   y = edgeR_RNA_OHT.logFC,
                   label = SYMBOL)) + 
    geom_hline(yintercept = c(log2(1.2), -log2(1.2))) +
    geom_vline(xintercept = c(log2(1.2), -log2(1.2))) +
    geom_smooth(method = "lm", data = chip_rna,
                aes(x = edgeR_ChIP_OHT.logFC,
                    y = edgeR_RNA_OHT.logFC)) +
    ggtitle("Senescence induction : correlation between FC in expression and H3K36me3") +
    xlab("log2 fold change in H3K36me3 (OHT vs DMSO)") +
    ylab("log2 fold change in expression (OHT vs DMSO)")

grid.arrange(fc_fc_ois, fc_fc_ois_sasp, nrow = 2)

Figure 28: FC vs FC plot for RAS OIS induction

cor(chip_rna$edgeR_ChIP_OHT.logFC, chip_rna$edgeR_RNA_OHT.logFC)

## [1] 0.5067247

The two plots above show the correlation between FC in H3K36me3 and gene expression during OIS. Overall, during RAS induction, there is a coordinate change in H3K36me3 and gene expression (R2=0.51) : genes gaining H3K36me3 after OIS tend to get up-regulated.

The graph at the shows top up / down coordinated genes. The graph at the bottom is the same, with SASP genes highlited.

3.2 Correlations between ChIP-seq and RNA-seq after 3DA

fc_fc_da <- ggplot() +
    geom_point(data = chip_rna,
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "grey") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_3DA.logFC > log2(1.2) &
                             edgeR_RNA_3DA.logFC >log2(1.2)),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "chartreuse3") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_3DA.logFC < - log2(1.2) &
                             edgeR_RNA_3DA.logFC <  - log2(1.2)),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "firebrick") +
    geom_label_repel(data = subset(chip_rna, edgeR_ChIP_3DA.logFC < - 1 &
                                   edgeR_RNA_3DA.logFC <  - 1)[1:10,],
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC,
                   label = symbol), cex = 3) +
    geom_label_repel(data = subset(chip_rna, edgeR_ChIP_3DA.logFC >  1 &
                                   edgeR_RNA_3DA.logFC >   1)[1:10,],
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC,
                   label = symbol), cex = 3) +
    geom_hline(yintercept = c(log2(1.2), -log2(1.2))) +
    geom_vline(xintercept = c(log2(1.2), -log2(1.2))) +
    geom_smooth(method = "lm", data = chip_rna,
                aes(x = edgeR_ChIP_3DA.logFC,
                    y = edgeR_RNA_3DA.logFC)) +
    ggtitle("3DA treatment in senescent cells : correlation between FC in expression and H3K36me3") +
    xlab("log2 fold change in H3K36me3 (OHT vs DA)") +
    ylab("log2 fold change in expression (OHT vs DA)") 

fc_fc_da_sasp <-ggplot() +
    geom_point(data = chip_rna,
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "grey") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_3DA.logFC > log2(1.2) &
                             edgeR_RNA_3DA.logFC >log2(1.2)),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "chartreuse3") +
    geom_point(data = subset(chip_rna, edgeR_ChIP_3DA.logFC < - log2(1.2) &
                             edgeR_RNA_3DA.logFC <  - log2(1.2)),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC),
               color = "firebrick") +
    geom_point(data = subset(chip_rna,
                             is.element(chip_rna$SYMBOL, 
                                        gmt_senescence$SASP)),
                aes(x = edgeR_ChIP_3DA.logFC,
                    y = edgeR_RNA_3DA.logFC),
                color = "steelblue") +
    geom_label_repel(data = subset(chip_rna,
                             is.element(chip_rna$SYMBOL, 
                                        gmt_senescence$SASP) &
                                  edgeR_ChIP_3DA.logFC > 1 &
                                  edgeR_RNA_3DA.logFC < .1),
               aes(x = edgeR_ChIP_3DA.logFC,
                   y = edgeR_RNA_3DA.logFC,
                   label = SYMBOL)) +
    geom_hline(yintercept = c(log2(1.2), -log2(1.2))) +
    geom_vline(xintercept = c(log2(1.2), -log2(1.2))) +
    geom_smooth(method = "lm", data = chip_rna,
                aes(x = edgeR_ChIP_3DA.logFC,
                    y = edgeR_RNA_3DA.logFC)) +
    ggtitle("3DA treatment in senescent cells : correlation between FC in expression and H3K36me3") +
    xlab("log2 fold change in H3K36me3 (OHT vs DA)") +
    ylab("log2 fold change in expression (OHT vs DA)")

grid.arrange(fc_fc_da, fc_fc_da_sasp, nrow = 2)

Figure 29: FC vs FC plot for 3DA treatment on senescent cells

cor.test(chip_rna$edgeR_ChIP_3DA.logFC, chip_rna$edgeR_RNA_3DA.logFC)

## 
##  Pearson's product-moment correlation
## 
## data:  chip_rna$edgeR_ChIP_3DA.logFC and chip_rna$edgeR_RNA_3DA.logFC
## t = 24.13, df = 14735, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1793860 0.2104498
## sample estimates:
##       cor 
## 0.1949668

The two plots above show the correlation between FC in H3K36me3 and gene expression after 3DA treatment on senescent cells. Here, the coupling between H3K36me3 and gene expression is lost (R2=0.19) for almost all genes, except cell cycle-related genes, for which an up-regulation at the RNA level is supported by an increase in H3K36me3.

The graph at the shows top up / down coordinated genes. The graph at the bottom is the same, with SASP genes highlited.

3.3 List of up-regulated genes during 3DA with a concordant H3K36me3 profile

Considering a threshold at log2(1.2) for both gene expression and H3K36me3 fold-change after 3DA treatment, here is the list of up-regulated genes with a concordant H3K36me3 profile.

up_list <- noquote(unique(subset(chip_rna, edgeR_ChIP_3DA.logFC > log2(1.2) &
                              edgeR_RNA_3DA.logFC > log2(1.2))$SYMBOL))
up_list

##   [1] ZNF730      HOXA-AS3    TROAP       GNG12-AS1   MAN1B1-DT   MIR1282    
##   [7] SAP25       CDH6        THAP9-AS1   ZNF865      TRIM52-AS1  SMC4       
##  [13] CDH11       PDIA6       AASS        LANCL1      CDKN3       HMG20B     
##  [19] SEMA3A      NDC80       HYOU1       AKAP3       CENPA       SMC2       
##  [25] CENPE       CENPF       POSTN       RAD51AP1    POLD3       CD3EAP     
##  [31] SLC26A1     EHMT2       DBF4        CBX1        KIF2C       ADAP1      
##  [37] DSTN        B4GAT1      OGFR        CNTRL       KRR1        CIT        
##  [43] RABL2B      RABL2A      PSIP1       LZTS1       ERI2        PTGDR2     
##  [49] SNX18       USP18       FMNL2       HAUS1       CISH        TSEN15     
##  [55] SNORD83B    TWIST2      CLCN2       SFXN2       LRIG3       NEDD1      
##  [61] MFSD12      IQGAP3      COL6A2      NUP35       RFTN2       COL12A1    
##  [67] DCBLD2      SCLT1       SOGA1       CSF1R       SPINDOC     CSK        
##  [73] CMTM4       SLC25A10    KIF18B      EME1        KLC3        SPC24      
##  [79] ZNF714      GNAS-AS1    CCDC117     CKAP2L      SGO2        RNF38      
##  [85] GLIPR2      SCFD2       CDCA2       DGKQ        DDX11       DHFR       
##  [91] DHODH       DLD         DLG3        DNMT3B      DPYSL2      DPYSL3     
##  [97] DUT         DYRK1A      EDNRA       EFNB2       EGR3        WDR90      
## [103] ZNF48       EIF4EBP1    TET3        RPL22L1     CENPV       RPSAP52    
## [109] EPHB4       F3          FANCA       FANCE       ACSL4       ALDH1B1    
## [115] FBLN1       ALDH1A3     SLC29A4     FEN1        PHLDA1      CHSY1      
## [121] SHANK2      FOXF2       TPX2        CEP152      FOXL1       PALLD      
## [127] FOXC2       ZNF292      FOXM1       SEPTIN6     ZC3H4       FN1        
## [133] NCAPH       ABCB10      DDAH1       PATZ1       FZD2        SNX32      
## [139] TMEM86B     RAD54B      METTL7A     ANAPC13     ASPM        WWTR1      
## [145] AUTS2       CCDC9       TIAM2       FBXO5       NIPSNAP2    GBP1       
## [151] RGS17       MYEOV       SNORA65     SNORD48     SNORD36B    STAU2      
## [157] NDOR1       AGO2        DISC1       RBMX        UTP20       TSSK4      
## [163] WDR62       C1orf167    RGMB        ATP11C      TRIM59      ANPEP      
## [169] MRPL15      UBE2T       FHOD1       RACGAP1     NCAPH2      TRHDE      
## [175] MDFIC       H3-3B       HAS2        HELLS       HGF         HMGB3      
## [181] HMGA1       HMMR        HNRNPA1     HNRNPH1     HOXA2       HOXA3      
## [187] HOXB4       AGFG2       HSPA5       BIRC5       TNC         TMEM119    
## [193] CCDC18      IGFBP5      ZNF445      IL7         IRF2BP2     INCENP     
## [199] INSR        ITGA2       ITPR3       C5orf34     KIF11       KIFC1      
## [205] KIF22       C1QTNF12    RND3        ANKRD20A11P STMN1       LBR        
## [211] MIR100HG    LMNB1       FENDRR      PRICKLE3    CENPP       FAM166A    
## [217] UFSP1       LTBP1       MIR125B1    MIR221      BLID        MCL1       
## [223] MCM2        MCM3        MCM5        MCM6        MCM7        MEIS1      
## [229] KITLG       MKI67       MRE11       ZNF724      ANKRD18B    MSH2       
## [235] MTCP1       MYC         MYH10       NEDD9       NEK2        TONSL      
## [241] NPM1        NPTX1       NTSR1       OGG1        ORC1        CLDN11     
## [247] OXCT1       P2RY11      EMC9        RRP15       GMNN        NUSAP1     
## [253] CRIM1       ANKMY1      RASL12      PRR16       DACT1       DDX41      
## [259] CTDSPL2     GTSE1       DTL         PDGFRA      RTEL1       PFAS       
## [265] PFKFB4      PFKL        PFKP        PHKG1       PLS3        IL20RB     
## [271] PRRX1       ERRFI1      POLD1       POLD2       POLE        GPR173     
## [277] ANLN        MIOS        PCDH18      EXOC6       FAM193B     GPATCH4    
## [283] NCAPG2      DUS2        HAUS4       C1orf109    HCFC1R1     NOL8       
## [289] OXR1        ANO1        PLEKHJ1     FANCL       SRBD1       CDCA8      
## [295] SHQ1        CEP55       FANCI       MIS18BP1    HJURP       TNFRSF19   
## [301] CDCA7L      PRIM1       PRIM2       DEPDC1      NDC1        SLF2       
## [307] PRR11       AXL         MAP2K6      DNAJC3      PRPS2       TWNK       
## [313] PARP6       PRDM8       KIF15       SLC12A9     PELI2       NHSL1      
## [319] PTGER2      SENP7       SNORA10     WDR18       PTGS1       NLN        
## [325] RIMKLB      TBC1D14     ARRDC3      SHROOM3     ZBTB2       CIP2A      
## [331] VAT1L       SEMA4G      BARD1       BCAT1       ACTA2       RBL1       
## [337] GNB4        RCN2        RFC3        RFC4        ACTB        SNORA21    
## [343] SNORD2      SNORD96A    RPS6KA2     SALL1       SCD         THADA      
## [349] BLOC1S5     CHTF18      CLSPN       SEMA3F      PRSS22      CENPK      
## [355] SMG1P1      SNORD87     IRF2BPL     SFPQ        SFRP1       SRSF3      
## [361] ZFP62       BLVRA       CDH24       SH3BGRL     C16orf58    STIL       
## [367] SIX1        SKI         ANKRD33B    BMP4        PYCR3       SNORA5A    
## [373] SLC19A1     SMARCC2     SOX15       ITPRID2     STC1        STK11      
## [379] SYN1        TBX5        MSMP        TCF4        MIR635      TCF12      
## [385] BUB1        DYNLT3      BUB1B       NR2F1       THBS1       THBS2      
## [391] THRA        TMPO        TMSB4X      TNFAIP6     TOP2A       HSP90B1    
## [397] TRAF5       ACTG2       TRPC1       TNFSF4      PLIN4       LOC730101  
## [403] VARS1       VEGFA       MYRF        WEE1        NSD2        WNT5A      
## [409] CSKMT       ZNF33B      ZNF70       SNORD113-3  SNORD114-13 CENPO      
## [415] THOC6       HSD17B8     IGFLR1      TBL1XR1     ZNF768      NARS2      
## [421] ELMO3       ZFHX4       SHCBP1      ARHGAP28    THSD4       ATAD5      
## [427] WDR76       ATAT1       PIF1        RPF1        BRD3        TET1       
## [433] TSEN2       EGFL8       C1orf21     CLPB        FAM83D      C6orf62    
## [439] ZNF239      KIF18A      CHAF1B      INO80B      NUF2        FERMT3     
## [445] CDCA7       CTTNBP2     WDR54       TAGLN2      CEP78       ZNF644     
## [451] RNASEH2C    ARID5B      WDR24       SLF1        HAGHL       GINS4      
## [457] CAVIN2      RAD54L      L3MBTL3     MCM8        KIAA1841    SNORD35B   
## [463] TTF2        TNRC18      KDM2B       HIRIP3      TRIM52      CBR4       
## [469] PARP10      TMTC4       TIGD5       ARHGAP19    TSPYL5      GFM1       
## [475] INE1        CAV1        RUNX2       RGS20       DNAH11      B3GALNT1   
## [481] SUCLG2      SYNGAP1     DLEU2       SLC5A6      CCNB1       TIMELESS   
## [487] HERC3       TRPA1       WDR34       CCNF        NAT1        PHLDB2     
## [493] TAF1C       TICRR       PRC1        CCT6A       MTMR4       NMI        
## [499] DERL3       PDCD5       SYNGR3      RCCD1       FIBP        SLFN11     
## [505] NOLC1       MEX3A       PTTG1       NUMBL       MOB1B       PDLIM7     
## [511] ACVR2B      NAPRT       TRIP13      HAUS8       SLIT2       TBCK       
## [517] FADS2       MAP4K4      KIF20B      AKAP12      PDIA4       MDC1       
## [523] FAM53B      PCDHGA8     CD72        KNTC1       ARHGAP32    KMT2B      
## [529] DLGAP5      ARHGAP11A   MELK        CDC20       NCAPD2      KIF14      
## [535] CDC25A      DOP1B       REC8

3.4 List of down-regulated genes with a concordant H3K36me3 profiles

down_list <- noquote(unique(subset(chip_rna, edgeR_ChIP_3DA.logFC < - log2(1.2) &
                              edgeR_RNA_3DA.logFC < - log2(1.2))$SYMBOL))
down_list

##   [1] AKT3         ZNF667-AS1   SLC8A1-AS1   LRRC69       CKMT2-AS1   
##   [6] CLUHP3       TEN1         TMEM161B-AS1 LOC100506100 MYLK-AS1    
##  [11] LINC00665    LOC100507291 LOC100507373 ZNF571-AS1   MUC12       
##  [16] HIPK3        FARP1        CDK3         CDK6         APC2        
##  [21] TNIP1        RXYLT1       TRIM22       BTN2A2       BAIAP2      
##  [26] CAP2         SEMA3C       KHDRBS3      SLC12A7      TRAF3IP2    
##  [31] ZNF271P      TSPAN9       BLCAP        MAPRE2       KCNQ1OT1    
##  [36] IFT27        RAB31        ATE1         IL1RAPL1     SLC2A6      
##  [41] GALNT5       GPR176       CMTM7        SP140        RPLP0P2     
##  [46] FKBP9        ACOT7        MGLL         SYNPO        SLC25A25    
##  [51] KLHL29       OSBPL10      ADCY9        ALPK2        CKMT2       
##  [56] SLC18B1      MED12L       TRIM6        FGD4         SPPL3       
##  [61] MSI2         SLC43A2      RNF19B       COL8A1       COL10A1     
##  [66] FLACC1       GALM         OCIAD2       MAP3K8       SLC9B2      
##  [71] NCOA7        MAPK14       HECTD2       LAYN         TTC7B       
##  [76] C15orf61     ZNF597       TOM1L2       WIPF2        TMEM99      
##  [81] CCBE1        ZNF560       ZNF563       ZNF565       ZNF542P     
##  [86] ZNF582       ZNF570       CCDC80       ZNF827       SH3D19      
##  [91] PAQR3        PPM1K        ARSK         CYLD         VKORC1L1    
##  [96] KCTD7        TTC39B       ZNF836       ZNF433       CNST        
## [101] AKR1C1       FAM171B      TIGD2        PKD1L1       GLIS3       
## [106] TCEANC       ASXL1        DNAH5        DPP4         DRP2        
## [111] DSP          SLC26A2      DUSP4        MLKL         SERPINB1    
## [116] KLHDC8B      ARHGAP27     ADAM32       FAM219A      ERN1        
## [121] ETFB         SAMD9L       ZNF438       ADCY10P1     NAPEPLD     
## [126] ATXN7L1      RRAS2        AAK1         LMTK2        PLEKHA6     
## [131] ZHX2         ARSG         MSRB2        ELL2         DAAM1       
## [136] SAMD4A       WDTC1        TNIK         ZNF609       KDM4C       
## [141] ERC1         AKR1B1       RAP1GAP2     RAD54L2      MAST2       
## [146] RFTN1        FAM168A      FLT1         EXOC6B       DNMBP       
## [151] KIAA0930     MAN2B2       NEDD4L       DOCK9        ATP13A2     
## [156] SIRT4        ANGPTL2      RBFOX2       OPN3         LY96        
## [161] SSBP3        FLRT2        BCL2L13      KCTD13       ASPHD1      
## [166] CALHM5       TMCO4        GABRA2       TBC1D22A     BAMBI       
## [171] ABTB2        MPC2         RNF19A       RTTN         PCDHGA12    
## [176] SIPA1L1      PYGO1        TANC2        TMEM251      OR1J4       
## [181] SLC13A4      AK5          EHF          GAPDHS       GBP2        
## [186] HSPB8        OSTF1        MDGA1        B4GALT1      ABL2        
## [191] NPHP3        GLB1         TP53TG5      RABGEF1      GLI3        
## [196] GM2A         ZMIZ1-AS1    C1RL-AS1     GPR18        EMC10       
## [201] ANGPT2       GPER1        GPR37        C8orf31      GPX3        
## [206] SNX24        AAMDC        LMCD1        BICRA        HAGH        
## [211] HIVEP2       HLA-A        HLA-B        HLA-L        APBA1       
## [216] APBB2        HRH1         C17orf67     ZNF677       IFIT2       
## [221] CDKL4        CAVIN4       ZNF81        IL1B         IL1RAP      
## [226] IL4R         IL6R         ITGA1        GALNT18      LOC374443   
## [231] ZNF710       STIMATE      LAMA4        ABLIM1       SHC4        
## [236] LNPEP        SMAD7        MGST2        ASAH1        MITF        
## [241] AFF1         NR3C2        MME          ZNF506       MYO9A       
## [246] NDUFB2       RERE         NEO1         NFKB1        ATP1B1      
## [251] CNOT4        ATP2B4       ODC1         SYCP3        SERPINB2    
## [256] CLEC4A       SHANK1       CPA4         ACP6         COPZ2       
## [261] GASK1B       PHF21A       AIG1         PDE4B        SNX9        
## [266] PDE4C        PDE4D        PRRX2        ATP6V1H      UBE2D4      
## [271] CHMP3        PFKFB2       SERPINB8     PIK3C3       PIK3CD      
## [276] PLCB4        SHC3         PLD1         STX18        PCBP3       
## [281] UBL3         SH3TC1       PLEKHA5      RHOF         POU2F1      
## [286] ROBO4        GNB1L        TET2         ST7L         UHRF1BP1    
## [291] UBE2R2       RHBDL2       LPCAT2       PARP16       BANP        
## [296] UBA6-AS1     TMEM51       P3H2         STEAP3       VPS53       
## [301] PPP3CC       DRAM1        CAMK2N1      PIP4P2       ENOSF1      
## [306] PRR5         FLVCR2       BCAS4        FAR2         FAM222B     
## [311] CDK5RAP2     LRIF1        ADAP2        LRP2BP       ADCY10      
## [316] CISD1        THSD1        INKA2        PCDHGB3      PCDHGB2     
## [321] PCDHGA7      PCDHGA4      PCDHGA1      ANKH         FAM214A     
## [326] KIAA1217     PARD3        MASP1        PSEN1        PSEN2       
## [331] NRIP3        FMN2         PMEPA1       CAMK1G       NIPAL3      
## [336] RHOJ         ABHD6        HEG1         ARHGAP31     ZNF398      
## [341] NCEH1        LRRC7        ARHGAP20     MARCHF4      MRTFA       
## [346] FNIP2        ZFAT         SH3RF1       ZSWIM6       ZFYVE28     
## [351] MARK4        GRHL3        PTPRA        PTPRK        PTPRM       
## [356] SQOR         ZNF77        RALA         RARRES1      RBMS1       
## [361] RBP1         RHD          ZFAND3       RPL37A       CLIP1       
## [366] BDNF         SAT1         CLEC11A      PIEZO2       KIF13A      
## [371] LRRC4        MOAP1        FN3K         ERAP2        RAD21L1     
## [376] NXN          CPEB1        C6orf132     SMYD3        SIAH2       
## [381] LMF1         MRPS6        RAPH1        SLC5A3       SLC22A1     
## [386] SLC22A5      PHACTR4      ZSCAN18      SLC2A11      SOD2        
## [391] DST          STIM1        STX3         KLF9         TBXAS1      
## [396] TCF20        TCF21        TNFAIP3      C1S          TRAF1       
## [401] TRAF3        EIPR1        TUB          GXYLT2       NUTM2A-AS1  
## [406] MAP4K3-DT    UBE2H        UVRAG        ZNF20        ZNF135      
## [411] SLC30A4      RAB7A        SLC25A20     SLC25A23     MAPKAP1     
## [416] GNPTAB       ULBP3        CCNJL        SH3TC2       FAT4        
## [421] RHBDF2       C3orf52      VEPH1        PIP4K2C      RIN3        
## [426] NAA60        LINC00472    SVEP1        PCNX2        TMEM156     
## [431] C7orf69      BICC1        NR4A3        FHOD3        OPA3        
## [436] ACSF2        PDCD1LG2     AKNA         DUSP16       WNT5B       
## [441] DPF3         GPR68        FAM49A       ELL          CAST        
## [446] FZD4         H4C8         TLN2         SETDB2       TMTC1       
## [451] RSPH3        FAM186B      PCBD2        ANTXR1       MFSD14C     
## [456] CYSTM1       MAML2        SLC12A8      GNPTG        ZNF594      
## [461] EBPL         SLC9A7       PYROXD2      GPAT3        C1orf198    
## [466] ZNF341       SSH2         PLA2G4C      PLPP3        PCDHGB4     
## [471] NUMB         IRS2         SLC4A4       CBLB         TNFRSF10C   
## [476] TNFRSF10A    KSR1         ST3GAL5      MTMR3        MBD2        
## [481] CCND2        KYNU         H2BC11       PPP1R3F      ATP6V0E1    
## [486] FAM110B      PSTPIP2      STPG1        ZNF799       ZNF439      
## [491] N4BP2L1      LDB2         ZNF682       ATP6V0D1     GUSBP11     
## [496] DDX60L       TPGS1        ZMYM6        VAPB         MAGI1       
## [501] IL32         CCPG1        DHRS3        FANK1        ZNF235      
## [506] LPXN         KCNK6        CHST3        RALGPS1      PDE4DIP     
## [511] N4BP1        VGLL4        HDAC9        CLSTN3       TBKBP1      
## [516] SUSD6        SERTAD2      RUSC2        FCHSD2       TRANK1      
## [521] XYLB         OXSR1        TNFSF15      PTBP3

Considering a threshold at -log2(1.2) for both gene expression and H3K36me3 fold-change after 3DA treatment, here is the list of down-regulated genes with a concordant H3K36me3 profile. Note the presence of IL1B, IL1R, TNFSF15 and NFKB in this list.

4 Examples

4.1 Prepare the Gviz session

#--> Loading libraries
library(Gviz)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)

#--> Define the GenomeAxis layout
g_axe <- GenomeAxisTrack(col="black")

#--> Define the GeneRegion characteristics
g_reg <- BiomartGeneRegionTrack(genome = "hg19")

#--> List histone ChIP-seq normalized signal tracks
samples_chip <- bw[c(3,2,1)]
names <- c("DMSO", "4OHT", "3DA")

#--> Create DataTrack for each samples
colors  <- c("#FD8800", "#8FBD00", "#BA008C")
type <- "histogram"

s1 <- DataTrack(samples_chip[1], name = names[1], ylim = c(0,15), type = type, col.histogram= colors[1])
s2 <- DataTrack(samples_chip[2], name = names[2], ylim = c(0,15), type = type, col.histogram = colors[2])
s3 <- DataTrack(samples_chip[3], name = names[3], ylim = c(0,15), type = type, col.histogram = colors[3])

samples <- list(s1, s2, s3)

#--> Define the region of interest
#region <- GRanges("chr4", IRanges(103420486,103540459)) #NKB1
#region <- GRanges("chr2", IRanges(136595196,136636047)) #MCM6
region <- GRanges("chr1", IRanges(67771047,67864583)) #IL12RB2


#--> Setup the highlight
hl <- HighlightTrack(trackList = samples, 
                     start = as.numeric(start(region)),
                     end = as.numeric(end(region)),
                     chromosome = as.character(seqnames(region)))

#--> Setup the chromosome ideogram
ideo <- IdeogramTrack(genome = "hg19",
                      chromosome = as.character(seqnames(region )))

#--> Plot
plotTracks(c(hl, g_reg, ideo),
           sizes=c(rep(1,3),1, 1),
           from = as.numeric(start(region)) - 1000,
           to = as.numeric(end(region)) + 1000,
           chromosome = as.character(seqnames(region)), shape = "arrow",
           transcriptAnnotation = "symbol", collapseTranscripts = "meta")

Figure 30: Normalized profiles of H3K4me1, H3K27ac, H3K4me3 and H3K27me3 ChIP-seq signal in the IL12RB2 locus

5 Clustering

5.1 Perform Clustering

library(pheatmap)
library(viridis)

de_all_chip_1 <- gr_edger_chip[
                    which((abs(gr_edger_chip$edgeR_ChIP_3DA.logFC) > log2(2) & gr_edger_chip$edgeR_ChIP_3DA.FDR < .1) |
                      abs(gr_edger_chip$edgeR_ChIP_OHT.logFC) > log2(2) & gr_edger_chip$edgeR_ChIP_OHT.FDR < .1),]
    

data_full <- cpm(data_edger_chip)
row.names(data_full) <- c(data_edger_chip$genes$genes)
colnames(data_full) <- data_edger_chip$samples$group

de_all_chip_2  <- data_full[match(de_all_chip_1$edgeR_ChIP_OHT.genes, row.names(data_full), nomatch = 0),]
#de_all_chip <- do.call("cbind", by(data = t(de_all_chip), INDICES = colnames(de_all_chip), colMeans))
de_all_chip_2 <- t(scale(t(de_all_chip_2)))

set.seed(123)
k <- kmeans(de_all_chip_2, centers = 5)
row.names(de_all_chip_2[order(k$cluster),])

anno_r <- data.frame(Cluster = as.factor(k$cluster), row.names = make.names(names(k$cluster), unique = TRUE))

pheatmap(unique(de_all_chip_2[order(k$cluster),]),
         cluster_rows = FALSE, cluster_cols = FALSE, show_rownames = FALSE, color = viridis(100),
         annotation_row = anno_r)

Figure 31: K-means clustering of H3K36me3 data

5.2 Functional over-representation

#---> Get clusters
probes <- data.frame(k$cluster, peak = names(k$cluster))
probes <- merge(probes, gr_edger_chip,
                by.x = "peak", by.y = "edgeR_ChIP_OHT.genes")

#---> Get the corresponding Entrez_ID
fun_enrich_WGCNA <- split(probes$entrez_id, probes$k.cluster)

#--->  Over-representation analysis (MSigDB Hallmark)
library(ReactomePA)
library(clusterProfiler)
            
# Load the gmt file
GMT <- clusterProfiler::read.gmt("~/Documents/Post-Doc/A-Data/OUTILS/h.all.v6.0.entrez.gmt")

# Run the over-representation analysis 
fun_enrich_res <- data.frame(compareCluster(fun_enrich_WGCNA, fun='enricher',
                                            TERM2GENE=GMT, pvalueCutoff=1, qvalueCutoff = 1,
                                            minGSSize = 20))
# Filter the data
fun_enrich_res_2 <- data.frame()
for (i in unique(fun_enrich_res$Cluster)) {
  x <- fun_enrich_res[which(fun_enrich_res$Cluster == i),]
  x[which(x$qvalue > .2),]$qvalue <- NA
  x <- x[order(x$qvalue),]
  fun_enrich_res_2 <- rbind(fun_enrich_res_2, x)
}

fun_enrich_res_2 <- na.omit(fun_enrich_res_2)

# Clustering
library(reshape)
fun_enrich_res_tmp <- fun_enrich_res_2[,c("ID", "qvalue", "Cluster")]
fun_enrich_res_tmp <- cast(fun_enrich_res_tmp, Cluster~ID, value = "qvalue")
fun_enrich_res_tmp <- data.matrix(fun_enrich_res_tmp)
fun_enrich_res_tmp[is.na(fun_enrich_res_tmp)] <- 0
row.names(fun_enrich_res_tmp) <- fun_enrich_res_tmp[,1]
fun_enrich_res_tmp <- fun_enrich_res_tmp[,-1]
order_fun_enrich <- hclust(as.dist(1-cor(fun_enrich_res_tmp)), method = "ward.D2")$order

fun_enrich_res_2$Description <- factor(fun_enrich_res_2$Description,
                                       levels =colnames(fun_enrich_res_tmp)[order_fun_enrich] )
fun_enrich_res_2$Percent <- unlist(lapply(strsplit(fun_enrich_res_2$GeneRatio, "/"), function(x){as.numeric(x[1])/as.numeric(x[2])}*100))

# Plot
ggplot(fun_enrich_res_2, aes(y = ID, x = Cluster, fill = -log10(pvalue), size = Percent)) +
  geom_point(shape = 21, stroke = .5) +
  scale_fill_gradient(low = "firebrick1", high = "chartreuse3") +
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text.y = element_text(size = 8))

Figure 32: Functional over-representation analysis (MSigDB hallmark)

5.3 Integration

data_all <- unique(merge(probes,  normCounts(set_RUVs), by.x = "entrez_id", by.y = "row.names"))
data_all <- data_all[order(data_all$k.cluster),]
data_all[,c(29:40)] <- t(scale(t(log10(data_all[,c(29:40)]+1))))

anno_r <- data.frame(as.factor(data_all$k.cluster), row.names = row.names(data_all))

pheatmap(unique(data_all[,c(29:40)]), cluster_rows = FALSE, cluster_cols = FALSE, scale = "none", show_rownames = FALSE, color = viridis(100), annotation_row = anno_r)

Figure 33: Projection of RNA-seq data on K-means clustering obtained for H3K36me3 data

6 Session

sessionInfo()

## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS  10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] reshape_0.8.8                          
##  [2] ReactomePA_1.32.0                      
##  [3] viridis_0.5.1                          
##  [4] viridisLite_0.3.0                      
##  [5] pheatmap_1.0.12                        
##  [6] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
##  [7] Gviz_1.32.0                            
##  [8] Rsubread_2.2.6                         
##  [9] qusage_2.22.0                          
## [10] fgsea_1.14.0                           
## [11] gridExtra_2.3                          
## [12] clusterProfiler_3.16.1                 
## [13] ggrepel_0.9.1                          
## [14] RColorBrewer_1.1-2                     
## [15] RUVSeq_1.22.0                          
## [16] EDASeq_2.22.0                          
## [17] ShortRead_1.46.0                       
## [18] GenomicAlignments_1.24.0               
## [19] Rsamtools_2.4.0                        
## [20] BiocParallel_1.22.0                    
## [21] ggplot2_3.3.3                          
## [22] edgeR_3.30.3                           
## [23] limma_3.44.3                           
## [24] biovizBase_1.36.0                      
## [25] colorRamps_2.3                         
## [26] eulerr_6.1.0                           
## [27] ChIPpeakAnno_3.22.4                    
## [28] Biostrings_2.56.0                      
## [29] XVector_0.28.0                         
## [30] org.Hs.eg.db_3.11.4                    
## [31] GenomicFeatures_1.40.1                 
## [32] AnnotationDbi_1.50.3                   
## [33] biomaRt_2.44.4                         
## [34] DESeq2_1.28.1                          
## [35] DiffBind_2.16.2                        
## [36] SummarizedExperiment_1.18.2            
## [37] DelayedArray_0.14.1                    
## [38] matrixStats_0.58.0                     
## [39] Biobase_2.48.0                         
## [40] GenomicRanges_1.40.0                   
## [41] GenomeInfoDb_1.24.2                    
## [42] IRanges_2.22.2                         
## [43] S4Vectors_0.26.1                       
## [44] BiocGenerics_0.34.0                    
## [45] BiocStyle_2.16.1                       
## 
## loaded via a namespace (and not attached):
##   [1] estimability_1.3         rappdirs_0.3.3           rtracklayer_1.48.0      
##   [4] AnnotationForge_1.30.1   R.methodsS3_1.8.1        coda_0.19-4             
##   [7] tidyr_1.1.2              bit64_4.0.5              knitr_1.31              
##  [10] aroma.light_3.18.0       R.utils_2.10.1           data.table_1.13.6       
##  [13] rpart_4.1-15             hwriter_1.3.2            RCurl_1.98-1.2          
##  [16] AnnotationFilter_1.12.0  generics_0.1.0           cowplot_1.1.1           
##  [19] lambda.r_1.2.4           RSQLite_2.2.3            europepmc_0.4           
##  [22] bit_4.0.4                enrichplot_1.8.1         base64url_1.4           
##  [25] xml2_1.3.2               assertthat_0.2.1         batchtools_0.9.15       
##  [28] amap_0.8-18              xfun_0.21                hms_1.0.0               
##  [31] evaluate_0.14            progress_1.2.2           caTools_1.18.1          
##  [34] dbplyr_2.1.0             Rgraphviz_2.32.0         igraph_1.2.6            
##  [37] DBI_1.1.1                geneplotter_1.66.0       htmlwidgets_1.5.3       
##  [40] futile.logger_1.4.3      purrr_0.3.4              ellipsis_0.3.1          
##  [43] dplyr_1.0.4              backports_1.2.1          V8_3.4.0                
##  [46] bookdown_0.21            annotate_1.66.0          vctrs_0.3.6             
##  [49] ensembldb_2.12.1         cachem_1.0.4             withr_2.4.1             
##  [52] ggforce_0.3.2            triebeard_0.3.0          DOT_0.1                 
##  [55] BSgenome_1.56.0          emmeans_1.5.4            checkmate_2.0.0         
##  [58] prettyunits_1.1.1        cluster_2.1.1            DOSE_3.14.0             
##  [61] lazyeval_0.2.2           crayon_1.4.1             genefilter_1.70.0       
##  [64] labeling_0.4.2           pkgconfig_2.0.3          tweenr_1.0.1            
##  [67] nlme_3.1-152             ProtGenerics_1.20.0      nnet_7.3-15             
##  [70] rlang_0.4.10             lifecycle_1.0.0          downloader_0.4          
##  [73] seqinr_4.2-5             BiocFileCache_1.12.1     GOstats_2.54.0          
##  [76] dichromat_2.0-0          VennDiagram_1.6.20       rsvg_2.1                
##  [79] polyclip_1.10-0          graph_1.66.0             Matrix_1.3-2            
##  [82] urltools_1.7.3           base64enc_0.1-3          ggridges_0.5.3          
##  [85] png_0.1-7                rjson_0.2.20             bitops_1.0-6            
##  [88] R.oo_1.24.0              KernSmooth_2.23-18       blob_1.2.1              
##  [91] stringr_1.4.0            qvalue_2.20.0            regioneR_1.20.1         
##  [94] brew_1.0-6               jpeg_0.1-8.1             gridGraphics_0.5-1      
##  [97] reactome.db_1.70.0       scales_1.1.1             graphite_1.34.0         
## [100] memoise_2.0.0            GSEABase_1.50.1          magrittr_2.0.1          
## [103] plyr_1.8.6               gplots_3.1.1             zlibbioc_1.34.0         
## [106] scatterpie_0.1.5         compiler_4.0.0           ade4_1.7-16             
## [109] systemPipeR_1.22.0       Category_2.54.0          htmlTable_2.1.0         
## [112] formatR_1.7              Formula_1.2-4            mgcv_1.8-34             
## [115] MASS_7.3-53.1            tidyselect_1.1.0         fftw_1.0-6              
## [118] stringi_1.5.3            highr_0.8                yaml_2.2.1              
## [121] GOSemSim_2.14.2          askpass_1.1              locfit_1.5-9.4          
## [124] latticeExtra_0.6-29      VariantAnnotation_1.34.0 fastmatch_1.1-0         
## [127] tools_4.0.0              rstudioapi_0.13          foreign_0.8-81          
## [130] idr_1.2                  farver_2.0.3             ggraph_2.0.4            
## [133] digest_0.6.27            rvcheck_0.1.8            BiocManager_1.30.10     
## [136] Rcpp_1.0.6               httr_1.4.2               colorspace_2.0-0        
## [139] XML_3.99-0.5             splines_4.0.0            RBGL_1.64.0             
## [142] graphlayouts_0.7.1       multtest_2.44.0          ggplotify_0.0.5         
## [145] xtable_1.8-4             jsonlite_1.7.2           futile.options_1.0.1    
## [148] tidygraph_1.2.0          R6_2.5.0                 Hmisc_4.4-2             
## [151] pillar_1.4.7             htmltools_0.5.1.1        glue_1.4.2              
## [154] fastmap_1.1.0            DESeq_1.39.0             codetools_0.2-18        
## [157] mvtnorm_1.1-1            lattice_0.20-41          tibble_3.0.6            
## [160] curl_4.3                 gtools_3.8.2             GO.db_3.11.4            
## [163] openssl_1.4.3            survival_3.2-7           rmarkdown_2.6           
## [166] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.3  
## [169] reshape2_1.4.4           gtable_0.3.0

3-deazaadenosine (3DA) alleviates senescence to promote cellular fitness and therapy efficiency

1 H3K36me3 ChIP-seq data

1.1 Preprocessing of the data

1.1.1 Import peaks

1.1.2 Explore commonalities and specificities

1.2 TSS centric analysis

1.2.1 Preparing the data

1.2.2 Compute coverages across TSS

1.2.3 Build the metaprofiles

1.3 Generate the raw count matrix

1.4 Scaling and normalization

1.5 Graphical QC

1.6 Differential analysis

1.7 Visual inspection

1.8 Functionnal exploration

1.8.1 GSEA for senescence induction - REACTOME

1.8.2 GSEA for senescence induction - SENESCENCE

1.8.3 GSEA for 3DA treatment - REACTOME

1.8.4 GSEA for 3DA treatment - SENESCENCE

2 RNA-seq data

2.1 Generate count matrix

2.2 Normalization

2.3 DE analysis

2.4 Visual inspection

2.5 Functionnal exploration

2.5.1 GSEA for senenscence induction - REACTOME

2.5.2 GSEA for senenscence induction - SENESCENCE

2.5.3 GSEA for 3DA treatment - REACTOME

2.5.4 GSEA for 3DA treatment - SENESCENCE

3 Integration

3.1 Correlations between ChIP-seq and RNA-seq during OIS

3.2 Correlations between ChIP-seq and RNA-seq after 3DA

3.3 List of up-regulated genes during 3DA with a concordant H3K36me3 profile

3.4 List of down-regulated genes with a concordant H3K36me3 profiles

4 Examples

4.1 Prepare the Gviz session

5 Clustering

5.1 Perform Clustering

5.2 Functional over-representation

5.3 Integration

6 Session