Analysis is performed according to Love et al. 2014

Abbreviations:

B. villosa = BRA1896
B. oleracea = BRA1909

1 Data import

Load libraries:

### Clear workspace ###
rm(list=ls())

### Load libraries ###
library(DESeq2)

## Loading required package: S4Vectors

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:base':
## 
##     expand.grid

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:grDevices':
## 
##     windows

## Loading required package: GenomicRanges

## Loading required package: GenomeInfoDb

## Loading required package: SummarizedExperiment

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: DelayedArray

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians

## Loading required package: BiocParallel

## 
## Attaching package: 'DelayedArray'

## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges

## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum

library(ggplot2)
library(cowplot)
library(gdata)

## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

##

## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

## 
## Attaching package: 'gdata'

## The following object is masked from 'package:SummarizedExperiment':
## 
##     trim

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:GenomicRanges':
## 
##     trim

## The following object is masked from 'package:IRanges':
## 
##     trim

## The following objects are masked from 'package:S4Vectors':
## 
##     first, first<-

## The following object is masked from 'package:BiocGenerics':
## 
##     combine

## The following object is masked from 'package:stats4':
## 
##     nobs

## The following object is masked from 'package:stats':
## 
##     nobs

## The following object is masked from 'package:utils':
## 
##     object.size

## The following object is masked from 'package:base':
## 
##     startsWith

library(readxl)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:gdata':
## 
##     combine

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:BiocGenerics':
## 
##     combine

library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(VennDiagram)

## Loading required package: grid

## Loading required package: futile.logger

## 
## Attaching package: 'VennDiagram'

## The following object is masked from 'package:ggpubr':
## 
##     rotate

library(grid)
library(futile.logger)
library(gplots)

## Registered S3 method overwritten by 'gplots':
##   method         from 
##   reorder.factor gdata

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:gdata':
## 
##     reorder.factor

## The following object is masked from 'package:IRanges':
## 
##     space

## The following object is masked from 'package:S4Vectors':
## 
##     space

## The following object is masked from 'package:stats':
## 
##     lowess

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following objects are masked from 'package:gdata':
## 
##     combine, first, last

## The following object is masked from 'package:matrixStats':
## 
##     count

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union

## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect

## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union

## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ComplexHeatmap)

## ========================================
## ComplexHeatmap version 2.2.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================

library(officer)

## 
## Attaching package: 'officer'

## The following object is masked from 'package:readxl':
## 
##     read_xlsx

library(ggrepel)
library(RColorBrewer)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(extrafont)

## Registering fonts with R

font_import(pattern="arial")

## Scanning ttf files in C:\WINDOWS\Fonts ...

## Extracting .afm files from .ttf files...

## C:\Windows\Fonts\arial.ttf : ArialMT already registered in fonts database. Skipping.
## C:\Windows\Fonts\arialbd.ttf : Arial-BoldMT already registered in fonts database. Skipping.
## C:\Windows\Fonts\arialbi.ttf : Arial-BoldItalicMT already registered in fonts database. Skipping.
## C:\Windows\Fonts\ariali.ttf : Arial-ItalicMT already registered in fonts database. Skipping.
## Found FontName for 0 fonts.
## Scanning afm files in C:/Users/Thomas/Documents/R/win-library/3.6/extrafontdb/metrics

Import and filter data:

## Data import
GeneMatrix <- as.matrix(read.csv("GeneMatrix.csv", sep=",", row.names="gene_id", quote=""))
TrinityMatrix <- as.matrix(read.csv("TrinityMatrix.csv", sep=";", header=TRUE, row.names="gene_id"))
Phenotype_Data <- read.csv("PhenotypeData.csv", sep=";", row.names=1)
Phenotype_Data$genotype <- as.factor(Phenotype_Data$genotype)
Phenotype_Data$treatment <- as.factor(Phenotype_Data$treatment)

## Filter for low counts
keep <- which(TrinityMatrix[,1] > 10 & TrinityMatrix[,2] > 10 & TrinityMatrix[,3] > 10
            | TrinityMatrix[,4] > 10 & TrinityMatrix[,5] > 10 & TrinityMatrix[,6] > 10
            | TrinityMatrix[,7] > 10 & TrinityMatrix[,8] > 10 & TrinityMatrix[,9] > 10
            | TrinityMatrix[,10] > 10 & TrinityMatrix[,11] > 10 & TrinityMatrix[,12] > 10)

## Apply the filter
TrinityMatrix <- TrinityMatrix[keep,]
nrow(TrinityMatrix)

## [1] 15251

Merge reference genes with de novo transcripts:

## Merge data
GeneMatrix <- rbind(GeneMatrix, TrinityMatrix)
## Check data
all(rownames(Phenotype_Data) %in% colnames(GeneMatrix))

## [1] TRUE

GeneMatrix <- GeneMatrix[, rownames(Phenotype_Data)]
all(rownames(Phenotype_Data) == colnames(GeneMatrix))

## [1] TRUE

DESeq2 analysis:

## Define the design, make a pseudofactor and perform DESeq2
DESeq_Object <- DESeq2::DESeqDataSetFromMatrix(countData=GeneMatrix,
                                               colData=Phenotype_Data, 
                                               design= ~0+genotype*treatment)
DESeq_Object$group <- factor(paste0(DESeq_Object$genotype, DESeq_Object$treatment))
design(DESeq_Object) <- ~ 0 + group
DESeq_Object <- DESeq2::DESeq(DESeq_Object)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

DESeq2::resultsNames(DESeq_Object)

## [1] "groupBRA1896Inoculated" "groupBRA1896Mock"       "groupBRA1909Inoculated"
## [4] "groupBRA1909Mock"

## Perform regularized log-transformation
rlog <- DESeq2::rlog(DESeq_Object)

1.1 Sample-to-sample distance

Calculate distances:

## Calculate sample-to-sample distances
sampleDists <- dist(t(assay(rlog)))
sampleDistMatrix <- as.matrix(sampleDists)
colours=colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

## Plot the heatmap
Sample_Distances <- ComplexHeatmap::Heatmap(sampleDistMatrix, 
                                            col=colours,
                                            show_row_names=TRUE, row_dend_width=unit(3.5,"cm"),
                                            column_dend_height=unit(1,"cm"),
                                            border=TRUE,
                                            row_names_gp=gpar(fontsize=12, fontface="bold"),
                                            column_names_gp=gpar(fontsize=12, fontface="bold"),
                                            column_names_rot=45,
                                            show_heatmap_legend=FALSE)

Plot:

Sample_Distances

1.2 PCA-plot

Plot:

## PCA-plot
plot_genes <- DESeq2::plotPCA(rlog, intgroup=c("group"), returnData=TRUE)
percentVar <- round(100*attr(plot_genes, "percentVar"))
ggplot2::ggplot(plot_genes, aes(x= PC1,y= PC2, fill=group))+
ggplot2::geom_point(size=4, shape=21)+
ggplot2::scale_fill_manual(labels=c("BRA1896-Inoculated", "BRA1896-Mock", "BRA1909-Inoculated", "BRA1909-Mock"),
                           values=c("dodgerblue4", "firebrick4","darkorchid","cyan4"))+
ggplot2::guides(fill=guide_legend(title="Group"),
                shape=guide_legend(title="Group"),
                size=guide_legend(title="Group"))+
ggplot2::xlab(paste0("PC1: ",percentVar[1], "% variance"))+
ggplot2::ylab(paste0("PC2: ",percentVar[2], "% variance"))+
ggplot2::geom_hline(yintercept=0,lty=2)+
ggplot2::geom_vline(xintercept=0, lty=2)+
ggplot2::coord_fixed()+
ggplot2::theme_bw()+
            theme(panel.background = element_blank(),
            panel.grid.major=element_blank(),
            panel.grid.minor=element_blank(),
            legend.title=element_blank(),
            legend.position = c(0.5,1.1),
            axis.title.x=element_text(size=13, color="black", face="bold"),
            legend.text=element_text(size=12, color="black", face="bold"),
            axis.text.x=element_text(size=12, color="black", face="bold"),
            axis.title.y=element_text(size=13, color="black", face="bold"),
            axis.text.y=element_text(size=12, color="black", face="bold"),
            axis.ticks.x =element_blank(),
            legend.direction ="horizontal",
            axis.line.y = element_line(colour="black"),
            plot.margin=margin(2,2,2,2, "cm"))

Sample information:

# For overview of the single samples
knitr::kable(plot_genes) %>% kableExtra::kable_styling(bootstrap_options = c("striped"))

	PC1	PC2	group	group.1	name
BRA1896Pet_C1	-61.18431	-39.97137	BRA1896Mock	BRA1896Mock	BRA1896Pet_C1
BRA1896Pet_C2	-60.97164	-39.62362	BRA1896Mock	BRA1896Mock	BRA1896Pet_C2
BRA1896Pet_C3	-61.28166	-40.05500	BRA1896Mock	BRA1896Mock	BRA1896Pet_C3
BRA1896Pet_I1	50.50724	-53.06623	BRA1896Inoculated	BRA1896Inoculated	BRA1896Pet_I1
BRA1896Pet_I2	51.31203	-52.40310	BRA1896Inoculated	BRA1896Inoculated	BRA1896Pet_I2
BRA1896Pet_I3	40.57592	-50.88098	BRA1896Inoculated	BRA1896Inoculated	BRA1896Pet_I3
BRA1909Pet_C1	-51.71876	52.46049	BRA1909Mock	BRA1909Mock	BRA1909Pet_C1
BRA1909Pet_C2	-52.55861	51.35714	BRA1909Mock	BRA1909Mock	BRA1909Pet_C2
BRA1909Pet_C3	-50.87765	49.12466	BRA1909Mock	BRA1909Mock	BRA1909Pet_C3
BRA1909Pet_I1	71.45057	40.38508	BRA1909Inoculated	BRA1909Inoculated	BRA1909Pet_I1
BRA1909Pet_I2	53.02575	42.02263	BRA1909Inoculated	BRA1909Inoculated	BRA1909Pet_I2
BRA1909Pet_I3	71.72113	40.65031	BRA1909Inoculated	BRA1909Inoculated	BRA1909Pet_I3

1.3 Cook’s distance

Plot:

## Cook's plot
par(mar=c(8,5,2,2))
boxplot(log10(assays(DESeq_Object)[["cooks"]]), range=0, las=2)

2 Results

Retrieve results:

## 1. Compare treatments
TreatmentMatrix <- rbind(
  "Inoculated/Mock : B1896" = c(1,-1, 0, 0),
  "Inoculated/Mock : B1909"  = c(0, 0, 1,-1)
)

# 2.: Interaction between species due to inoculation
Interaction_dpi <- rbind(
  "Int Genotype_Treatment: 11dpi" = c(1,-1,-1, 1)
)

## 1. Results
Results_B1896 <- DESeq2::results(DESeq_Object, contrast=TreatmentMatrix[1,])
Results_B1909  <- DESeq2::results(DESeq_Object, contrast=TreatmentMatrix[2,])

# 2. Results
Results_Interaction <- DESeq2::results(DESeq_Object, contrast=Interaction_dpi[1,])

Summary: B1896

summary(Results_B1896, alpha=0.05)

## 
## out of 63995 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 6630, 10%
## LFC < 0 (down)     : 1829, 2.9%
## outliers [1]       : 60, 0.094%
## low counts [2]     : 11007, 17%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Summary: B1909

summary(Results_B1909, alpha=0.05)

## 
## out of 63995 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 7209, 11%
## LFC < 0 (down)     : 3566, 5.6%
## outliers [1]       : 60, 0.094%
## low counts [2]     : 9785, 15%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Summary: Interaction

summary(Results_Interaction, alpha=0.05)

## 
## out of 63995 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 542, 0.85%
## LFC < 0 (down)     : 312, 0.49%
## outliers [1]       : 60, 0.094%
## low counts [2]     : 21998, 34%
## (mean count < 14)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

2.1 DEG-tables

B1896:

DT::datatable(as.data.frame(B1896_Signif <- Results_B1896[which(Results_B1896$padj < 0.05),]),
                            colnames = c("Gene_ID" = 1)) %>% 
                            DT::formatRound(c("baseMean","log2FoldChange", "lfcSE", "stat", "pvalue", "padj"), 3)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

B1909:

DT::datatable(as.data.frame(B1909_Signif <- Results_B1909[which(Results_B1909$padj < 0.05),]),
                            colnames = c("Gene_ID" = 1)) %>% 
                            DT::formatRound(c("baseMean","log2FoldChange", "lfcSE", "stat", "pvalue", "padj"), 3)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Interaction:

DT::datatable(as.data.frame(Interaction_Signif <- Results_Interaction[which(Results_Interaction$padj < 0.05),]),
                            colnames = c("Gene_ID" = 1)) %>% 
                            DT::formatRound(c("baseMean","log2FoldChange", "lfcSE", "stat", "pvalue", "padj"), 3)

Convert tables:

## Function for manipulating gene tables
editDEGs <- function(DEGtable){
                  edited <- as.data.frame(DEGtable)
                  edited <- cbind(Gene_ID=rownames(edited), edited)
                  rownames(edited) <- NULL
                  edited <- edited %>%
                            dplyr::filter(baseMean > 0) %>%
                            dplyr::select(Gene_ID, log2FoldChange, lfcSE, pvalue, padj) %>%
                            dplyr::rename(Log2_FC = "log2FoldChange", 
                                   LFC_SE="lfcSE", 
                                   Pvalue="pvalue", 
                                   Padj ="padj") %>%
                            dplyr::mutate(Regulation = ifelse(Log2_FC > 0,"Up",
                                                ifelse(Log2_FC < 0,"Down","Intermediate"))) %>%
                            dplyr::mutate(Significant = ifelse(Pvalue < 0.05 & Padj < 0.05, "Yes","No"))

        return(edited)
}

## Apply on genes
B1896_Genes <- editDEGs(Results_B1896)
B1909_Genes <- editDEGs(Results_B1909)
## Apply on DEGs
B1896_DEGs <- editDEGs(B1896_Signif)
B1909_DEGs <- editDEGs(B1909_Signif)
## Apply on genes
Interaction_Genes <- editDEGs(Results_Interaction)
## Apply on DEGs
Interaction_DEGs <- editDEGs(Interaction_Signif)

2.2 Volcano-plot

Plot genes with significant fold-change differences in the interaction:

## Interaction
Interaction_Plot <- Interaction_Genes
Interaction_Plot$Padj[is.na(Interaction_Plot$Padj)] <- 1
Interaction_Plot$Significant[is.na(Interaction_Plot$Significant)] <- "No"
Interaction_Plot$Log_Padj <- -log10(Interaction_Plot$Padj)
Interaction_Plot <- Interaction_Plot %>%
            dplyr::mutate(Induction = ifelse(Regulation == "Up" & Significant =="Yes", "Positive",
                                    ifelse(Regulation == "Down" & Significant == "Yes", "Negative", 
                                    ifelse(Regulation == "Up" | Regulation =="Down" & Significant == "No", "No change", "No change"))))

Prepare plots:

## Interaction
Interaction_Vulcano <- ggplot2::ggplot(Interaction_Plot)+
ggplot2::geom_point(aes(x=Log2_FC, y=Log_Padj, colour=Induction),
                                               alpha=0.8,
                                               size=1.5)+
ggplot2::ylab(expression(-log[10]("FDR")))+
ggplot2::scale_colour_manual(name="Induction", 
                    breaks=c("Negative","No change", "Positive"),
                    values=c("dodgerblue4","darkslategray4","firebrick"),
                    labels=c("Negative","No change", "Positive"))+
ggplot2::labs(title="", x="Log2-fold change difference", y=expression(bold(paste(-log[10], " P-adjusted"))))+
ggplot2::geom_hline(yintercept=1.30103, colour="black", linetype="dashed", size = 0.5)+
ggplot2::geom_vline(xintercept=1, colour="black", linetype="dashed", size = 0.5)+
ggplot2::geom_vline(xintercept=-1, colour="black", linetype="dashed", size = 0.5)+
ggplot2::scale_y_continuous(expand=c(0,0), limits=c(0,30))+
ggplot2::scale_x_continuous(expand=c(0,0), limits=c(-50,50))+
ggplot2::theme_bw()+
    ggplot2::theme(
        legend.position = "bottom",
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        legend.title=element_blank(),
        legend.text=element_text(size=13, color="black", face="bold"),
        axis.text.x=element_text(size=13, color="blacK", face="bold"),
        axis.text.y=element_text(size=13, color="black", face="bold"),
        axis.title.x=element_text(size=13, color="blacK", face="bold"),
        axis.title.y=element_text(size=13, color="black", face="bold"),
        plot.margin=margin(1,1,1,1, "cm"),
        plot.title=element_text(size=13, hjust=0.5))

## Prepare stats
Interaction_Stats <- as.data.frame(table(Interaction_DEGs$Regulation))
colnames(Interaction_Stats) <-  c("Induction", "Interaction")

Plot:

Interaction_Vulcano

## Table
knitr::kable(Interaction_Stats) %>% kableExtra::kable_styling(bootstrap_options = c("striped"))

Induction	Interaction
Down	312
Up	542

2.3 DEG comparison

Prepare venn diagram:

wf <- windowsFonts()
## Prepare the venn diagramm
B1896_DEGs_UP <- subset(B1896_DEGs, Regulation == "Up")
B1896_DEGs_DOWN <- subset(B1896_DEGs, Regulation == "Down")
B1909_DEGs_UP <- subset(B1909_DEGs, Regulation == "Up")
B1909_DEGs_DOWN <- subset(B1909_DEGs, Regulation == "Down")

## Venn diagram
UP <- VennDiagram::venn.diagram(list(BRA1896=B1896_DEGs_UP$Gene_ID, BRA1909=B1909_DEGs_UP$Gene_ID),
                                        alpha = c(0.7,0.7), fill=c("darkslategray4", "deepskyblue2"), filename=NULL,
                                        cex=0.9, cat.cex=0.9, cat.fontface="bold",
                                        fontfamily=wf$Arial,
                                        fontface="bold",
                                        main.fontface="bold",
                                        main.cex=1.3,
                                        main.pos=c(0.5,1),
                                        cat.fontfamily=wf$Arial,
                                        main.fontfamily=wf$Arial,
                                        cat.pos=c(-160,160),
                                        cat.dist=c(0.04,0.04),
                                        margin=0.05,
                                        main="Upregulated")

DOWN <- VennDiagram::venn.diagram(list(BRA1896=B1896_DEGs_DOWN$Gene_ID, BRA1909=B1909_DEGs_DOWN$Gene_ID),
                                        alpha = c(0.7,0.7), fill=c("darkslategray4", "deepskyblue2"), filename=NULL,
                                        cex=0.9, cat.cex=0.9, cat.fontface="bold",
                                        fontfamily=wf$Arial,
                                        fontface="bold",
                                        main.fontface="bold",
                                        main.cex=1.3,
                                        main.pos=c(0.5,1),
                                        cat.fontfamily=wf$Arial,
                                        main.fontfamily=wf$Arial,
                                        cat.pos=c(-160,170),
                                        cat.dist=c(0.04,0.04),
                                        margin=0.03,
                                        main="Downregulated")
Venn <- cowplot::plot_grid(grobTree(UP), grobTree(DOWN), ncol=1, nrow=2, align ="v", scale=0.9)

Prepare histogram:

## Change labels
colnames(Interaction_Genes) <- c("Gene_ID","Log2-Difference (B1896-B1909)","LFC_SE",
                              "Pvalue","Padj", "Induction", "Significant")

## Prepare data frame
Induction <- c("Positive", "Negative")
Gene_Count <- as.numeric(c(542,312))
Interaction_Bar <- data.frame(Induction, Gene_Count)

Genotype <- c("BRA1896" , "BRA1896", "BRA1909","BRA1909")
Regulation <- c("Up", "Down","Up", "Down")
DEGs <- as.numeric(c("6630","1829","7209","3566"))
DEGs_Bar <- data.frame(Genotype, Regulation, DEGs)

## Make the first plot (DEGs for the two species)
DEGs_Bar_Plot <- ggplot2::ggplot(DEGs_Bar, aes(x=Genotype, y=DEGs, fill=Regulation))+
ggplot2::geom_bar(stat="identity", position=position_dodge2(reverse=TRUE))+
ggplot2::scale_y_continuous(expand=c(0,0), limits=c(0,8000))+
ggplot2::coord_cartesian(ylim=c(0,8000))+
ggplot2::scale_fill_manual(values=c("darkslategray4","darkslategray"), breaks=c("Up","Down"))+
ggplot2::annotate("text", x=0.77, y=7030, size=4.5, label="6630", fontface="bold")+
ggplot2::annotate("text", x=1.22, y=2229, size=4.5, label="1829", fontface="bold")+
ggplot2::annotate("text", x=1.77, y=7609, size=4.5, label="7209", fontface="bold")+
ggplot2::annotate("text", x=2.22, y=3966, size=4.5, label="3566", fontface="bold")+
ggplot2::labs(x="", y="DEGs")+
    ggplot2::theme_bw()+
        ggplot2::theme(panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        legend.title=element_text(size=12, face="bold"),
        legend.position = c(0.1,1.15),
        axis.title.x=element_text(size=12, face="bold"),
        legend.text=element_text(size=11, face="bold"),
        axis.text.x=element_text(size=12, color="black", face="bold"),
        axis.title.y=element_text(size=14, color="black", face="bold"),
        axis.text.y=element_text(size=12, color="black", face="bold"),
        axis.ticks.x =element_blank(),
        legend.direction ="vertical",
        axis.line.y = element_line(colour="black"),
        plot.margin=unit(c(3,0,0,1), "cm")) 
DEGs_Bar_Plot_Grid <- cowplot::plot_grid(DEGs_Bar_Plot, scale=1)

## Make the second plot (DEGs for the interaction)
DEGs_Interaction_Plot <- ggplot2::ggplot(Interaction_Bar, 
                                         aes(x=Induction, y=Gene_Count, fill=Induction))+
ggplot2::geom_bar(stat="identity", position=position_dodge2(reverse=TRUE))+
ggplot2::scale_y_continuous(expand=c(0,0), limits=c(0,600))+
ggplot2::scale_x_discrete(limits=c("Positive", "Negative"))+
ggplot2::coord_cartesian(ylim=c(0,600))+
ggplot2::scale_fill_manual(name="Log2-Difference \n(BRA1896-BRA1909)",
                           values=c("deepskyblue4","deepskyblue2"),
                           breaks=c("Positive","Negative"))+
ggplot2::annotate("text", x=1, y=562, size=4.5, label="542", fontface="bold")+
ggplot2::annotate("text", x=2, y=332, size=4.5, label="312", fontface="bold")+
ggplot2::labs(x="", y="DEGs")+
    theme_bw()+
        theme(panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        legend.title=element_text(size=12, face="bold"),
        legend.position = c(0.5,1.15),
        axis.title.x=element_text(size=12, face="bold"),
        legend.text=element_text(size=11, face="bold"),
        axis.text.x=element_blank(),
        axis.title.y=element_text(size=14, color="black", face="bold"),
        axis.text.y=element_text(size=12, color="black", face="bold"),
        axis.ticks.x =element_blank(),
        legend.direction ="vertical",
        axis.line.y = element_line(colour="black"),
        plot.margin=unit(c(3,3,-0.33,0), "cm")) 
Interaction_Bar_Plot <-  cowplot::plot_grid(DEGs_Interaction_Plot, scale=0.9)
## Combine plots
DEG_Comparison <- cowplot::plot_grid(DEGs_Bar_Plot_Grid, Venn, Interaction_Bar_Plot,
                                     ncol=3, nrow=1, align ="hv", rel_widths = c(1.25,1,1.1))

Plot:

#png("./Figures/DEG_Comparison.png", width=3000, height=1800, res=300, type="cairo")
DEG_Comparison

#dev.off()

3 Data export

Add gene informations:

## B1896 data
B1896_C_BR1 <- read.table("./ballgown/N1896Pet_C1/N1896Pet_C1_Genes.tab", sep="\t", header=TRUE)
B1896_C_BR2 <- read.table("./ballgown/N1896Pet_C2/N1896Pet_C2_Genes.tab", sep="\t", header=TRUE)
B1896_C_BR3 <- read.table("./ballgown/N1896Pet_C3/N1896Pet_C3_Genes.tab", sep="\t", header=TRUE)
B1896_I_BR1 <- read.table("./ballgown/N1896Pet_I1/N1896Pet_I1_Genes.tab", sep="\t", header=TRUE)
B1896_I_BR2 <- read.table("./ballgown/N1896Pet_I2/N1896Pet_I2_Genes.tab", sep="\t", header=TRUE)
B1896_I_BR3 <- read.table("./ballgown/N1896Pet_I3/N1896Pet_I3_Genes.tab", sep="\t", header=TRUE)
## B1909 data
B1909_C_BR1 <- read.table("./ballgown/N1909Pet_C1/N1909Pet_C1_Genes.tab", sep="\t", header=TRUE)
B1909_C_BR2 <- read.table("./ballgown/N1909Pet_C2/N1909Pet_C2_Genes.tab", sep="\t", header=TRUE)
B1909_C_BR3 <- read.table("./ballgown/N1909Pet_C3/N1909Pet_C3_Genes.tab", sep="\t", header=TRUE)
B1909_I_BR1 <- read.table("./ballgown/N1909Pet_I1/N1909Pet_I1_Genes.tab", sep="\t", header=TRUE)
B1909_I_BR2 <- read.table("./ballgown/N1909Pet_I2/N1909Pet_I2_Genes.tab", sep="\t", header=TRUE)
B1909_I_BR3 <- read.table("./ballgown/N1909Pet_I3/N1909Pet_I3_Genes.tab", sep="\t", header=TRUE)
## B1896 data
Trinity_B1896_C_BR1 <- read.table("./trinity/RSEM/B1896_C_BR1/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1896_C_BR2 <- read.table("./trinity/RSEM/B1896_C_BR2/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1896_C_BR3 <- read.table("./trinity/RSEM/B1896_C_BR3/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1896_I_BR1 <- read.table("./trinity/RSEM/B1896_I_BR1/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1896_I_BR2 <- read.table("./trinity/RSEM/B1896_I_BR2/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1896_I_BR3 <- read.table("./trinity/RSEM/B1896_I_BR3/RSEM.isoforms.results", sep="\t", header=TRUE)
## B1909 data
Trinity_B1909_C_BR1 <- read.table("./trinity/RSEM/B1909_C_BR1/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1909_C_BR2 <- read.table("./trinity/RSEM/B1909_C_BR2/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1909_C_BR3 <- read.table("./trinity/RSEM/B1909_C_BR3/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1909_I_BR1 <- read.table("./trinity/RSEM/B1909_I_BR1/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1909_I_BR2 <- read.table("./trinity/RSEM/B1909_I_BR2/RSEM.isoforms.results", sep="\t", header=TRUE)
Trinity_B1909_I_BR3 <- read.table("./trinity/RSEM/B1909_I_BR3/RSEM.isoforms.results", sep="\t", header=TRUE)

Merge reference and de novo gene infos:

## Prepare trinity tables to merge with the genes tables
prepTrinity <- function(trinityTable, referenceTable){

    trinity <- trinityTable %>%
               dplyr::select(transcript_id, FPKM, TPM) %>%
               dplyr::rename(Gene.ID = "transcript_id") %>%
               dplyr::mutate(Gene.Name = "-", Reference = "-", 
                             Strand = "-", Start = "-", End = "-",
                             Coverage = "-")

    merged <- rbind(referenceTable, trinity)

return(merged)

}

## Merge trinity and reference transcripts and genes
B1896_C_BR1_Merged <- prepTrinity(Trinity_B1896_C_BR1, B1896_C_BR1)
B1896_C_BR2_Merged <- prepTrinity(Trinity_B1896_C_BR2, B1896_C_BR2)
B1896_C_BR3_Merged <- prepTrinity(Trinity_B1896_C_BR3, B1896_C_BR3)
B1896_I_BR1_Merged <- prepTrinity(Trinity_B1896_I_BR1, B1896_I_BR1)
B1896_I_BR2_Merged <- prepTrinity(Trinity_B1896_I_BR2, B1896_I_BR2)
B1896_I_BR3_Merged <- prepTrinity(Trinity_B1896_I_BR3, B1896_I_BR3)
##
B1909_C_BR1_Merged <- prepTrinity(Trinity_B1909_C_BR1, B1909_C_BR1)
B1909_C_BR2_Merged <- prepTrinity(Trinity_B1909_C_BR2, B1909_C_BR2)
B1909_C_BR3_Merged <- prepTrinity(Trinity_B1909_C_BR3, B1909_C_BR3)
B1909_I_BR1_Merged <- prepTrinity(Trinity_B1909_I_BR1, B1909_I_BR1)
B1909_I_BR2_Merged <- prepTrinity(Trinity_B1909_I_BR2, B1909_I_BR2)
B1909_I_BR3_Merged <- prepTrinity(Trinity_B1909_I_BR3, B1909_I_BR3)

## Function
meanFPKM_parser <- function(FPKM_table1, FPKM_table2, FPKM_table3){
                  FPKM_table1 <- FPKM_table1 %>%
                                 dplyr::select(Gene.ID, Gene.Name, Reference, Strand, Start, End, FPKM) %>%
                                 dplyr::rename(Gene_ID ="Gene.ID", Gene_Name="Gene.Name")
                  FPKM_table2 <- FPKM_table2 %>%
                                 dplyr::select(Gene.ID, FPKM) %>%
                                 dplyr::rename(Gene_ID ="Gene.ID")          
                  FPKM_table3 <- FPKM_table3 %>%
                                 dplyr::select(Gene.ID, FPKM) %>%
                                 dplyr::rename(Gene_ID ="Gene.ID")
                  FPKM_table1 <- dplyr::left_join(FPKM_table1, FPKM_table2, by="Gene_ID") %>%
                                 dplyr::select(Gene_ID, Gene_Name, Reference, Strand, Start, End, FPKM.x, FPKM.y) %>%
                                 dplyr::rename(FPKM_1 ="FPKM.x", FPKM_2="FPKM.y") %>%
                                 dplyr::left_join(., FPKM_table3, by ="Gene_ID") %>%
                                 dplyr::rename(FPKM_3 = "FPKM") %>%
                                 dplyr::mutate(FPKM_Mean = ((FPKM_1 + FPKM_2 + FPKM_3)/3)) %>%
                                 dplyr::mutate(FPKM_SD = sqrt((((FPKM_1-FPKM_Mean)^2 + (FPKM_2-FPKM_Mean)^2 + (FPKM_3-FPKM_Mean)^2)/3))) %>%
                                 dplyr::select(Gene_ID, Gene_Name, Reference, Strand, Start, End, FPKM_Mean, FPKM_SD)
        return(FPKM_table1)
}

## B1896
B1896_C_Abundance <- meanFPKM_parser(B1896_C_BR1_Merged,B1896_C_BR2_Merged,B1896_C_BR3_Merged)
B1896_I_Abundance <- meanFPKM_parser(B1896_I_BR1_Merged,B1896_I_BR2_Merged,B1896_I_BR3_Merged)

## B1909
B1909_C_Abundance <- meanFPKM_parser(B1909_C_BR1_Merged,B1909_C_BR2_Merged,B1909_C_BR3_Merged)
B1909_I_Abundance <- meanFPKM_parser(B1909_I_BR1_Merged,B1909_I_BR2_Merged,B1909_I_BR3_Merged)

Estimate mean FPKM:

Note: The FPKM mean is not very precise here and is just to get an idea of the abundance of each gene. There are also differences between the log-fold change calculated by DESeq2 and by the FPKM values. The fold-change from DESeq2 is more precise!

## Calculate FC
FPKM_FC_calculater <- function(Mock, Inoculated, DEGs){

        print("Check data integrity...")
        print(table(Mock$Gene_ID %in% Inoculated$Gene_ID))
        print("... genes in both tables are identical")

                parsed <- dplyr::left_join(Mock, Inoculated, by="Gene_ID") %>%
                          dplyr::select(Gene_ID, Gene_Name.x, Reference.x, Strand.x, Start.x, End.x, FPKM_Mean.x, FPKM_SD.x, FPKM_Mean.y, FPKM_SD.y) %>%
                          dplyr::rename(Gene_Name="Gene_Name.x", Chromosome="Reference.x", Strand="Strand.x", Start="Start.x", End="End.x", 
                                        FPKM_Mock="FPKM_Mean.x", FPKM_Mock_SD="FPKM_SD.x", FPKM_Inoc="FPKM_Mean.y", FPKM_Inoc_SD="FPKM_SD.y") %>%
                          dplyr::select(Gene_ID, Gene_Name, Chromosome, Strand, Start, End, FPKM_Mock, FPKM_Mock_SD, FPKM_Inoc, FPKM_Inoc_SD) %>%
                          dplyr::mutate(FPKM_FC = log2(FPKM_Inoc/FPKM_Mock)) %>%
                          dplyr::rename(LOG2_FPKM_FC="FPKM_FC") %>%
                          dplyr::mutate(Regulation = ifelse(LOG2_FPKM_FC > 0,"Up",ifelse(LOG2_FPKM_FC < 0,"Down","Intermediate")))
 return(parsed)
 
}

## Apply
B1896_Genes_FPKM_Values <- FPKM_FC_calculater(B1896_C_Abundance, B1896_I_Abundance)

## [1] "Check data integrity..."
## 
##   TRUE 
## 276044 
## [1] "... genes in both tables are identical"

B1909_Genes_FPKM_Values <- FPKM_FC_calculater(B1909_C_Abundance, B1909_I_Abundance)

## [1] "Check data integrity..."
## 
##   TRUE 
## 276044 
## [1] "... genes in both tables are identical"

## B1896
B1896_Genes <- dplyr::left_join(B1896_Genes, B1896_Genes_FPKM_Values, by="Gene_ID") %>%
               dplyr::select(Gene_ID, LOG2_FPKM_FC, Log2_FC, LFC_SE, Pvalue, Padj, Regulation.x, Significant, Chromosome, Strand, Start, End,
                             FPKM_Mock, FPKM_Inoc) %>%
               dplyr::rename(Regulation="Regulation.x") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, FPKM_Mock, FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
B1896_DEGs <- dplyr::left_join(B1896_DEGs, B1896_Genes_FPKM_Values, by="Gene_ID") %>%
               dplyr::select(Gene_ID, LOG2_FPKM_FC, Log2_FC, LFC_SE, Pvalue, Padj, Regulation.x, Significant, Chromosome, Strand, Start, End,
                             FPKM_Mock, FPKM_Inoc) %>%
               dplyr::rename(Regulation="Regulation.x") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, FPKM_Mock, FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
## B1909
B1909_Genes <- dplyr::left_join(B1909_Genes, B1909_Genes_FPKM_Values, by="Gene_ID") %>%
               dplyr::select(Gene_ID, LOG2_FPKM_FC, Log2_FC, LFC_SE, Pvalue, Padj, Regulation.x, Significant, Chromosome, Strand, Start, End,
                             FPKM_Mock, FPKM_Inoc) %>%
               dplyr::rename(Regulation="Regulation.x") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, FPKM_Mock, FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
B1909_DEGs <- dplyr::left_join(B1909_DEGs, B1909_Genes_FPKM_Values, by="Gene_ID") %>%
               dplyr::select(Gene_ID, LOG2_FPKM_FC, Log2_FC, LFC_SE, Pvalue, Padj, Regulation.x, Significant, Chromosome, Strand, Start, End,
                             FPKM_Mock, FPKM_Inoc) %>%
               dplyr::rename(Regulation="Regulation.x") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, FPKM_Mock, FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)

Add major isoform:

Note: Calculated with: Get_Isoforms.pl and Calculate_Major_Isoform (R-script)

## Load
B1896_Major_Iso <-  read.table("./ballgown/B1896_Major_Isoform.txt", sep="\t", header=TRUE)
B1909_Major_Iso <-  read.table("./ballgown/B1909_Major_Isoform.txt", sep="\t", header=TRUE)
## Prepare trinity information
Trinity_Iso <-  Trinity_B1896_C_BR1 %>%
               dplyr::select(transcript_id) %>%
               dplyr::mutate(Gene_ID = transcript_id, Iso_FPKM = "-") %>%
               dplyr::rename(Major_Iso = "transcript_id")
## Merge both tables
B1896_Major_Iso_Merged <- rbind(B1896_Major_Iso, Trinity_Iso)
B1909_Major_Iso_Merged <- rbind(B1909_Major_Iso, Trinity_Iso)
## Add to the gene files
B1896_Genes <- dplyr::left_join(B1896_Genes, B1896_Major_Iso_Merged, by ="Gene_ID") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock,
                    FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
B1909_Genes <- dplyr::left_join(B1909_Genes, B1896_Major_Iso_Merged, by ="Gene_ID") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock,
                    FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
B1896_DEGs <- dplyr::left_join(B1896_DEGs, B1896_Major_Iso_Merged, by ="Gene_ID") %>%
              dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock,
                    FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)
B1909_DEGs <- dplyr::left_join(B1909_DEGs, B1896_Major_Iso_Merged, by ="Gene_ID") %>%
              dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock,
                    FPKM_Inoc, Log2_FC, LFC_SE, Pvalue, Padj, Regulation, Significant)

Add transcript length:

## Load
Transcript_Length <- read.table("./Sequences/Merged_Seq_Length.txt", sep="\t", header=FALSE, quote="")
colnames(Transcript_Length) <- c("Major_Iso", "Length")

## Add
B1896_Genes <- dplyr::left_join(B1896_Genes, Transcript_Length, by ="Major_Iso") %>% rename(Transcript_Length="Length")
B1909_Genes <- dplyr::left_join(B1909_Genes, Transcript_Length, by ="Major_Iso") %>% rename(Transcript_Length="Length")
B1896_DEGs <- dplyr::left_join(B1896_DEGs, Transcript_Length, by ="Major_Iso") %>% rename(Transcript_Length="Length")
B1909_DEGs <- dplyr::left_join(B1909_DEGs, Transcript_Length, by ="Major_Iso") %>% rename(Transcript_Length="Length")

Add BLAST results:

## Load
Refseq <- read.table("./Annotations/Merged_Sequences_against_Refseq", sep="\t", header=FALSE, quote="")
Ath <- read.table("./Annotations/Merged_Sequences_against_Ath", sep="\t", header=FALSE, quote="")

## Give header
colnames(Refseq) <- c("Major_Iso", "Refseq_ID", "pident", "length", "mismatch", "gapopen", "qstart", "qend","sstart", "send", "Refseq_evalue", "bitscore")
colnames(Ath) <- c("Major_Iso", "Ath_ID", "pident", "length", "mismatch", "gapopen", "qstart", "qend","sstart", "send", "Ath_evalue", "bitscore")

## Add refseq
B1896_Genes <- dplyr::left_join(B1896_Genes, Refseq, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue)
B1896_DEGs <- dplyr::left_join(B1896_DEGs, Refseq, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue)
B1909_Genes <- dplyr::left_join(B1909_Genes, Refseq, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue)
B1909_DEGs <- dplyr::left_join(B1909_DEGs, Refseq, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue)

## Add ath
B1896_Genes <- dplyr::left_join(B1896_Genes, Ath, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue)
B1896_DEGs <- dplyr::left_join(B1896_DEGs, Ath, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue)
B1909_Genes <- dplyr::left_join(B1909_Genes, Ath, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue)
B1909_DEGs <- dplyr::left_join(B1909_DEGs, Ath, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue)

Add Uniprot-IDs:

## Load
Uniprot <- read.table("./Annotations/Transcripts2Uniprot.txt", sep="\t", header=FALSE, quote="")
colnames(Uniprot) <- c("Major_Iso", "Uniprot_ID", "pident", "Uniprot_evalue")
## Remove duplicate elements
Uniprot <- Uniprot[!duplicated(Uniprot$Major_Iso),]

## Add info
B1896_Genes <- dplyr::left_join(B1896_Genes, Uniprot, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length,
                             Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)
B1896_DEGs <- dplyr::left_join(B1896_DEGs, Uniprot, by ="Major_Iso") %>%
              dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length,
                             Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)
B1909_Genes <- dplyr::left_join(B1909_Genes, Uniprot, by ="Major_Iso") %>%
               dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length,
                             Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)
B1909_DEGs <- dplyr::left_join(B1909_DEGs, Uniprot, by ="Major_Iso") %>%
              dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, FPKM_Mock, FPKM_Inoc, Log2_FC,
                             LFC_SE, Pvalue, Padj, Regulation, Significant, Transcript_Length,
                             Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)

Prepare interaction tables:

## Create gene infos
Gene_Info <- B1896_Genes %>% select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)

## Change labels
colnames(Interaction_Genes) <- c("Gene_ID","Log2-Difference (B1896-B1909)","LFC_SE","Pvalue","Padj", "Induction", "Significant")
colnames(Interaction_DEGs) <- c("Gene_ID","Log2-Difference (B1896-B1909)","LFC_SE","Pvalue","Padj", "Induction", "Significant")

## Add to the tables for the interaction
Interaction_DEGs <- dplyr::left_join(Interaction_DEGs, Gene_Info, by = "Gene_ID" ) %>%
                    dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, "Log2-Difference (B1896-B1909)", LFC_SE, Pvalue, Padj,
                                Induction, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)
Interaction_Genes <- dplyr::left_join(Interaction_Genes, Gene_Info, by = "Gene_ID" ) %>%
                     dplyr::select(Chromosome, Strand, Start, End, Gene_ID, Major_Iso, "Log2-Difference (B1896-B1909)", LFC_SE, Pvalue, Padj,
                                Induction, Significant, Transcript_Length, Refseq_ID, Refseq_evalue, Ath_ID, Ath_evalue, Uniprot_ID, Uniprot_evalue)

3.1 Final tables

Export tables:

write.table(B1896_Genes, "./DESeq2/B1896_Genes.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1909_Genes, "./DESeq2/B1909_Genes.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1896_DEGs, "./DESeq2/B1896_DEGs.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1909_DEGs, "./DESeq2/B1909_DEGs.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(Interaction_DEGs, "./DESeq2/Interaction_DEGs.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(Interaction_Genes, "./DESeq2/Interaction_Genes.txt", sep="\t", quote=FALSE, row.names=FALSE)

Finished!

sessionInfo()

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] extrafont_0.17              kableExtra_1.3.4           
##  [3] RColorBrewer_1.1-2          ggrepel_0.9.1              
##  [5] officer_0.3.18              ComplexHeatmap_2.2.0       
##  [7] dplyr_1.0.5                 gplots_3.1.1               
##  [9] VennDiagram_1.6.20          futile.logger_1.4.3        
## [11] ggpubr_0.4.0                gridExtra_2.3              
## [13] readxl_1.3.1                gdata_2.18.0               
## [15] cowplot_1.1.1               ggplot2_3.3.3              
## [17] DESeq2_1.26.0               SummarizedExperiment_1.16.1
## [19] DelayedArray_0.12.3         BiocParallel_1.20.1        
## [21] matrixStats_0.58.0          Biobase_2.46.0             
## [23] GenomicRanges_1.38.0        GenomeInfoDb_1.22.1        
## [25] IRanges_2.20.2              S4Vectors_0.24.4           
## [27] BiocGenerics_0.32.0        
## 
## loaded via a namespace (and not attached):
##   [1] uuid_0.1-4             backports_1.2.1        circlize_0.4.12       
##   [4] Hmisc_4.5-0            systemfonts_1.0.1      splines_3.6.3         
##   [7] crosstalk_1.1.1        digest_0.6.27          htmltools_0.5.1.1     
##  [10] fansi_0.4.2            magrittr_2.0.1         checkmate_2.0.0       
##  [13] memoise_2.0.0          cluster_2.1.0          openxlsx_4.2.3        
##  [16] annotate_1.64.0        extrafontdb_1.0        svglite_2.0.0         
##  [19] jpeg_0.1-8.1           colorspace_2.0-0       blob_1.2.1            
##  [22] rvest_1.0.0            haven_2.4.1            xfun_0.21             
##  [25] jsonlite_1.7.2         crayon_1.4.1           RCurl_1.98-1.3        
##  [28] genefilter_1.68.0      survival_3.1-8         glue_1.4.2            
##  [31] gtable_0.3.0           zlibbioc_1.32.0        XVector_0.26.0        
##  [34] webshot_0.5.2          GetoptLong_1.0.5       car_3.0-10            
##  [37] Rttf2pt1_1.3.8         shape_1.4.5            abind_1.4-5           
##  [40] scales_1.1.1           futile.options_1.0.1   DBI_1.1.1             
##  [43] rstatix_0.7.0          Rcpp_1.0.6             viridisLite_0.4.0     
##  [46] xtable_1.8-4           htmlTable_2.1.0        clue_0.3-59           
##  [49] foreign_0.8-75         bit_4.0.4              Formula_1.2-4         
##  [52] DT_0.18                htmlwidgets_1.5.3      httr_1.4.2            
##  [55] ellipsis_0.3.1         farver_2.1.0           pkgconfig_2.0.3       
##  [58] XML_3.99-0.3           nnet_7.3-12            locfit_1.5-9.4        
##  [61] utf8_1.1.4             labeling_0.4.2         tidyselect_1.1.1      
##  [64] rlang_0.4.10           AnnotationDbi_1.48.0   munsell_0.5.0         
##  [67] cellranger_1.1.0       tools_3.6.3            cachem_1.0.4          
##  [70] generics_0.1.0         RSQLite_2.2.3          broom_0.7.6           
##  [73] evaluate_0.14          stringr_1.4.0          fastmap_1.1.0         
##  [76] yaml_2.2.1             knitr_1.33             bit64_4.0.5           
##  [79] zip_2.1.1              caTools_1.18.2         purrr_0.3.4           
##  [82] formatR_1.9            xml2_1.3.2             compiler_3.6.3        
##  [85] rstudioapi_0.13        curl_4.3               png_0.1-7             
##  [88] ggsignif_0.6.1         tibble_3.0.4           geneplotter_1.64.0    
##  [91] stringi_1.5.3          highr_0.9              forcats_0.5.1         
##  [94] lattice_0.20-38        Matrix_1.2-18          vctrs_0.3.6           
##  [97] pillar_1.6.0           lifecycle_1.0.0        GlobalOptions_0.1.2   
## [100] data.table_1.14.0      bitops_1.0-7           R6_2.5.0              
## [103] latticeExtra_0.6-29    KernSmooth_2.23-16     rio_0.5.26            
## [106] lambda.r_1.2.4         gtools_3.8.2           assertthat_0.2.1      
## [109] rjson_0.2.20           withr_2.4.2            GenomeInfoDbData_1.2.2
## [112] hms_1.0.0              rpart_4.1-15           tidyr_1.1.3           
## [115] rmarkdown_2.7          carData_3.0-4          base64enc_0.1-3

DESeq2 analysis

Thomas Bergmann

23 September, 2021

1 Data import

1.1 Sample-to-sample distance

1.2 PCA-plot

1.3 Cook’s distance

2 Results

2.1 DEG-tables

2.2 Volcano-plot

2.3 DEG comparison

3 Data export

3.1 Final tables