Analysis is performed according to Young et al. 2010

Abbreviations:

B. villosa = B1896
B. oleracea = B1909

1 Data import

Library import:

## Clear workspace###
rm(list=ls())

## Load libraries###
library(ggplot2)
library(cowplot)
library(gdata)

## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

##

## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

## 
## Attaching package: 'gdata'

## The following object is masked from 'package:stats':
## 
##     nobs

## The following object is masked from 'package:utils':
## 
##     object.size

## The following object is masked from 'package:base':
## 
##     startsWith

library(readxl)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:gdata':
## 
##     combine

library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(VennDiagram)

## Loading required package: grid

## Loading required package: futile.logger

## 
## Attaching package: 'VennDiagram'

## The following object is masked from 'package:ggpubr':
## 
##     rotate

library(grid)
library(futile.logger)
library(gplots)

## Registered S3 method overwritten by 'gplots':
##   method         from 
##   reorder.factor gdata

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:gdata':
## 
##     reorder.factor

## The following object is masked from 'package:stats':
## 
##     lowess

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following objects are masked from 'package:gdata':
## 
##     combine, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Data import:

B1896_Genes <- read.table("./DESeq2/B1896_Genes.txt", sep="\t", header=TRUE, quote="")
B1909_Genes <- read.table("./DESeq2/B1909_Genes.txt", sep="\t", header=TRUE, quote="")
Interaction_Genes <- read.table("./DESeq2/Interaction_Genes.txt", sep="\t", header=TRUE, quote="")

Preparation of tables:

## Function to reduce the tables
makeGOtable <- function(genetable){
                table2GO <- genetable %>% select(Major_Iso, Significant, Transcript_Length)
    return(table2GO)
}

## Apply
B1896_Genes <- makeGOtable(B1896_Genes)
Bn1909_Genes <- makeGOtable(B1909_Genes)
Interaction_Genes <- makeGOtable(Interaction_Genes)

## Subset
B1896_Genes_2GO <- B1896_Genes %>% dplyr::select(Major_Iso, Significant)
B1909_Genes_2GO <- B1909_Genes %>% dplyr::select(Major_Iso, Significant)
Interaction_Genes_2GO <- Interaction_Genes %>% dplyr::select(Major_Iso, Significant)

## Filter DEGs
B1896_DEGs <- B1896_Genes_2GO %>%
               dplyr::filter(Significant == "Yes") %>%
               dplyr::rename(ID4GO ="Major_Iso") %>%
               dplyr::select(ID4GO)

B1909_DEGs <- B1909_Genes_2GO %>%
               dplyr::filter(Significant == "Yes") %>%
               dplyr::rename(ID4GO ="Major_Iso") %>%
               dplyr::select(ID4GO)

Interaction_DEGs <- Interaction_Genes_2GO %>%
                dplyr::filter(Significant == "Yes") %>%
                dplyr::rename(ID4GO ="Major_Iso") %>%
                dplyr::select(ID4GO)

## Transcriptome background
B1896_2GO <- B1896_Genes_2GO %>%
               dplyr::rename(ID4GO ="Major_Iso") %>%
               dplyr::select(ID4GO)

B1909_2GO <- B1909_Genes_2GO %>%
               dplyr::rename(ID4GO ="Major_Iso") %>%
               dplyr::select(ID4GO)

Interaction_2GO <- Interaction_Genes_2GO %>%
               dplyr::rename(ID4GO ="Major_Iso") %>%
               dplyr::select(ID4GO)

## Change structure
B1896_DEGs <- as.character(B1896_DEGs$ID4GO)
B1896_2GO <- as.character(B1896_2GO$ID4GO)

B1909_DEGs <- as.character(B1909_DEGs$ID4GO)
B1909_2GO <- as.character(B1909_2GO$ID4GO)

Interaction_DEGs <- as.character(Interaction_DEGs$ID4GO)
Interaction_2GO <- as.character(Interaction_2GO$ID4GO)

## Mark DEGs in genes
B1896List <- as.integer(B1896_2GO %in% B1896_DEGs)          
names(B1896List) <- B1896_2GO

B1909List <- as.integer(B1909_2GO %in% B1909_DEGs)          
names(B1909List) <- B1909_2GO

InteractionList <- as.integer(Interaction_2GO %in% Interaction_DEGs)            
names(InteractionList) <- Interaction_2GO

## Prepare transcript length
B1896_Length <- as.numeric(B1896_Genes$Transcript_Length)
B1909_Length <- as.numeric(B1909_Genes$Transcript_Length)
Interaction_Length <- as.numeric(Interaction_Genes$Transcript_Length)

Import of the geneID2GO mapping-file:

Note: The geneID2GO-file was once prepared with the muted code.

## Load the file
#geneID2GO <- read.table("./Annotations/gene2GO.txt", sep="\t", quote="")
#geneID2GO <- geneID2GO %>% dplyr::select(V1,V5) %>% na.omit()           
#str(head(geneID2GO))

##convert structure
#geneID2GO$V1 <- as.character(geneID2GO$V1)
#geneID2GO$V2 <- as.character(geneID2GO$V5)
#str(head(geneID2GO))

## Convert data-frame to list
#prepareList <- function(table){
#    myList = list()
#    for(i in 1:nrow(table)){
#        id <- table$V1[i]
#        go <- unlist(strsplit(table$V2[i],","))
#        myList[[id]] <- go
#    }
#    return(myList)
#}

#geneID2GO <- prepareList(geneID2GO)
#str(head(geneID2GO))

## Load geneID2GO-mappinf file
load(file="./Annotations/geneID2GO.RData")

2 Analysis

Load GOseq-package:

library(goseq)

## Loading required package: BiasedUrn

## Loading required package: geneLenDataBase

##

Calculate PWF:

For B1896:

pwfB1896=goseq::nullp(B1896List, bias.data=B1896_Length)

## Warning in pcls(G): initial point very close to some inequality constraints

head(pwfB1896)

##                          DEgenes bias.data        pwf
## transcript:Bo4g001040.1        0      1443 0.14768834
## transcript:Bo5g007190.1        0       912 0.10219284
## transcript:Bo25456s010.1       0       162 0.03660146
## transcript:Bo1g034650.1        0       261 0.04531026
## transcript:Bo5g151420.1        0      2493 0.23528946
## transcript:Bo5g151430.1        1      1128 0.12081270

For B1909:

pwfB1909=goseq::nullp(B1909List, bias.data=B1909_Length)

## Warning in pcls(G): initial point very close to some inequality constraints

head(pwfB1909)

##                          DEgenes bias.data        pwf
## transcript:Bo4g001040.1        0      1443 0.18931312
## transcript:Bo5g007190.1        0       912 0.13485648
## transcript:Bo25456s010.1       0       162 0.05307574
## transcript:Bo1g034650.1        0       261 0.06405040
## transcript:Bo5g151420.1        0      2493 0.28595095
## transcript:Bo5g151430.1        1      1128 0.15745119

For the interaction:

pwfInt=goseq::nullp(InteractionList, bias.data=Interaction_Length)

## Warning in pcls(G): initial point very close to some inequality constraints

head(pwfInt)

##                          DEgenes bias.data         pwf
## transcript:Bo4g001040.1        0      1443 0.015626114
## transcript:Bo5g007190.1        0       912 0.013251793
## transcript:Bo25456s010.1       0       162 0.006564926
## transcript:Bo1g034650.1        0       261 0.007574999
## transcript:Bo5g151420.1        0      2493 0.016488394
## transcript:Bo5g151430.1        0      1128 0.014498556

Perform analysis and adjust P-values according to Benjamini & Hochberg:

## Perform analysis
B1896_GO=goseq::goseq(pwfB1896, gene2cat=geneID2GO, use_genes_without_cat=TRUE)

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

B1909_GO=goseq::goseq(pwfB1909, gene2cat=geneID2GO, use_genes_without_cat=TRUE)

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

Int_GO=goseq::goseq(pwfInt, gene2cat=geneID2GO, use_genes_without_cat=TRUE)

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## Function to adjust via FDR
FDRcalculater <- function(GOtable){
            FDR_Over <- p.adjust(GOtable$over_represented_pvalue, method="BH")
            as.data.frame(FDR_Over)
            FDR_Under <- p.adjust(GOtable$under_represented_pvalue, method="BH")
            as.data.frame(FDR_Under)
            GOtableparsed <- cbind(GOtable, FDR_Over, FDR_Under)

        return(GOtableparsed)

}

## Apply
B1896_GO <- FDRcalculater(B1896_GO)
B1909_GO <- FDRcalculater(B1909_GO)
Int_GO <- FDRcalculater(Int_GO)

##prepare tables
B1896_GO <- B1896_GO %>% 
            dplyr::select(category, numDEInCat, numInCat, term, ontology, FDR_Over, FDR_Under) %>%
            dplyr::rename(Category="category", DEGs = "numDEInCat", Genes = "numInCat", Term ="term", Ontology ="ontology")
B1909_GO <- B1909_GO %>% 
            dplyr::select(category, numDEInCat, numInCat, term, ontology, FDR_Over, FDR_Under) %>%
            dplyr::rename(Category="category", DEGs = "numDEInCat", Genes = "numInCat", Term ="term", Ontology ="ontology")
Int_GO <- Int_GO %>% 
            dplyr::select(category, numDEInCat, numInCat, term, ontology, FDR_Over, FDR_Under) %>%
            dplyr::rename(Category="category", DEGs = "numDEInCat", Genes = "numInCat", Term ="term", Ontology ="ontology")

3 Results

Statistics:

### GO stats ###

## Remove 'NAs' row from the tables
B1896_GO <- B1896_GO %>% na.omit(cols="Category")
B1909_GO <- B1909_GO %>% na.omit(cols="Category")
Int_GO <- Int_GO %>% na.omit(cols="Category")

## Over stats
B1896_GOOverStats <- as.data.frame(table(B1896_GO$FDR_Over < 0.05))
colnames(B1896_GOOverStats) <-  c("B1896", "Overrepresented")

## Under stats
B1896_GOUnderStats <- as.data.frame(table(B1896_GO$FDR_Under < 0.05))
colnames(B1896_GOUnderStats) <-  c("B1896", "Underrepresented")

B1896_GOStats <- cbind(B1896_GOOverStats[2,],B1896_GOUnderStats[2,])
rownames(B1896_GOStats) <- c("B1896")
B1896_GOStats <- subset(B1896_GOStats, select= -c(1,3))

## Over stats
B1909_GOOverStats <- as.data.frame(table(B1909_GO$FDR_Over < 0.05))
colnames(B1909_GOOverStats) <-  c("B1909", "Overrepresented")

## Under stats
B1909_GOUnderStats <- as.data.frame(table(B1909_GO$FDR_Under < 0.05))
colnames(B1909_GOUnderStats) <-  c("B1909", "Underrepresented")

B1909_GOStats <- cbind(B1909_GOOverStats[2,],B1909_GOUnderStats[2,])
rownames(B1909_GOStats) <- c("B1909")
B1909_GOStats <- subset(B1909_GOStats, select= -c(1,3))

## Over stats
Int_GOOverStats <- as.data.frame(table(Int_GO$FDR_Over < 0.05))
colnames(Int_GOOverStats) <-  c("Int", "Overrepresented")

## Under stats
Int_GOUnderStats <- as.data.frame(table(Int_GO$FDR_Under < 0.05))
colnames(Int_GOUnderStats) <-  c("Int", "Underrepresented")

Int_GOStats <- cbind(Int_GOOverStats[2,],Int_GOUnderStats[2,])
rownames(Int_GOStats) <- c("Interaction")
Int_GOStats <- subset(Int_GOStats, select= -c(1,3))
Int_GOStats[[2]] <- "0"

## Merge all GO stats
GO_Stats <- rbind(B1896_GOStats, B1909_GOStats, Int_GOStats)
knitr::kable(GO_Stats) %>% kableExtra::kable_styling(bootstrap_options = c("striped"))

	Overrepresented	Underrepresented
B1896	286	160
B1909	221	78
Interaction	115	0

3.1 B1896

DT::datatable(B1896_GO, filter="top", options=list(pageLength=5))

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

3.2 B1909

DT::datatable(B1909_GO, filter="top", options=list(pageLength=5))

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

3.3 Interaction

DT::datatable(Int_GO, filter="top", options=list(pageLength=5))

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Subdivide the results by their GO category:

## B1896
B1896_BP <- B1896_GO %>% filter(B1896_GO$Ontology == "BP")
B1896_CC <- B1896_GO %>% filter(B1896_GO$Ontology == "CC")
B1896_MF <- B1896_GO %>% filter(B1896_GO$Ontology == "MF")

## B1909
B1909_BP <- B1909_GO %>% filter(B1909_GO$Ontology == "BP")
B1909_CC <- B1909_GO %>% filter(B1909_GO$Ontology == "CC")
B1909_MF <- B1909_GO %>% filter(B1909_GO$Ontology == "MF")

## Interaction
Int_BP <- Int_GO %>% filter(Int_GO$Ontology == "BP")
Int_CC <- Int_GO %>% filter(Int_GO$Ontology == "CC")
Int_MF <- Int_GO %>% filter(Int_GO$Ontology == "MF")

4 Export

Export tables:

write.table(B1896_BP, "./GOseq/B1896BP.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1896_MF, "./GOseq/B1896MF.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1896_CC, "./GOseq/B1896CC.txt", sep="\t", quote=FALSE, row.names=FALSE)

write.table(B1909_BP, "./GOseq/B1909BP.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1909_MF, "./GOseq/B1909MF.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(B1909_CC, "./GOseq/B1909CC.txt", sep="\t", quote=FALSE, row.names=FALSE)

write.table(Int_BP, "./GOseq/IntBP.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(Int_MF, "./GOseq/IntMF.txt", sep="\t", quote=FALSE, row.names=FALSE)
write.table(Int_CC, "./GOseq/IntCC.txt", sep="\t", quote=FALSE, row.names=FALSE)

Finished!

sessionInfo()

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] goseq_1.38.0           geneLenDataBase_1.22.0 BiasedUrn_1.07        
##  [4] dplyr_1.0.5            gplots_3.1.1           VennDiagram_1.6.20    
##  [7] futile.logger_1.4.3    ggpubr_0.4.0           gridExtra_2.3         
## [10] readxl_1.3.1           gdata_2.18.0           cowplot_1.1.1         
## [13] ggplot2_3.3.3         
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-0            ggsignif_0.6.1             
##   [3] ellipsis_0.3.1              rio_0.5.26                 
##   [5] XVector_0.26.0              GenomicRanges_1.38.0       
##   [7] rstudioapi_0.13             DT_0.18                    
##   [9] bit64_4.0.5                 AnnotationDbi_1.48.0       
##  [11] fansi_0.4.2                 xml2_1.3.2                 
##  [13] splines_3.6.3               cachem_1.0.4               
##  [15] knitr_1.33                  jsonlite_1.7.2             
##  [17] Rsamtools_2.2.3             broom_0.7.6                
##  [19] GO.db_3.10.0                dbplyr_2.1.1               
##  [21] compiler_3.6.3              httr_1.4.2                 
##  [23] backports_1.2.1             assertthat_0.2.1           
##  [25] Matrix_1.2-18               fastmap_1.1.0              
##  [27] formatR_1.9                 htmltools_0.5.1.1          
##  [29] prettyunits_1.1.1           tools_3.6.3                
##  [31] gtable_0.3.0                glue_1.4.2                 
##  [33] GenomeInfoDbData_1.2.2      rappdirs_0.3.3             
##  [35] Rcpp_1.0.6                  carData_3.0-4              
##  [37] Biobase_2.46.0              cellranger_1.1.0           
##  [39] vctrs_0.3.6                 Biostrings_2.54.0          
##  [41] svglite_2.0.0               nlme_3.1-144               
##  [43] rtracklayer_1.46.0          crosstalk_1.1.1            
##  [45] xfun_0.21                   stringr_1.4.0              
##  [47] rvest_1.0.0                 openxlsx_4.2.3             
##  [49] lifecycle_1.0.0             gtools_3.8.2               
##  [51] rstatix_0.7.0               XML_3.99-0.3               
##  [53] zlibbioc_1.32.0             scales_1.1.1               
##  [55] hms_1.0.0                   parallel_3.6.3             
##  [57] SummarizedExperiment_1.16.1 lambda.r_1.2.4             
##  [59] yaml_2.2.1                  curl_4.3                   
##  [61] memoise_2.0.0               biomaRt_2.42.1             
##  [63] stringi_1.5.3               RSQLite_2.2.3              
##  [65] highr_0.9                   S4Vectors_0.24.4           
##  [67] GenomicFeatures_1.38.2      caTools_1.18.2             
##  [69] BiocGenerics_0.32.0         zip_2.1.1                  
##  [71] BiocParallel_1.20.1         GenomeInfoDb_1.22.1        
##  [73] systemfonts_1.0.1           rlang_0.4.10               
##  [75] pkgconfig_2.0.3             bitops_1.0-7               
##  [77] matrixStats_0.58.0          evaluate_0.14              
##  [79] lattice_0.20-38             purrr_0.3.4                
##  [81] htmlwidgets_1.5.3           GenomicAlignments_1.22.1   
##  [83] bit_4.0.4                   tidyselect_1.1.1           
##  [85] magrittr_2.0.1              R6_2.5.0                   
##  [87] IRanges_2.20.2              generics_0.1.0             
##  [89] DelayedArray_0.12.3         DBI_1.1.1                  
##  [91] mgcv_1.8-31                 pillar_1.6.0               
##  [93] haven_2.4.1                 foreign_0.8-75             
##  [95] withr_2.4.2                 abind_1.4-5                
##  [97] RCurl_1.98-1.3              tibble_3.0.4               
##  [99] crayon_1.4.1                car_3.0-10                 
## [101] futile.options_1.0.1        KernSmooth_2.23-16         
## [103] utf8_1.1.4                  BiocFileCache_1.10.2       
## [105] rmarkdown_2.7               progress_1.2.2             
## [107] data.table_1.14.0           blob_1.2.1                 
## [109] forcats_0.5.1               webshot_0.5.2              
## [111] digest_0.6.27               tidyr_1.1.3                
## [113] openssl_1.4.3               stats4_3.6.3               
## [115] munsell_0.5.0               viridisLite_0.4.0          
## [117] kableExtra_1.3.4            askpass_1.1

GOseq analysis

Thomas Bergmann

23 September, 2021