################################################################################
# 1. Get the list of packages you have installed, 
#    use priority to exclude base and recommended packages.
#    that may have been distributed with R.
# (pkgList <- installed.packages(priority='NA')[,'Package'])

# 2. Find out which packages are on CRAN and R-Forge.  Because
#    of R-Forge build capacity is currently limiting the number of
#    binaries available, it is queried for source packages only.
# (CRANpkgs <- available.packages(contriburl=contrib.url('http://cran.r-project.org'))[,'Package'])
# (forgePkgs <- available.packages(contriburl=contrib.url('http://r-forge.r-project.org', type='source'))[,'Package'])

# 3. Calculate the set of packages which are installed on your machine,
#    not on CRAN but also present on R-Force.
# (pkgsToUp <- intersect(setdiff(pkgList, CRANpkgs), forgePkgs))

# 4. Update the packages, using oldPkgs to restrict the list considered.
# update.packages(checkBuilt=TRUE, ask=FALSE, repos="http://r-forge.r-project.org", oldPkgs=pkgsToUp)
################################################################################

# Update all packages
# update.packages(ask = FALSE, dependencies = c('Suggests'))
source("https://bioconductor.org/biocLite.R")
## Bioconductor version 3.4 (BiocInstaller 1.24.0), ?biocLite for help
## A new version of Bioconductor is available after installing the most
##   recent version of R; see http://bioconductor.org/install
if (!require("GEOquery")) {
  biocLite("GEOquery")
}
## Loading required package: GEOquery
## Warning: package 'GEOquery' was built under R version 3.3.1
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 3.3.1
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 3.3.1
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colnames,
##     do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff,
##     sort, table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(GEOquery)

if (!require("Biobase")) {
  biocLite("Biobase")
}
library(Biobase)

if (!require("limma")) {
  biocLite("limma")
}
## Loading required package: limma
## Warning: package 'limma' was built under R version 3.3.3
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(limma)
# source("http://bioconductor.org/biocLite.R")
# biocLite("genefilter")
# library(genefilter)
if (!require("plyr")) {
  install.packages(pkgs="plyr")
}
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.3.3
library(plyr)
# install.packages("XML", repos=c("http://rstudio.org/_packages", "http://cran.rstudio.com"))
# library("XML")
# install.packages("RCurl", repos=c("http://rstudio.org/_packages", "http://cran.rstudio.com"))
# library("RCurl")
# install.packages("rvest", repos=c("http://rstudio.org/_packages", "http://cran.rstudio.com"))
# library("rvest")
getPkgVigs()
##    package
## 1    limma
## 2 GEOquery
## 3  Biobase
## 4  Biobase
## 5  Biobase
## 6 parallel
## 7    utils
##                                                                       filename
## 1                       C:/Program Files/R/R-3.3.0/library/limma/doc/intro.pdf
## 2                C:/Program Files/R/R-3.3.0/library/GEOquery/doc/GEOquery.html
## 3        C:/Program Files/R/R-3.3.0/library/Biobase/doc/BiobaseDevelopment.pdf
## 4 C:/Program Files/R/R-3.3.0/library/Biobase/doc/ExpressionSetIntroduction.pdf
## 5                   C:/Program Files/R/R-3.3.0/library/Biobase/doc/esApply.pdf
## 6                 C:/Program Files/R/R-3.3.0/library/parallel/doc/parallel.pdf
## 7                      C:/Program Files/R/R-3.3.0/library/utils/doc/Sweave.pdf
##                                           title
## 1                   Limma One Page Introduction
## 2                                Using GEOquery
## 3                     Notes for eSet developers
## 4 An introduction to Biobase and ExpressionSets
## 5                          esApply Introduction
## 6                            Package 'parallel'
## 7                            Sweave User Manual

http://www.bioconductor.org/packages/release/bioc/manuals/Biobase/man/Biobase.pdf

http://www.bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf

http://www.bioconductor.org/packages/release/bioc/manuals/GEOquery/man/GEOquery.pdf

GEO Series (GSE), GEO platforms (GPL), and GEO samples (GSM)

The Gene Expression Omnibus (GEO) from NCBI serves as a public repository for a wide range of high-throughput experimental data. These data include single and dual channel microarray-based experiments measuring mRNA, genomic DNA, and protein abundance, as well as non-array techniques such as serial analysis of gene expression (SAGE), and mass spectrometry proteomic data. At the most basic level of organization of GEO, there are three entity types that may be supplied by users: Platforms, Samples, and Series. Additionally, there is a curated entity called a GEO dataset.

GDS-class: A class describing a GEO GDS entity

GEOData-class: virtual class for holding GEO samples, platforms, and datasets

GEODataTable-class: Contains the column descriptions and data for the datatable part of a GEO object

GPL-class: Contains a full GEO Platform entity

GSE-class: Contains a GEO Series entity

  1. GPLList signature(object = “GSE”): returns a list, each item of the list being a GPL object

  2. GSMList signature(object = “GSE”): returns a list, each item of the list being a GSM object

  3. Meta signature(object = “GSE”): returns a list, the metadata associated with the GSE

GSM-class: A class containing a GEO Sample entity

  1. getGEO

getGEO(GEO = NULL, filename = NULL, destdir = tempdir(), GSElimits=NULL, GSEMatrix=TRUE,AnnotGPL=FALSE,getGPL=TRUE)

This function is the main user-level function in the GEOquery package. It directs the download (if no filename is specified) and parsing of a GEO SOFT format file into an R data structure specifically designed to make access to each of the important parts of the GEO SOFT format easily accessible.

  1. getGEOfile

getGEOfile(GEO, destdir = tempdir(), AnnotGPL = FALSE, amount = c(“full”, “brief”, “quick”, “data”))

This function simply downloads a SOFT format file associated with the GEO accession number given.

  1. getGEOSuppFiles

getGEOSuppFiles(GEO, makeDirectory = TRUE, baseDir = getwd())

NCBI GEO allows supplemental files to be attached to GEO Series (GSE), GEO platforms (GPL), and GEO samples (GSM). This function “knows” how to get these files based on the GEO accession. No parsing of the downloaded files is attempted, since the file format is not generally knowable by the computer.

  1. getGSEDataTables

getGSEDataTables(GSE)

In some cases, instead of individual sample records (GSM) containing information regarding sample phenotypes, the GEO Series contains that information in an attached data table. And example is given by GSE3494 where there are two data tables with important information contained within them. Using getGEO with the standard parameters downloads the GSEMatrix file which, unfortunately, does not contain the information in the data tables. This function simply downloads the “header” information from the GSE record and parses out the data tables into R data.frames.

  1. parseGEO

parseGEO(fname, GSElimits, destdir=tempdir(), AnnotGPL=FALSE, getGPL=TRUE)

parseGPL(fname)

parseGDS(fname)

parseGSE(fname, GSElimits)

parseGSM(fname)

  1. Functions to take a GDS data structure from getGEO and coerce it to limma MALists or ExpressionSets.

GDS2MA(GDS,do.log2=FALSE,GPL=NULL,AnnotGPL=TRUE,getGPL=TRUE)

GDS2eSet(GDS,do.log2=FALSE,GPL=NULL,AnnotGPL=TRUE,getGPL=TRUE)

  1. gunzip

gunzip(filename, destname = gsub(“[.]gz$”, “”, filename), overwrite = FALSE, remove = TRUE, …

Gene Expression Omnibus: https://www.ncbi.nlm.nih.gov/geo/

GSE56094

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4682296/

Series GSE56094 Query DataSets for GSE56094
Status Public on Jun 02, 2015
Title Transcriptional dynamics driving MAMP-triggered immunity and pathogen effector-mediated immunosuppression in Arabidopsis leaves following infection with Pseudomonas syringae pv. tomato DC3000
Organism Arabidopsis thaliana
Experiment type Expression profiling by array
Summary High-resolution temporal transcriptomic analysis of Arabidopsis thaliana leaves during infection by Pseudomonas syringae DC3000 and DC3000hrpA-thaliana. Overall design RNA was extracted from Arabidopsis leaves after mock-inoculation, infection with P. syringae DC3000 and infection with P. syringae DC3000hrpA-. 13 time points (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 16, 17.5 hours post inoculation) were sampled from each treatment and four biological replicates (a single leaf) were harvested at each time point for each treatment. This led to 156 biological samples (13 time points x 3 treatments x 4 biological replicates). Four technical replicates of each biological sample were generated (624). The microarrays were two-channel CATMA arrays and hence two samples are hybridized to each array resulting in 312 arrays used in the experiment. A “loop design” was developed to enable efficient extraction of information about sample comparisons. Half the arrays were used to directly compare samples at different time points within the same treatment, and half directly compared the same time point and adjacent time points between treatments. This design maximizes the power of data comparisons over time and between treatments.
Contributor(s) Lewis LA, Polanski K, de Torres-Zabala M, Jayaraman S, Bowden L, Moore JD, Penfold CA, Jenkins DJ, Hill C, Baxter L, Kulasekaran S, Truman W, Littlejohn G, Pruskinska J, Mead A, Steinbrenner J, Hickman R, Rand D, Wild DL, Ott S, Buchanan-Wollaston V, Smirnoff N, Beynon J, Denby K, Grant M
Citation(s) Lewis LA, Polanski K, de Torres-Zabala M, Jayaraman S et al. Transcriptional Dynamics Driving MAMP-Triggered Immunity and Pathogen Effector-Mediated Immunosuppression in Arabidopsis Leaves Following Infection with Pseudomonas syringae pv tomato DC3000. Plant Cell 2015 Nov;27(11):3038-64. PMID: 26566919

The aim of the Complete Arabidopsis Transcriptome MicroArray (CATMA) project was the design and production of high quality Gene-specific Sequence Tags (GSTs) covering most Arabidopsis genes. The GST repertoire is used by numerous groups for the production of DNA arrays for transcript profiling experiments.

CATMA microarrays constitute the foundation of the CAGE project aiming at the construction of a gene expression reference database for Arabidopsis. The CATMA GSTs are also the basic materials in the AGRIKOLA project focusing on the large-scale systematic RNAi silencing of Arabidopsis genes.

Get GEO data

GSE56094

# http://bioconductor.org/packages/release/bioc/vignettes/GEOquery/inst/doc/GEOquery.html

# https://support.bioconductor.org/p/33487/
# you can run limma for differential expression analysis on your object 'gse' which is a list of ExpressionSets, 
# which contains one single element because the dataset is not too large. 
# This contains normalized log2 ratios after pre-processing done by the authors.

# assumes only one platform in the GSE
gse = getGEO('GSE56094')[[1]]
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : not all columns named in 'colClasses' exist
save (gse, file = 'GEO_GSE56094.RData')
# gse = get(load('GEO_GSE56094.RData'))
class(gse)
## [1] "ExpressionSet"
## attr(,"package")
## [1] "Biobase"
classVersion(gse)
##             R       Biobase          eSet ExpressionSet 
##       "3.3.1"      "2.34.0"       "1.3.0"       "1.0.0"
description(gse)
## Experiment data
##   Experimenter name:  
##   Laboratory:  
##   Contact information:  
##   Title:  
##   URL:  
##   PMIDs:  
##   No abstract available.
show(gse)
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 30336 features, 156 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: GSM1354976 GSM1354977 ... GSM1355131 (156 total)
##   varLabels: title geo_accession ... data_row_count (38 total)
##   varMetadata: labelDescription
## featureData
##   featureNames: CATMA1a00010 CATMA1a00020 ... CATMA5ctrl30 (30336
##     total)
##   fvarLabels: ID Chrom ... SPOT_ID (7 total)
##   fvarMetadata: Column Description labelDescription
## experimentData: use 'experimentData(object)'
## Annotation: GPL10840
varMetadata(gse)
##                         labelDescription
## title                               <NA>
## geo_accession                       <NA>
## status                              <NA>
## submission_date                     <NA>
## last_update_date                    <NA>
## type                                <NA>
## channel_count                       <NA>
## source_name_ch1                     <NA>
## organism_ch1                        <NA>
## characteristics_ch1                 <NA>
## characteristics_ch1.1               <NA>
## characteristics_ch1.2               <NA>
## characteristics_ch1.3               <NA>
## growth_protocol_ch1                 <NA>
## molecule_ch1                        <NA>
## extract_protocol_ch1                <NA>
## label_ch1                           <NA>
## label_protocol_ch1                  <NA>
## taxid_ch1                           <NA>
## hyb_protocol                        <NA>
## scan_protocol                       <NA>
## scan_protocol.1                     <NA>
## description                         <NA>
## data_processing                     <NA>
## platform_id                         <NA>
## contact_name                        <NA>
## contact_email                       <NA>
## contact_department                  <NA>
## contact_institute                   <NA>
## contact_address                     <NA>
## contact_city                        <NA>
## contact_zip/postal_code             <NA>
## contact_country                     <NA>
## supplementary_file                  <NA>
## supplementary_file.1                <NA>
## supplementary_file.2                <NA>
## supplementary_file.3                <NA>
## data_row_count                      <NA>
protocolData(gse)
## An object of class 'AnnotatedDataFrame': none
# isnorm = getGEO('GSE56094', GSEMatrix=FALSE)
# head(Meta(isnorm))
# names(GSMList(isnorm))
# GSMList(isnorm)[[1]]
featureData(gse)
## An object of class 'AnnotatedDataFrame'
##   featureNames: CATMA1a00010 CATMA1a00020 ... CATMA5ctrl30 (30336
##     total)
##   varLabels: ID Chrom ... SPOT_ID (7 total)
##   varMetadata: Column Description labelDescription
(info = varLabels(gse))
##  [1] "title"                   "geo_accession"          
##  [3] "status"                  "submission_date"        
##  [5] "last_update_date"        "type"                   
##  [7] "channel_count"           "source_name_ch1"        
##  [9] "organism_ch1"            "characteristics_ch1"    
## [11] "characteristics_ch1.1"   "characteristics_ch1.2"  
## [13] "characteristics_ch1.3"   "growth_protocol_ch1"    
## [15] "molecule_ch1"            "extract_protocol_ch1"   
## [17] "label_ch1"               "label_protocol_ch1"     
## [19] "taxid_ch1"               "hyb_protocol"           
## [21] "scan_protocol"           "scan_protocol.1"        
## [23] "description"             "data_processing"        
## [25] "platform_id"             "contact_name"           
## [27] "contact_email"           "contact_department"     
## [29] "contact_institute"       "contact_address"        
## [31] "contact_city"            "contact_zip/postal_code"
## [33] "contact_country"         "supplementary_file"     
## [35] "supplementary_file.1"    "supplementary_file.2"   
## [37] "supplementary_file.3"    "data_row_count"
for (i in info) {
  if (!grepl('/', i)) {
    cat(i, '::\t')
    print(eval(parse(text = paste0('gse$',i,'[[1]]'))))
    cat('\n')
  }
}
## title :: [1] Mock 1 hours biorep A
## 156 Levels: Mock 1 hours biorep A ... P. syringae pv tomato DC3000 wildtype 9 hours biorep D
## 
## geo_accession :: [1] GSM1354976
## 156 Levels: GSM1354976 GSM1354977 GSM1354978 GSM1354979 ... GSM1355131
## 
## status ::    [1] Public on Jun 02 2015
## Levels: Public on Jun 02 2015
## 
## submission_date ::   [1] Mar 21 2014
## Levels: Mar 21 2014
## 
## last_update_date ::  [1] Jun 03 2015
## Levels: Jun 03 2015
## 
## type ::  [1] RNA
## Levels: RNA
## 
## channel_count :: [1] 1
## Levels: 1
## 
## source_name_ch1 ::   [1] Mock 1 hours biorep A
## 156 Levels: Mock 1 hours biorep A ... P. syringae pv tomato DC3000 wildtype 9 hours biorep D
## 
## organism_ch1 ::  [1] Arabidopsis thaliana
## Levels: Arabidopsis thaliana
## 
## characteristics_ch1 ::   [1] strain: Col0
## Levels: strain: Col0
## 
## characteristics_ch1.1 :: [1] treatment: Mock
## 3 Levels: treatment: Infected P. syringae pv tomato DC3000 hrpA- mutant ...
## 
## characteristics_ch1.2 :: [1] hours_since_infection: 1
## 13 Levels: hours_since_infection: 1 ... hours_since_infection: 9
## 
## characteristics_ch1.3 :: [1] tissue: leaf
## Levels: tissue: leaf
## 
## growth_protocol_ch1 ::   [1] Arabidopsis plants Col-0 were grown under a 16:8 hr light:dark cycle (lights on 04:00 to 20:00) at 20°C, 60% humidity and light intensity of 100 μmol photons.m-2.s-1. Arabidopsis seed was stratified for three days in 0.1% agar at 4°C before sowing onto Arabidopsis soil mix (Scotts Levingtons F2s compost:sand:fine grade vermiculite in a ratio of 6:1:1).
## Levels: Arabidopsis plants Col-0 were grown under a 16:8 hr light:dark cycle (lights on 04:00 to 20:00) at 20°C, 60% humidity and light intensity of 100 μmol photons.m-2.s-1. Arabidopsis seed was stratified for three days in 0.1% agar at 4°C before sowing onto Arabidopsis soil mix (Scotts Levingtons F2s compost:sand:fine grade vermiculite in a ratio of 6:1:1).
## 
## molecule_ch1 ::  [1] total RNA
## Levels: total RNA
## 
## extract_protocol_ch1 ::  [1] Total RNA was isolated from four individual leaves from each sampled time point (arbitrarily labelled as biological replicates A, B, C and D) using TRIzol reagent (Invitrogen), purified with RNeasy columns (Qiagen) and amplified using the MessageAmp II aRNA Amplification kit (Ambion) in accordance with the kit protocol with a single round of amplification.
## Levels: Total RNA was isolated from four individual leaves from each sampled time point (arbitrarily labelled as biological replicates A, B, C and D) using TRIzol reagent (Invitrogen), purified with RNeasy columns (Qiagen) and amplified using the MessageAmp II aRNA Amplification kit (Ambion) in accordance with the kit protocol with a single round of amplification.
## 
## label_ch1 :: [1] Cy3 or Cy5
## Levels: Cy3 or Cy5
## 
## label_protocol_ch1 ::    [1] Cy3 and Cy5 labelled cDNA probes were prepared by reverse transcribing 5µg of aRNA with Cy3- or Cy5- dCTP (GE Healthcare) and a modified dNTP mix (10mM each dATP, dGTP and dTTP; 2mM dCTP) using random primers (Invitrogen) and SuperScript II reverse transcriptase (Invitrogen), with the inclusion of RNase inhibitor (RNaseOUT, Invitrogen) and DTT. Labelled probes were purified using QiaQuick PCR Purification columns (Qiagen), freeze-dried, and resuspended in 50µl hybridization buffer (25% formamide, 5xSSC, 0.1% SDS, 0.5µg/µl yeast tRNA [Invitrogen]).
## Levels: Cy3 and Cy5 labelled cDNA probes were prepared by reverse transcribing 5µg of aRNA with Cy3- or Cy5- dCTP (GE Healthcare) and a modified dNTP mix (10mM each dATP, dGTP and dTTP; 2mM dCTP) using random primers (Invitrogen) and SuperScript II reverse transcriptase (Invitrogen), with the inclusion of RNase inhibitor (RNaseOUT, Invitrogen) and DTT. Labelled probes were purified using QiaQuick PCR Purification columns (Qiagen), freeze-dried, and resuspended in 50µl hybridization buffer (25% formamide, 5xSSC, 0.1% SDS, 0.5µg/µl yeast tRNA [Invitrogen]).
## 
## taxid_ch1 :: [1] 3702
## Levels: 3702
## 
## hyb_protocol ::  [1] The order in which the 288 slides were hybridized and scanned was randomised to minimise the impact on differences between samples of any potential variation in the processing conditions. According to this randomised experimental design combinations of labelled samples were hybridized to slides overnight at 42oC.
## Levels: The order in which the 288 slides were hybridized and scanned was randomised to minimise the impact on differences between samples of any potential variation in the processing conditions. According to this randomised experimental design combinations of labelled samples were hybridized to slides overnight at 42oC.
## 
## scan_protocol :: [1] Following hybridization, slides were washed and scanned using an Affymetrix 428 array scanner at 532nm (Cy3) and 635nm (Cy5).
## Levels: Following hybridization, slides were washed and scanned using an Affymetrix 428 array scanner at 532nm (Cy3) and 635nm (Cy5).
## 
## scan_protocol.1 ::   [1] Scanned data were quantified using Imagene 7.5.0 software (BioDiscovery, Inc.).
## Levels: Scanned data were quantified using Imagene 7.5.0 software (BioDiscovery, Inc.).
## 
## description ::   [1] Randomised loop design providing dye-swaps between treatments, time points and bioreps
## Levels: Randomised loop design providing dye-swaps between treatments, time points and bioreps
## 
## data_processing ::   [1] Quantified microarray data were Lowess normalized within pintip groups and within arrays, then variation due to arrays and dyes was removed by a random effects model, and averaged, in log space, using R/MAANOVA (MicroArray ANalysis Of VAriance) (Wu et al. 2003). Oligo sequences of CATMA array probes were mapped to individual mRNA sequences of transcripts from the TAIR9 genome assembly using BLASTn with e-value cutoff of 0.01.  Additionally, alignments were filtered to exclude alignments shorter than 30bp or with less than 80% sequence identity.
## Levels: Quantified microarray data were Lowess normalized within pintip groups and within arrays, then variation due to arrays and dyes was removed by a random effects model, and averaged, in log space, using R/MAANOVA (MicroArray ANalysis Of VAriance) (Wu et al. 2003). Oligo sequences of CATMA array probes were mapped to individual mRNA sequences of transcripts from the TAIR9 genome assembly using BLASTn with e-value cutoff of 0.01.  Additionally, alignments were filtered to exclude alignments shorter than 30bp or with less than 80% sequence identity.
## 
## platform_id ::   [1] GPL10840
## Levels: GPL10840
## 
## contact_name ::  [1] Jonathan,David,Moore
## Levels: Jonathan,David,Moore
## 
## contact_email :: [1] jonathan.moore@tgac.ac.uk
## Levels: jonathan.moore@tgac.ac.uk
## 
## contact_department ::    [1] Platforms and Pipelines
## Levels: Platforms and Pipelines
## 
## contact_institute :: [1] The Genome Analysis Centre
## Levels: The Genome Analysis Centre
## 
## contact_address ::   [1] Norwich Research Park
## Levels: Norwich Research Park
## 
## contact_city ::  [1] Norwich
## Levels: Norwich
## 
## contact_country ::   [1] United Kingdom
## Levels: United Kingdom
## 
## supplementary_file ::    [1] ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781092_Cy5.txt.gz
## 156 Levels: ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781092_Cy5.txt.gz ...
## 
## supplementary_file.1 ::  [1] ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781096_Cy3.txt.gz
## 156 Levels: ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781096_Cy3.txt.gz ...
## 
## supplementary_file.2 ::  [1] ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781649_Cy5.txt.gz
## 156 Levels: ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13781649_Cy5.txt.gz ...
## 
## supplementary_file.3 ::  [1] ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13876881_Cy3.txt.gz
## 156 Levels: ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/samples/GSM1354nnn/GSM1354976/GSM1354976_LB_13876881_Cy3.txt.gz ...
## 
## data_row_count ::    [1] 30336
## Levels: 30336
fvarMetadata(gse)
##                    Column
## ID                     ID
## Chrom               Chrom
## TAIR9_best     TAIR9_best
## TAIR9_others TAIR9_others
## SEQUENCE         SEQUENCE
## ORF                   ORF
## SPOT_ID           SPOT_ID
##                                                                                       Description
## ID                                                                                               
## Chrom                    TAIR9 pseudochromosome containing best BLAST alignment to probe sequence
## TAIR9_best   TAIR9 gene model whose mRNA sequence contains best BLAST alignment to probe sequence
## TAIR9_others                            Other TAIR9 gene models with alignments to probe sequence
## SEQUENCE                                                                            Probe sequece
## ORF                     LINK_PRE:"http://www.ncbi.nlm.nih.gov/gene/?term=" LINK_SUF:"[gene name]"
## SPOT_ID                                                                                          
##              labelDescription
## ID                       <NA>
## Chrom                    <NA>
## TAIR9_best               <NA>
## TAIR9_others             <NA>
## SEQUENCE                 <NA>
## ORF                      <NA>
## SPOT_ID                  <NA>
head(fData(gse))
##                        ID Chrom TAIR9_best TAIR9_others
## CATMA1a00010 CATMA1a00010     1  AT1G01010             
## CATMA1a00020 CATMA1a00020     1  AT1G01020    AT1G05400
## CATMA1a00030 CATMA1a00030     1  AT1G01030             
## CATMA1a00035 CATMA1a00035     1  AT1G01040             
## CATMA1a00040 CATMA1a00040     1  AT1G01050             
## CATMA1a00045 CATMA1a00045     1  AT1G01060             
##                                                                                                                                                                                                                                                                                                                                                                                                                    SEQUENCE
## CATMA1a00010                                                                                                                                     TCAACCAATCACGTCAACGAAATTCAGGATCTTACAACACTTACTCTGAGTATGATTCAGCAAATCATGGCCAGCAGTTTAATGAAAACTCTAACATTATGCAGCAGCAACCACTTCAAGGATCATTCAACCCTCTCCTTGAGTATGATTTTGCAAATCACGGCGGTCAGTGGCTGAGTGACTATATCGACCTGCAACAGCAAGTTCCTTACTTGGCACCTTATGAAAATGAGTCGGAGATGATTTGGAAGCATGTGATTGAAGAA
## CATMA1a00020                                                                                                                                                                                           GTGGTGGTACCCTCCTTGGAATGGTGGGCATATGTGGAACCCTCCTATGAATGTTGGAGATATGTGGGACTCTCCTATGAATGTGGGGCATCAACACATTGTTGCAACACCAAGTGAATGTCATAGGAAAGACGAGGAGACGGTTGAAGCCGAAGTTGAGGCTCCCCTTGATTCTTATCCGGAGAATGCTACTACCATGGCGAGAAAGCAAC
## CATMA1a00030                                                                                                                                                                                    CCAAACTTTACATAGATTGGAGGCATAGACCCGACATGAGCCTCGTTCAAGCACATCAGTTTGGTAATTTTGGTTTCAATTTCAATTTCCCGACCACTTCTCAATATTCCAACAGATTTCATCCATTGCCAGAATATAACTCCGTCCCGATTCACCGGGGCTTAAACATCGGAAATCACCAACGTTCCTATTATAACACCCAGCGTCAAGAGTTCGTAG
## CATMA1a00035                                                                                                                                                                                   GGCTCATGCAAAGAGATTTACGTTTGGGGTAAGAGTTAATACGAGCGATAGAGGATGGACCGATGAGTGTATTGGCGAGCCAATGCCGAGTGTTAAGAAAGCTAAGGATTCAGCTGCGGTTCTTCTACTTGAGCTTTTAAATAAAACTTTTTCTTGATTCTTTTACTCTCTTCAACGAGATGTAGTCATTACATTTTAAACCTTAAAACCATAGTGGTTG
## CATMA1a00040                                                                                                                                                                                                                                                         AGCTTCTCCTCAGAAGATTTCTGCAGCATCTATGTTTCTGTTACTTTTCATTGATAATAAGAAGCATATGATCCTAATTGATATTACTTTTATCTCTGCATTTTTCTTCCTTATCATTTGGTCCTTCTGTAATTTCGTTTTAAAGACCCT
## CATMA1a00045 TCCTAATTTCAAAAGCCAGGATTCTTGTGCTGCAGACCAAGAAGGAGTAGTAATGATCGGTGTTGGAACATGCAAGAGTCTTAAAACGAGACAGACAGGATTTAAGCCATACAAGAGATGTTCAATGGAAGTGAAAGAGAGCCAAGTTGGGAACATAAACAATCAAAGTGATGAAAAAGTCTGCAAAAGGCTTCGATTGGAAGGAGAAGCTTCTACATGACAGACTTGGAGGTAAAAAAAAAACATCCACATTTTTATCAATATCTTTAAATCTAGTGTTAGTAGTTTGCTTCTCCAATCTTTATGAAAGAGACTTTTAATTTTCCTTCCGAACATTTCTTTGGTCATGTCAGGTTCTGTACCATATTACCCCATGTCTTGTCTCTTGTCTCTGTTTG
##                    ORF SPOT_ID
## CATMA1a00010 AT1G01010        
## CATMA1a00020 AT1G01020        
## CATMA1a00030 AT1G01030        
## CATMA1a00035 AT1G01040        
## CATMA1a00040 AT1G01050        
## CATMA1a00045 AT1G01060
fvarLabels(gse)
## [1] "ID"           "Chrom"        "TAIR9_best"   "TAIR9_others"
## [5] "SEQUENCE"     "ORF"          "SPOT_ID"

Translation table: CATMA to Ath gene

catma = fData(gse)
cColNames = fvarLabels(gse)
ncol = length(cColNames)
dim(catma)
## [1] 30336     7
catma[catma==""]<-NA
numNAs <- apply(catma, 1, function(x) sum(is.na(x)))
length(numNAs) == dim(catma)[1]
## [1] TRUE
sum(is.na(catma$TAIR9_best))
## [1] 2728
# keep ones that have assigned Ath gene
catma = catma[!is.na(catma$TAIR9_best), ]
dim(catma)
## [1] 27608     7

Get expression values

# write.table(data.frame(fData(gse),exprs(gse)),sep="\t",row.names=FALSE,file='testGSE56094Table.txt')
myE = exprs(gse)
head(myE)[,1:6]
##              GSM1354976 GSM1354977 GSM1354978 GSM1354979 GSM1354980
## CATMA1a00010   9.429444  11.295529  10.720168  11.131236  10.630403
## CATMA1a00020   7.983924   7.732327   7.730905   7.942637   7.696408
## CATMA1a00030   8.298806   8.758403   8.341977   8.835087   8.293516
## CATMA1a00035  10.233715  10.668946   9.881458  10.483725  10.122867
## CATMA1a00040  11.137192  10.798991  10.760669  11.094889  11.641954
## CATMA1a00045   9.907937   9.546775   9.292673   8.630197   8.611419
##              GSM1354981
## CATMA1a00010  10.856614
## CATMA1a00020   7.968995
## CATMA1a00030   8.387448
## CATMA1a00035  10.275984
## CATMA1a00040  11.872051
## CATMA1a00045   8.989938
dim(myE)
## [1] 30336   156
nold = dim(myE)[2]

Plot values per experiment

# par(mfrow=c(2,1))
plot(density(myE[,1]), col=1, xlim=c(0,20), ylim=c(0,0.60), main = 'Kernel Density Estimation')
for (n in 2:nold) {lines(density(myE[,n]), col=n, xlim=c(0,20), ylim=c(0,0.60))}

boxplot(as.data.frame(myE[,1:nold]), col=rainbow(24), main= "Normalised data", las=3, cex.axis = 0.5)

# par(mfrow=c(1,1))

Extract expression data for wanted experimets

Order Sample ID Sample description Order Sample ID Sample description Order Sample ID Sample description
1 GSM1354976 Mock 1 hours biorep A 53 GSM1355028 P. syringae pv tomato DC3000 hrpA- mutant 1 hours biorep A 105 GSM1355080 P. syringae pv tomato DC3000 wildtype 1 hours biorep A
2 GSM1354977 Mock 2 hours biorep A 54 GSM1355029 P. syringae pv tomato DC3000 hrpA- mutant 2 hours biorep A 106 GSM1355081 P. syringae pv tomato DC3000 wildtype 2 hours biorep A
3 GSM1354978 Mock 3 hours biorep A 55 GSM1355030 P. syringae pv tomato DC3000 hrpA- mutant 3 hours biorep A 107 GSM1355082 P. syringae pv tomato DC3000 wildtype 3 hours biorep A
4 GSM1354979 Mock 4 hours biorep A 56 GSM1355031 P. syringae pv tomato DC3000 hrpA- mutant 4 hours biorep A 108 GSM1355083 P. syringae pv tomato DC3000 wildtype 4 hours biorep A
5 GSM1354980 Mock 5 hours biorep A 57 GSM1355032 P. syringae pv tomato DC3000 hrpA- mutant 5 hours biorep A 109 GSM1355084 P. syringae pv tomato DC3000 wildtype 5 hours biorep A
6 GSM1354981 Mock 6 hours biorep A 58 GSM1355033 P. syringae pv tomato DC3000 hrpA- mutant 6 hours biorep A 110 GSM1355085 P. syringae pv tomato DC3000 wildtype 6 hours biorep A
7 GSM1354982 Mock 7 hours biorep A 59 GSM1355034 P. syringae pv tomato DC3000 hrpA- mutant 7 hours biorep A 111 GSM1355086 P. syringae pv tomato DC3000 wildtype 7 hours biorep A
8 GSM1354983 Mock 8 hours biorep A 60 GSM1355035 P. syringae pv tomato DC3000 hrpA- mutant 8 hours biorep A 112 GSM1355087 P. syringae pv tomato DC3000 wildtype 8 hours biorep A
9 GSM1354984 Mock 9 hours biorep A 61 GSM1355036 P. syringae pv tomato DC3000 hrpA- mutant 9 hours biorep A 113 GSM1355088 P. syringae pv tomato DC3000 wildtype 9 hours biorep A
10 GSM1354985 Mock 10 hours biorep A 62 GSM1355037 P. syringae pv tomato DC3000 hrpA- mutant 10 hours biorep A 114 GSM1355089 P. syringae pv tomato DC3000 wildtype 10 hours biorep A
11 GSM1354986 Mock 11 hours biorep A 63 GSM1355038 P. syringae pv tomato DC3000 hrpA- mutant 11 hours biorep A 115 GSM1355090 P. syringae pv tomato DC3000 wildtype 11 hours biorep A
12 GSM1354987 Mock 12 hours biorep A 64 GSM1355039 P. syringae pv tomato DC3000 hrpA- mutant 12 hours biorep A 116 GSM1355091 P. syringae pv tomato DC3000 wildtype 12 hours biorep A
13 GSM1354988 Mock 13 hours biorep A 65 GSM1355040 P. syringae pv tomato DC3000 hrpA- mutant 13 hours biorep A 117 GSM1355092 P. syringae pv tomato DC3000 wildtype 13 hours biorep A
14 GSM1354989 Mock 1 hours biorep B 66 GSM1355041 P. syringae pv tomato DC3000 hrpA- mutant 1 hours biorep B 118 GSM1355093 P. syringae pv tomato DC3000 wildtype 1 hours biorep B
15 GSM1354990 Mock 2 hours biorep B 67 GSM1355042 P. syringae pv tomato DC3000 hrpA- mutant 2 hours biorep B 119 GSM1355094 P. syringae pv tomato DC3000 wildtype 2 hours biorep B
16 GSM1354991 Mock 3 hours biorep B 68 GSM1355043 P. syringae pv tomato DC3000 hrpA- mutant 3 hours biorep B 120 GSM1355095 P. syringae pv tomato DC3000 wildtype 3 hours biorep B
17 GSM1354992 Mock 4 hours biorep B 69 GSM1355044 P. syringae pv tomato DC3000 hrpA- mutant 4 hours biorep B 121 GSM1355096 P. syringae pv tomato DC3000 wildtype 4 hours biorep B
18 GSM1354993 Mock 5 hours biorep B 70 GSM1355045 P. syringae pv tomato DC3000 hrpA- mutant 5 hours biorep B 122 GSM1355097 P. syringae pv tomato DC3000 wildtype 5 hours biorep B
19 GSM1354994 Mock 6 hours biorep B 71 GSM1355046 P. syringae pv tomato DC3000 hrpA- mutant 6 hours biorep B 123 GSM1355098 P. syringae pv tomato DC3000 wildtype 6 hours biorep B
20 GSM1354995 Mock 7 hours biorep B 72 GSM1355047 P. syringae pv tomato DC3000 hrpA- mutant 7 hours biorep B 124 GSM1355099 P. syringae pv tomato DC3000 wildtype 7 hours biorep B
21 GSM1354996 Mock 8 hours biorep B 73 GSM1355048 P. syringae pv tomato DC3000 hrpA- mutant 8 hours biorep B 125 GSM1355100 P. syringae pv tomato DC3000 wildtype 8 hours biorep B
22 GSM1354997 Mock 9 hours biorep B 74 GSM1355049 P. syringae pv tomato DC3000 hrpA- mutant 9 hours biorep B 126 GSM1355101 P. syringae pv tomato DC3000 wildtype 9 hours biorep B
23 GSM1354998 Mock 10 hours biorep B 75 GSM1355050 P. syringae pv tomato DC3000 hrpA- mutant 10 hours biorep B 127 GSM1355102 P. syringae pv tomato DC3000 wildtype 10 hours biorep B
24 GSM1354999 Mock 11 hours biorep B 76 GSM1355051 P. syringae pv tomato DC3000 hrpA- mutant 11 hours biorep B 128 GSM1355103 P. syringae pv tomato DC3000 wildtype 11 hours biorep B
25 GSM1355000 Mock 12 hours biorep B 77 GSM1355052 P. syringae pv tomato DC3000 hrpA- mutant 12 hours biorep B 129 GSM1355104 P. syringae pv tomato DC3000 wildtype 12 hours biorep B
26 GSM1355001 Mock 13 hours biorep B 78 GSM1355053 P. syringae pv tomato DC3000 hrpA- mutant 13 hours biorep B 130 GSM1355105 P. syringae pv tomato DC3000 wildtype 13 hours biorep B
27 GSM1355002 Mock 1 hours biorep C 79 GSM1355054 P. syringae pv tomato DC3000 hrpA- mutant 1 hours biorep C 131 GSM1355106 P. syringae pv tomato DC3000 wildtype 1 hours biorep C
28 GSM1355003 Mock 2 hours biorep C 80 GSM1355055 P. syringae pv tomato DC3000 hrpA- mutant 2 hours biorep C 132 GSM1355107 P. syringae pv tomato DC3000 wildtype 2 hours biorep C
29 GSM1355004 Mock 3 hours biorep C 81 GSM1355056 P. syringae pv tomato DC3000 hrpA- mutant 3 hours biorep C 133 GSM1355108 P. syringae pv tomato DC3000 wildtype 3 hours biorep C
30 GSM1355005 Mock 4 hours biorep C 82 GSM1355057 P. syringae pv tomato DC3000 hrpA- mutant 4 hours biorep C 134 GSM1355109 P. syringae pv tomato DC3000 wildtype 4 hours biorep C
31 GSM1355006 Mock 5 hours biorep C 83 GSM1355058 P. syringae pv tomato DC3000 hrpA- mutant 5 hours biorep C 135 GSM1355110 P. syringae pv tomato DC3000 wildtype 5 hours biorep C
32 GSM1355007 Mock 6 hours biorep C 84 GSM1355059 P. syringae pv tomato DC3000 hrpA- mutant 6 hours biorep C 136 GSM1355111 P. syringae pv tomato DC3000 wildtype 6 hours biorep C
33 GSM1355008 Mock 7 hours biorep C 85 GSM1355060 P. syringae pv tomato DC3000 hrpA- mutant 7 hours biorep C 137 GSM1355112 P. syringae pv tomato DC3000 wildtype 7 hours biorep C
34 GSM1355009 Mock 8 hours biorep C 86 GSM1355061 P. syringae pv tomato DC3000 hrpA- mutant 8 hours biorep C 138 GSM1355113 P. syringae pv tomato DC3000 wildtype 8 hours biorep C
35 GSM1355010 Mock 9 hours biorep C 87 GSM1355062 P. syringae pv tomato DC3000 hrpA- mutant 9 hours biorep C 139 GSM1355114 P. syringae pv tomato DC3000 wildtype 9 hours biorep C
36 GSM1355011 Mock 10 hours biorep C 88 GSM1355063 P. syringae pv tomato DC3000 hrpA- mutant 10 hours biorep C 140 GSM1355115 P. syringae pv tomato DC3000 wildtype 10 hours biorep C
37 GSM1355012 Mock 11 hours biorep C 89 GSM1355064 P. syringae pv tomato DC3000 hrpA- mutant 11 hours biorep C 141 GSM1355116 P. syringae pv tomato DC3000 wildtype 11 hours biorep C
38 GSM1355013 Mock 12 hours biorep C 90 GSM1355065 P. syringae pv tomato DC3000 hrpA- mutant 12 hours biorep C 142 GSM1355117 P. syringae pv tomato DC3000 wildtype 12 hours biorep C
39 GSM1355014 Mock 13 hours biorep C 91 GSM1355066 P. syringae pv tomato DC3000 hrpA- mutant 13 hours biorep C 143 GSM1355118 P. syringae pv tomato DC3000 wildtype 13 hours biorep C
40 GSM1355015 Mock 1 hours biorep D 92 GSM1355067 P. syringae pv tomato DC3000 hrpA- mutant 1 hours biorep D 144 GSM1355119 P. syringae pv tomato DC3000 wildtype 1 hours biorep D
41 GSM1355016 Mock 2 hours biorep D 93 GSM1355068 P. syringae pv tomato DC3000 hrpA- mutant 2 hours biorep D 145 GSM1355120 P. syringae pv tomato DC3000 wildtype 2 hours biorep D
42 GSM1355017 Mock 3 hours biorep D 94 GSM1355069 P. syringae pv tomato DC3000 hrpA- mutant 3 hours biorep D 146 GSM1355121 P. syringae pv tomato DC3000 wildtype 3 hours biorep D
43 GSM1355018 Mock 4 hours biorep D 95 GSM1355070 P. syringae pv tomato DC3000 hrpA- mutant 4 hours biorep D 147 GSM1355122 P. syringae pv tomato DC3000 wildtype 4 hours biorep D
44 GSM1355019 Mock 5 hours biorep D 96 GSM1355071 P. syringae pv tomato DC3000 hrpA- mutant 5 hours biorep D 148 GSM1355123 P. syringae pv tomato DC3000 wildtype 5 hours biorep D
45 GSM1355020 Mock 6 hours biorep D 97 GSM1355072 P. syringae pv tomato DC3000 hrpA- mutant 6 hours biorep D 149 GSM1355124 P. syringae pv tomato DC3000 wildtype 6 hours biorep D
46 GSM1355021 Mock 7 hours biorep D 98 GSM1355073 P. syringae pv tomato DC3000 hrpA- mutant 7 hours biorep D 150 GSM1355125 P. syringae pv tomato DC3000 wildtype 7 hours biorep D
47 GSM1355022 Mock 8 hours biorep D 99 GSM1355074 P. syringae pv tomato DC3000 hrpA- mutant 8 hours biorep D 151 GSM1355126 P. syringae pv tomato DC3000 wildtype 8 hours biorep D
48 GSM1355023 Mock 9 hours biorep D 100 GSM1355075 P. syringae pv tomato DC3000 hrpA- mutant 9 hours biorep D 152 GSM1355127 P. syringae pv tomato DC3000 wildtype 9 hours biorep D
49 GSM1355024 Mock 10 hours biorep D 101 GSM1355076 P. syringae pv tomato DC3000 hrpA- mutant 10 hours biorep D 153 GSM1355128 P. syringae pv tomato DC3000 wildtype 10 hours biorep D
50 GSM1355025 Mock 11 hours biorep D 102 GSM1355077 P. syringae pv tomato DC3000 hrpA- mutant 11 hours biorep D 154 GSM1355129 P. syringae pv tomato DC3000 wildtype 11 hours biorep D
51 GSM1355026 Mock 12 hours biorep D 103 GSM1355078 P. syringae pv tomato DC3000 hrpA- mutant 12 hours biorep D 155 GSM1355130 P. syringae pv tomato DC3000 wildtype 12 hours biorep D
52 GSM1355027 Mock 13 hours biorep D 104 GSM1355079 P. syringae pv tomato DC3000 hrpA- mutant 13 hours biorep D 156 GSM1355131 P. syringae pv tomato DC3000 wildtype 13 hours biorep D
# gse$title

# select only mock and DC3000 wt
myE = myE[,c(1:(13*4),((13*4*2+1):(13*4*3)))]
dim(myE)
## [1] 30336   104
nold = dim(myE)[2]

# find match per CATMA
length(rownames(catma)) == length(unique(rownames(catma)))
## [1] TRUE
ind = match(rownames(myE),rownames(catma))
myE = cbind(myE,catma[ind,])
dim(myE)
## [1] 30336   111
# backup copy of expression file merged with pheno data
myEextended = myE

# find duplicated probes (one probe per gene)
# gives you a data frame with a list of ids and the number of times they occurred
n_occur <- data.frame(table(myE[,(nold+3)]))
# tells you which ids occurred more than once
duplicated = n_occur[n_occur$Freq > 1,]
# dim(duplicated)
sum(duplicated[,2])
## [1] 7000
hist(duplicated[,2])

# returns the records with more than one occurrence
duplicatedID = myE[myE[,(nold+3)] %in% n_occur$Var1[n_occur$Freq > 1],]
dim(duplicatedID)
## [1] 7000  111
length(unique(duplicatedID[,(nold+3)])) == dim(duplicated)[1]
## [1] TRUE
myE = myE[,c(1:nold, nold+1, nold+3)]
# remove probes that are not annotated with Ath gene
myE = myE[!is.na(myE[,dim(myE)[2]]),]
any(is.na(myE[,dim(myE)[2]]))
## [1] FALSE
dim(myE)
## [1] 27608   106
# average probe expression for ones that are matching the same gene
X = aggregate(myE[,1:nold], by=list(myE$TAIR9_best), FUN=mean)
dim(X)[1] == length(unique(myE$TAIR9_best))
## [1] TRUE
rownames(X) = X$Group.1
newCatma = rownames(X)
length(newCatma) == length(unique(newCatma))
## [1] TRUE
any(colnames(X)[2:dim(X)[2]] != colnames(myE)[1:nold])
## [1] FALSE
myE = X[,2:dim(X)[2]]

Phenodata

dim((pData(phenoData(gse))))
## [1] 156  38
colnames((pData(phenoData(gse))))
##  [1] "title"                   "geo_accession"          
##  [3] "status"                  "submission_date"        
##  [5] "last_update_date"        "type"                   
##  [7] "channel_count"           "source_name_ch1"        
##  [9] "organism_ch1"            "characteristics_ch1"    
## [11] "characteristics_ch1.1"   "characteristics_ch1.2"  
## [13] "characteristics_ch1.3"   "growth_protocol_ch1"    
## [15] "molecule_ch1"            "extract_protocol_ch1"   
## [17] "label_ch1"               "label_protocol_ch1"     
## [19] "taxid_ch1"               "hyb_protocol"           
## [21] "scan_protocol"           "scan_protocol.1"        
## [23] "description"             "data_processing"        
## [25] "platform_id"             "contact_name"           
## [27] "contact_email"           "contact_department"     
## [29] "contact_institute"       "contact_address"        
## [31] "contact_city"            "contact_zip/postal_code"
## [33] "contact_country"         "supplementary_file"     
## [35] "supplementary_file.1"    "supplementary_file.2"   
## [37] "supplementary_file.3"    "data_row_count"
show(pData(phenoData(gse))[1:5,c(1,6,8)])
##                            title type       source_name_ch1
## GSM1354976 Mock 1 hours biorep A  RNA Mock 1 hours biorep A
## GSM1354977 Mock 2 hours biorep A  RNA Mock 2 hours biorep A
## GSM1354978 Mock 3 hours biorep A  RNA Mock 3 hours biorep A
## GSM1354979 Mock 4 hours biorep A  RNA Mock 4 hours biorep A
## GSM1354980 Mock 5 hours biorep A  RNA Mock 5 hours biorep A
myP = (pData(phenoData(gse)))[,c(1,6,8)]
dim(myP)
## [1] 156   3
head(myP)
##                            title type       source_name_ch1
## GSM1354976 Mock 1 hours biorep A  RNA Mock 1 hours biorep A
## GSM1354977 Mock 2 hours biorep A  RNA Mock 2 hours biorep A
## GSM1354978 Mock 3 hours biorep A  RNA Mock 3 hours biorep A
## GSM1354979 Mock 4 hours biorep A  RNA Mock 4 hours biorep A
## GSM1354980 Mock 5 hours biorep A  RNA Mock 5 hours biorep A
## GSM1354981 Mock 6 hours biorep A  RNA Mock 6 hours biorep A
myP = myP[c(1:(13*4),((13*4*2+1):(13*4*3))),]
str(myP$source_name_ch1)
##  Factor w/ 156 levels "Mock 1 hours biorep A",..: 1 21 25 29 33 37 41 45 49 5 ...
##  - attr(*, "names")= chr [1:104] "V2" "V3" "V4" "V5" ...
all(rownames(myP)==colnames(myE[,1:nold]))
## [1] TRUE
# phenoData <- new("AnnotatedDataFrame",data=myP)
summary(myP)
##                     title     type                   source_name_ch1
##  Mock 1 hours biorep A : 1   RNA:104   Mock 1 hours biorep A : 1    
##  Mock 1 hours biorep B : 1             Mock 1 hours biorep B : 1    
##  Mock 1 hours biorep C : 1             Mock 1 hours biorep C : 1    
##  Mock 1 hours biorep D : 1             Mock 1 hours biorep D : 1    
##  Mock 10 hours biorep A: 1             Mock 10 hours biorep A: 1    
##  Mock 10 hours biorep B: 1             Mock 10 hours biorep B: 1    
##  (Other)               :98             (Other)               :98
names(myP)
## [1] "title"           "type"            "source_name_ch1"
sapply(myP, class)
##           title            type source_name_ch1 
##        "factor"        "factor"        "factor"

Design (or model) matrix

(lev <- unique(ldply(strsplit(gsub(" ", "_", as.character(myP$source_name_ch1), 
                                  fixed = TRUE), 
                             split = "_hours_biorep"))[[1]]))
##  [1] "Mock_1"                                  
##  [2] "Mock_2"                                  
##  [3] "Mock_3"                                  
##  [4] "Mock_4"                                  
##  [5] "Mock_5"                                  
##  [6] "Mock_6"                                  
##  [7] "Mock_7"                                  
##  [8] "Mock_8"                                  
##  [9] "Mock_9"                                  
## [10] "Mock_10"                                 
## [11] "Mock_11"                                 
## [12] "Mock_12"                                 
## [13] "Mock_13"                                 
## [14] "P._syringae_pv_tomato_DC3000_wildtype_1" 
## [15] "P._syringae_pv_tomato_DC3000_wildtype_2" 
## [16] "P._syringae_pv_tomato_DC3000_wildtype_3" 
## [17] "P._syringae_pv_tomato_DC3000_wildtype_4" 
## [18] "P._syringae_pv_tomato_DC3000_wildtype_5" 
## [19] "P._syringae_pv_tomato_DC3000_wildtype_6" 
## [20] "P._syringae_pv_tomato_DC3000_wildtype_7" 
## [21] "P._syringae_pv_tomato_DC3000_wildtype_8" 
## [22] "P._syringae_pv_tomato_DC3000_wildtype_9" 
## [23] "P._syringae_pv_tomato_DC3000_wildtype_10"
## [24] "P._syringae_pv_tomato_DC3000_wildtype_11"
## [25] "P._syringae_pv_tomato_DC3000_wildtype_12"
## [26] "P._syringae_pv_tomato_DC3000_wildtype_13"
myP$source_name_ch1 = (ldply(strsplit(gsub(" ", "_", as.character(myP$source_name_ch1), 
                                           fixed = TRUE), 
                                      split = "_hours_biorep"))[[1]])
(f <- factor(myP$source_name_ch1, levels=lev))
##   [1] Mock_1                                  
##   [2] Mock_2                                  
##   [3] Mock_3                                  
##   [4] Mock_4                                  
##   [5] Mock_5                                  
##   [6] Mock_6                                  
##   [7] Mock_7                                  
##   [8] Mock_8                                  
##   [9] Mock_9                                  
##  [10] Mock_10                                 
##  [11] Mock_11                                 
##  [12] Mock_12                                 
##  [13] Mock_13                                 
##  [14] Mock_1                                  
##  [15] Mock_2                                  
##  [16] Mock_3                                  
##  [17] Mock_4                                  
##  [18] Mock_5                                  
##  [19] Mock_6                                  
##  [20] Mock_7                                  
##  [21] Mock_8                                  
##  [22] Mock_9                                  
##  [23] Mock_10                                 
##  [24] Mock_11                                 
##  [25] Mock_12                                 
##  [26] Mock_13                                 
##  [27] Mock_1                                  
##  [28] Mock_2                                  
##  [29] Mock_3                                  
##  [30] Mock_4                                  
##  [31] Mock_5                                  
##  [32] Mock_6                                  
##  [33] Mock_7                                  
##  [34] Mock_8                                  
##  [35] Mock_9                                  
##  [36] Mock_10                                 
##  [37] Mock_11                                 
##  [38] Mock_12                                 
##  [39] Mock_13                                 
##  [40] Mock_1                                  
##  [41] Mock_2                                  
##  [42] Mock_3                                  
##  [43] Mock_4                                  
##  [44] Mock_5                                  
##  [45] Mock_6                                  
##  [46] Mock_7                                  
##  [47] Mock_8                                  
##  [48] Mock_9                                  
##  [49] Mock_10                                 
##  [50] Mock_11                                 
##  [51] Mock_12                                 
##  [52] Mock_13                                 
##  [53] P._syringae_pv_tomato_DC3000_wildtype_1 
##  [54] P._syringae_pv_tomato_DC3000_wildtype_2 
##  [55] P._syringae_pv_tomato_DC3000_wildtype_3 
##  [56] P._syringae_pv_tomato_DC3000_wildtype_4 
##  [57] P._syringae_pv_tomato_DC3000_wildtype_5 
##  [58] P._syringae_pv_tomato_DC3000_wildtype_6 
##  [59] P._syringae_pv_tomato_DC3000_wildtype_7 
##  [60] P._syringae_pv_tomato_DC3000_wildtype_8 
##  [61] P._syringae_pv_tomato_DC3000_wildtype_9 
##  [62] P._syringae_pv_tomato_DC3000_wildtype_10
##  [63] P._syringae_pv_tomato_DC3000_wildtype_11
##  [64] P._syringae_pv_tomato_DC3000_wildtype_12
##  [65] P._syringae_pv_tomato_DC3000_wildtype_13
##  [66] P._syringae_pv_tomato_DC3000_wildtype_1 
##  [67] P._syringae_pv_tomato_DC3000_wildtype_2 
##  [68] P._syringae_pv_tomato_DC3000_wildtype_3 
##  [69] P._syringae_pv_tomato_DC3000_wildtype_4 
##  [70] P._syringae_pv_tomato_DC3000_wildtype_5 
##  [71] P._syringae_pv_tomato_DC3000_wildtype_6 
##  [72] P._syringae_pv_tomato_DC3000_wildtype_7 
##  [73] P._syringae_pv_tomato_DC3000_wildtype_8 
##  [74] P._syringae_pv_tomato_DC3000_wildtype_9 
##  [75] P._syringae_pv_tomato_DC3000_wildtype_10
##  [76] P._syringae_pv_tomato_DC3000_wildtype_11
##  [77] P._syringae_pv_tomato_DC3000_wildtype_12
##  [78] P._syringae_pv_tomato_DC3000_wildtype_13
##  [79] P._syringae_pv_tomato_DC3000_wildtype_1 
##  [80] P._syringae_pv_tomato_DC3000_wildtype_2 
##  [81] P._syringae_pv_tomato_DC3000_wildtype_3 
##  [82] P._syringae_pv_tomato_DC3000_wildtype_4 
##  [83] P._syringae_pv_tomato_DC3000_wildtype_5 
##  [84] P._syringae_pv_tomato_DC3000_wildtype_6 
##  [85] P._syringae_pv_tomato_DC3000_wildtype_7 
##  [86] P._syringae_pv_tomato_DC3000_wildtype_8 
##  [87] P._syringae_pv_tomato_DC3000_wildtype_9 
##  [88] P._syringae_pv_tomato_DC3000_wildtype_10
##  [89] P._syringae_pv_tomato_DC3000_wildtype_11
##  [90] P._syringae_pv_tomato_DC3000_wildtype_12
##  [91] P._syringae_pv_tomato_DC3000_wildtype_13
##  [92] P._syringae_pv_tomato_DC3000_wildtype_1 
##  [93] P._syringae_pv_tomato_DC3000_wildtype_2 
##  [94] P._syringae_pv_tomato_DC3000_wildtype_3 
##  [95] P._syringae_pv_tomato_DC3000_wildtype_4 
##  [96] P._syringae_pv_tomato_DC3000_wildtype_5 
##  [97] P._syringae_pv_tomato_DC3000_wildtype_6 
##  [98] P._syringae_pv_tomato_DC3000_wildtype_7 
##  [99] P._syringae_pv_tomato_DC3000_wildtype_8 
## [100] P._syringae_pv_tomato_DC3000_wildtype_9 
## [101] P._syringae_pv_tomato_DC3000_wildtype_10
## [102] P._syringae_pv_tomato_DC3000_wildtype_11
## [103] P._syringae_pv_tomato_DC3000_wildtype_12
## [104] P._syringae_pv_tomato_DC3000_wildtype_13
## 26 Levels: Mock_1 Mock_2 Mock_3 Mock_4 Mock_5 Mock_6 Mock_7 ... P._syringae_pv_tomato_DC3000_wildtype_13
design <- model.matrix(~0+f)
dim(design)
## [1] 104  26
which(design!=0,arr.ind = T)
##     row col
## 1     1   1
## 14   14   1
## 27   27   1
## 40   40   1
## 2     2   2
## 15   15   2
## 28   28   2
## 41   41   2
## 3     3   3
## 16   16   3
## 29   29   3
## 42   42   3
## 4     4   4
## 17   17   4
## 30   30   4
## 43   43   4
## 5     5   5
## 18   18   5
## 31   31   5
## 44   44   5
## 6     6   6
## 19   19   6
## 32   32   6
## 45   45   6
## 7     7   7
## 20   20   7
## 33   33   7
## 46   46   7
## 8     8   8
## 21   21   8
## 34   34   8
## 47   47   8
## 9     9   9
## 22   22   9
## 35   35   9
## 48   48   9
## 10   10  10
## 23   23  10
## 36   36  10
## 49   49  10
## 11   11  11
## 24   24  11
## 37   37  11
## 50   50  11
## 12   12  12
## 25   25  12
## 38   38  12
## 51   51  12
## 13   13  13
## 26   26  13
## 39   39  13
## 52   52  13
## 53   53  14
## 66   66  14
## 79   79  14
## 92   92  14
## 54   54  15
## 67   67  15
## 80   80  15
## 93   93  15
## 55   55  16
## 68   68  16
## 81   81  16
## 94   94  16
## 56   56  17
## 69   69  17
## 82   82  17
## 95   95  17
## 57   57  18
## 70   70  18
## 83   83  18
## 96   96  18
## 58   58  19
## 71   71  19
## 84   84  19
## 97   97  19
## 59   59  20
## 72   72  20
## 85   85  20
## 98   98  20
## 60   60  21
## 73   73  21
## 86   86  21
## 99   99  21
## 61   61  22
## 74   74  22
## 87   87  22
## 100 100  22
## 62   62  23
## 75   75  23
## 88   88  23
## 101 101  23
## 63   63  24
## 76   76  24
## 89   89  24
## 102 102  24
## 64   64  25
## 77   77  25
## 90   90  25
## 103 103  25
## 65   65  26
## 78   78  26
## 91   91  26
## 104 104  26
colnames(design) <- lev
rownames(design) = rownames(myP)
str(design)
##  num [1:104, 1:26] 1 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:104] "GSM1354976" "GSM1354977" "GSM1354978" "GSM1354979" ...
##   ..$ : chr [1:26] "Mock_1" "Mock_2" "Mock_3" "Mock_4" ...
##  - attr(*, "assign")= int [1:26] 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "contrasts")=List of 1
##   ..$ f: chr "contr.treatment"

Fit linear model for each gene given a series of arrays

fit <- lmFit(myE, design, weights=NULL)
summary(fit)
##                  Length Class  Mode     
## coefficients     623324 -none- numeric  
## rank                  1 -none- numeric  
## assign               26 -none- numeric  
## qr                    5 qr     list     
## df.residual       23974 -none- numeric  
## sigma             23974 -none- numeric  
## cov.coefficients    676 -none- numeric  
## stdev.unscaled   623324 -none- numeric  
## pivot                26 -none- numeric  
## Amean             23974 -none- numeric  
## method                1 -none- character
## design             2704 -none- numeric
colnames(fit$design)
##  [1] "Mock_1"                                  
##  [2] "Mock_2"                                  
##  [3] "Mock_3"                                  
##  [4] "Mock_4"                                  
##  [5] "Mock_5"                                  
##  [6] "Mock_6"                                  
##  [7] "Mock_7"                                  
##  [8] "Mock_8"                                  
##  [9] "Mock_9"                                  
## [10] "Mock_10"                                 
## [11] "Mock_11"                                 
## [12] "Mock_12"                                 
## [13] "Mock_13"                                 
## [14] "P._syringae_pv_tomato_DC3000_wildtype_1" 
## [15] "P._syringae_pv_tomato_DC3000_wildtype_2" 
## [16] "P._syringae_pv_tomato_DC3000_wildtype_3" 
## [17] "P._syringae_pv_tomato_DC3000_wildtype_4" 
## [18] "P._syringae_pv_tomato_DC3000_wildtype_5" 
## [19] "P._syringae_pv_tomato_DC3000_wildtype_6" 
## [20] "P._syringae_pv_tomato_DC3000_wildtype_7" 
## [21] "P._syringae_pv_tomato_DC3000_wildtype_8" 
## [22] "P._syringae_pv_tomato_DC3000_wildtype_9" 
## [23] "P._syringae_pv_tomato_DC3000_wildtype_10"
## [24] "P._syringae_pv_tomato_DC3000_wildtype_11"
## [25] "P._syringae_pv_tomato_DC3000_wildtype_12"
## [26] "P._syringae_pv_tomato_DC3000_wildtype_13"
#colnames(fit65046.comp$contrasts)
#which(fit65046.comp$contrasts!=0,arr.ind = T)

Plot few pairs

pairs(myE[1:1000,c(1,14,27,40)]-myE[1:1000,c(1,14,27,40)+52])

Construct the contrast matrix corresponding to specified contrasts of a set of parameters.

#     print(eval(parse(text = paste0('gse$',i,'[[1]]'))))
# http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf
contrast.matrix = makeContrasts(
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 1])) - 
    eval(parse(text = colnames(fit$design)[1])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 2])) - 
    eval(parse(text = colnames(fit$design)[2])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 3])) - 
    eval(parse(text = colnames(fit$design)[3])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 4])) - 
    eval(parse(text = colnames(fit$design)[4])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 5])) - 
    eval(parse(text = colnames(fit$design)[5])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 6])) - 
    eval(parse(text = colnames(fit$design)[6])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 7])) - 
    eval(parse(text = colnames(fit$design)[7])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 8])) - 
    eval(parse(text = colnames(fit$design)[8])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 9])) - 
    eval(parse(text = colnames(fit$design)[9])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 10])) - 
    eval(parse(text = colnames(fit$design)[10])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 11])) - 
    eval(parse(text = colnames(fit$design)[11])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 12])) - 
    eval(parse(text = colnames(fit$design)[12])),
  eval(parse(text = colnames(fit$design)[length(colnames(fit$design))/2 + 13])) - 
    eval(parse(text = colnames(fit$design)[13])),
  levels=design)

# same as
# contrast.matrix.b = makeContrasts(P._syringae_pv_tomato_DC3000_wildtype_1 - Mock_1,
#                                 P._syringae_pv_tomato_DC3000_wildtype_2 - Mock_2,
#                                 P._syringae_pv_tomato_DC3000_wildtype_3 - Mock_3,
#                                 P._syringae_pv_tomato_DC3000_wildtype_4 - Mock_4,
#                                 P._syringae_pv_tomato_DC3000_wildtype_5 - Mock_5,
#                                 P._syringae_pv_tomato_DC3000_wildtype_6 - Mock_6,
#                                 P._syringae_pv_tomato_DC3000_wildtype_7 - Mock_7,
#                                 P._syringae_pv_tomato_DC3000_wildtype_8 - Mock_8,
#                                 P._syringae_pv_tomato_DC3000_wildtype_9 - Mock_9,
#                                 P._syringae_pv_tomato_DC3000_wildtype_10 - Mock_10,
#                                 P._syringae_pv_tomato_DC3000_wildtype_11 - Mock_11,
#                                 P._syringae_pv_tomato_DC3000_wildtype_12 - Mock_12,
#                                 P._syringae_pv_tomato_DC3000_wildtype_13 - Mock_13,
#                                 levels=design)
# any(contrast.matrix.b != contrast.matrix)

Compute Contrasts from Linear Model Fit

fit.comp <- contrasts.fit(fit, contrast.matrix)

Empirical Bayes Statistics for Differential Expression

fit.comp <- eBayes(fit.comp)

Examine unadjusted p-values

plot(density(fit.comp$p.value))

Plot Venn diagrams per tp

# par(mfrow=c(2,2))
c_1_5 <- decideTests(fit.comp[, 1:5], adjust='BH', p.value=0.05)
vennDiagram(c_1_5, include=c("up", "down"), counts.col=c("red", "blue"), 
            cex=c(0.8, 0.8, 0.8), circle.col=rainbow(5), show.include = TRUE,
            main = "Differences 1-5", 
            names = c ("t1-t1", "t2-t2", "t3-t3", "t4-t4", "t5-t5"))

c_6_10 <- decideTests(fit.comp[, 6:10], adjust='BH', p.value=0.05)
vennDiagram(c_6_10, include=c("up", "down"), counts.col=c("red", "blue"), 
            cex=c(0.8, 0.8, 0.8), circle.col=rainbow(5), show.include = TRUE,
            main = "Differences 6-10", 
            names = c ("t6-t6", "t7-t7", "t8-t8", "t9-t9", "t10-t10"))

c_11_13 <- decideTests(fit.comp[, 11:13], adjust='BH', p.value=0.05)
vennDiagram(c_11_13, include=c("up", "down"), counts.col=c("red", "blue"), 
            cex=c(0.8, 0.8, 0.8), circle.col=rainbow(5), show.include = TRUE,
            main = "Differences 11-13", 
            names = c ("t11-t11", "t12-t12", "t13-t13"))

# par(mfrow=c(1,1))

Up/down-regulated genes in infected leaves

temp = decideTests(fit.comp, adjust='fdr', p.value=0.05)
colnames(temp) = paste0('tp_',seq(1,13,1))
summary(temp)
##     tp_1  tp_2  tp_3  tp_4  tp_5  tp_6  tp_7  tp_8  tp_9 tp_10 tp_11 tp_12
## -1     2  1211   588  2573  1982  3163  2950  4273  4348  5251  4883  4770
## 0  23972 21297 22461 18656 19997 17743 18217 15433 15002 13029 14043 14218
## 1      0  1466   925  2745  1995  3068  2807  4268  4624  5694  5048  4986
##    tp_13
## -1  5432
## 0  13068
## 1   5474

Create output directory

mainDir <- getwd()
subDir <- "outputDirectory_GSE56094"
if (file.exists(subDir)){
  setwd(file.path(mainDir, subDir))
} else {
  dir.create(file.path(mainDir, subDir))
  setwd(file.path(mainDir, subDir))
  
}

Write results

tmp = getwd()
i = 1
comparisons <- topTable(fit.comp, coef=i, adjust='fdr', sort='p', n=Inf)
colnames(comparisons) = paste0(colnames(comparisons), '_tp_00', i)
head(comparisons)
##           logFC_tp_001 AveExpr_tp_001  t_tp_001 P.Value_tp_001
## AT5G62280   -1.1174457       9.307739 -6.396803   9.079465e-09
## AT4G32280   -0.8364199       8.454607 -5.069194   2.412759e-06
## AT5G66580   -0.7290080       9.549609 -4.759518   8.182172e-06
## AT4G22470    1.8837634      14.546944  4.388996   3.350426e-05
## AT5G18060   -0.6052422       8.600483 -4.292388   4.790469e-05
## AT4G34250   -0.6732834       8.489078 -4.245183   5.695893e-05
##           adj.P.Val_tp_001   B_tp_001
## AT5G62280     0.0002176711  1.1211599
## AT4G32280     0.0289217474 -0.6615033
## AT5G66580     0.0653864669 -1.0691053
## AT4G22470     0.2008077617 -1.5466730
## AT5G18060     0.2275889119 -1.6689062
## AT4G34250     0.2275889119 -1.7282407
write.table(cbind(rownames(comparisons), comparisons), 
            sep="\t", quote=F, row.names = FALSE,
            file=paste0(tmp, "/DE_GSE56094_tp_", i, ".tsv"))
for (i in 2:dim(fit.comp$contrasts)[2]){
  comparison <- topTable(fit.comp, coef=i, adjust='fdr', sort='p', n=Inf)
  colnames(comparison) = paste0(colnames(comparison), '_tp_00', i)
  write.table(cbind(rownames(comparison), comparison), 
              sep="\t", quote=F, row.names = FALSE,
              file=paste0(tmp, "/DE_GSE56094_tp_", i, ".tsv"))
  comparisons = cbind(comparisons, comparison)
}
write.table(cbind(rownames(comparisons), comparisons), 
            sep="\t", quote=F, row.names = FALSE, 
            file=paste0(tmp, "/DE_GSE56094_comparisons.tsv"))

setwd(mainDir)

save.image(file="GSE56094.RData")

Article results: http://www.plantcell.org/content/suppl/2015/10/23/tpc.15.00471.DC1/TPC2015-00471-LSBR1_Supplemental_Dataset_1.xlsx

Supplemental Data. Lewis et al. (2015). Plant Cell 10.1105/tpc15.00471.

Supplemental Data Set 1a. LIMMA 3D trend data: inoculation with virulent DC3000 versus MgCl (mock control).

Up Regulated

Time points: 1 2 3 4 5 6 7 8 9 10 11 12 13
Total ids: 0 1712 991 3109 2309 3592 3210 5053 5405 6830 6007 5940 6539

Down Regulated

Time points: 1 2 3 4 5 6 7 8 9 10 11 12 13
Total ids: 3 1331 628 2897 2198 3631 3391 4965 5263 6175 5712 5588 6379