Ted Laderas
March 11, 2015
Hadley Wickham started building solutions to specific R problems as part of his dissertation work, expanding to form an empire within R.
ggplot2 is a graphical plotting package that utilizes a “grammar of graphics”:
# Smallest useful plot: a data frame plus one geom whose aes() maps
# columns of the data onto the x and y positions.
ggplot(iris) +
  geom_point(aes(x = Sepal.Length, y = Sepal.Width))
# + theme_minimal()
library(ggplot2)
data(iris)

# Scatter plot of sepal width against sepal length, drawn with the
# minimal theme.
ggplot(iris) +
  geom_point(aes(x = Sepal.Width, y = Sepal.Length)) +
  theme_minimal()
aes() lets you map a variable to a visual property of the geom. Aesthetics can include:
Documentation for aes() is remarkably terrible, as it’s different for each geom. Look at the documentation for each geom for a list of mappable aesthetics.
# Besides x and y, map Species to point colour and Petal.Length to
# point size.
ggplot(iris) +
  geom_point(
    aes(
      x = Sepal.Width,
      y = Sepal.Length,
      color = Species,
      size = Petal.Length
    )
  )
# Colour the points by the numeric Petal.Length column and set the two
# ends of the colour scale to white (low) and green (high).
ggplot(iris) +
  geom_point(
    aes(x = Sepal.Width, y = Sepal.Length, color = Petal.Length)
  ) +
  scale_color_continuous(low = "white", high = "green")
# Point size follows Petal.Length and point colour follows Species.
ggplot(iris) +
  geom_point(
    aes(
      x = Sepal.Width,
      y = Sepal.Length,
      size = Petal.Length,
      color = Species
    )
  )
Common geoms to use are below. Each of these requires a different aesthetic mapping.
I will try to talk about two or three of them.
# Histogram of sepal width; the bars are filled and grouped by species.
ggplot(iris) +
  geom_histogram(
    aes(x = Sepal.Width, fill = Species, group = Species)
  )
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Violin plot: one violin per species showing its sepal widths.
ggplot(iris) +
  geom_violin(aes(x = Species, y = Sepal.Width))
ggplot2 becomes very powerful once you start layering geoms. Here we overlay a loess-smoothed curve, fitted across all of the data points, on top of the scatter plot.
# Two layers drawn from the same x/y mapping: the raw points and a
# smoothed trend. The mapping is declared once in ggplot() and both
# geoms inherit it.
ggplot(iris, aes(x = Sepal.Width, y = Sepal.Length)) +
  geom_point() +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Perhaps we want to visualize the data onto three separate plots (one for each species). We can do this with facets:
# One panel per species, stacked as rows (`Species ~ .`).
# The formula is passed positionally: the `facets =` argument name is
# deprecated in current ggplot2 releases, while the positional form is
# accepted by both old and new versions.
ggplot(iris) +
  geom_point(aes(x = Sepal.Width, y = Sepal.Length)) +
  facet_grid(Species ~ .)
To lay the facets out as columns instead of rows, we can reverse the order of the facet_grid() formula:
# One panel per species, side by side as columns (`. ~ Species`).
# The formula is passed positionally: the `facets =` argument name is
# deprecated in current ggplot2 releases, while the positional form is
# accepted by both old and new versions.
ggplot(iris) +
  geom_point(aes(x = Sepal.Width, y = Sepal.Length)) +
  facet_grid(. ~ Species)
The look of everything on the graph can be customized, including
See ?theme for more information. Also: http://sharpstatistics.co.uk/r/ggplot2-guide/
The default theme is called theme_grey(). You can see all of the properties of a theme by looking at it.
# Evaluating the theme object at top level prints the full list of its
# properties and the element each one is set to.
theme_grey()
## List of 40
## $ line :List of 4
## ..$ colour : chr "black"
## ..$ size : num 0.5
## ..$ linetype: num 1
## ..$ lineend : chr "butt"
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ rect :List of 4
## ..$ fill : chr "white"
## ..$ colour : chr "black"
## ..$ size : num 0.5
## ..$ linetype: num 1
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ text :List of 8
## ..$ family : chr ""
## ..$ face : chr "plain"
## ..$ colour : chr "black"
## ..$ size : num 12
## ..$ hjust : num 0.5
## ..$ vjust : num 0.5
## ..$ angle : num 0
## ..$ lineheight: num 0.9
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : chr "grey50"
## ..$ size :Class 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ strip.text :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size :Class 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.line : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.text.x :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.y :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.ticks :List of 4
## ..$ colour : chr "grey50"
## ..$ size : NULL
## ..$ linetype: NULL
## ..$ lineend : NULL
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ axis.title.x :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.y :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : num 90
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.ticks.length :Class 'unit' atomic [1:1] 0.15
## .. ..- attr(*, "unit")= chr "cm"
## .. ..- attr(*, "valid.unit")= int 1
## $ axis.ticks.margin :Class 'unit' atomic [1:1] 0.1
## .. ..- attr(*, "unit")= chr "cm"
## .. ..- attr(*, "valid.unit")= int 1
## $ legend.background :List of 4
## ..$ fill : NULL
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype: NULL
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ legend.margin :Class 'unit' atomic [1:1] 0.2
## .. ..- attr(*, "unit")= chr "cm"
## .. ..- attr(*, "valid.unit")= int 1
## $ legend.key :List of 4
## ..$ fill : chr "grey95"
## ..$ colour : chr "white"
## ..$ size : NULL
## ..$ linetype: NULL
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ legend.key.size :Class 'unit' atomic [1:1] 1.2
## .. ..- attr(*, "unit")= chr "lines"
## .. ..- attr(*, "valid.unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.text :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size :Class 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.text.align : NULL
## $ legend.title :List of 8
## ..$ family : NULL
## ..$ face : chr "bold"
## ..$ colour : NULL
## ..$ size :Class 'rel' num 0.8
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.title.align : NULL
## $ legend.position : chr "right"
## $ legend.direction : NULL
## $ legend.justification: chr "center"
## $ legend.box : NULL
## $ panel.background :List of 4
## ..$ fill : chr "grey90"
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype: NULL
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ panel.border : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ panel.grid.major :List of 4
## ..$ colour : chr "white"
## ..$ size : NULL
## ..$ linetype: NULL
## ..$ lineend : NULL
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ panel.grid.minor :List of 4
## ..$ colour : chr "grey95"
## ..$ size : num 0.25
## ..$ linetype: NULL
## ..$ lineend : NULL
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ panel.margin :Class 'unit' atomic [1:1] 0.25
## .. ..- attr(*, "unit")= chr "lines"
## .. ..- attr(*, "valid.unit")= int 3
## $ panel.margin.x : NULL
## $ panel.margin.y : NULL
## $ strip.background :List of 4
## ..$ fill : chr "grey80"
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype: NULL
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ strip.text.x :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ strip.text.y :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : num -90
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.background :List of 4
## ..$ fill : NULL
## ..$ colour : chr "white"
## ..$ size : NULL
## ..$ linetype: NULL
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ plot.title :List of 8
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size :Class 'rel' num 1.2
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight: NULL
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.margin :Class 'unit' atomic [1:4] 1 1 0.5 0.5
## .. ..- attr(*, "unit")= chr "lines"
## .. ..- attr(*, "valid.unit")= int 3
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi TRUE
To customize the theme, you must use elements to fill out the various properties of the theme list. Each of these properties maps to a specific element, either element_text, element_rect, or element_line.
For example, we want a white background and different fonts for the axis titles.
# Custom theme: white panel background, and a different font family for
# each axis title.
theme_new <- theme(
  panel.background = element_rect(fill = "white"),
  axis.title.x = element_text(family = "Times New Roman"),
  axis.title.y = element_text(family = "Arial")
)

# Add the custom theme to a plot like any other layer.
ggplot(iris) +
  geom_point(aes(x = Sepal.Width, y = Sepal.Length)) +
  theme_new
# Choose the colour for each species by hand instead of using the
# default palette.
ggplot(iris) +
  geom_point(
    aes(x = Sepal.Width, y = Sepal.Length, color = Species)
  ) +
  scale_color_manual(values = c("blue", "purple", "gold"))
As bioinformaticians, we often work with a matrix-like format where genes are rows and samples are columns. This “wide” data format is not directly usable by ggplot.
We need to “melt” the data frame to make it easier for ggplot to work with. Our melted data frame will have one data point per row, plus additional Gene and Sample columns. First we load our data.
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.1.2
# Toy expression matrix in "wide" form: one row per gene (G1..G8) and one
# column per sample (two tumor samples, two normal samples).
expData <- data.frame(
  Gene    = paste0("G", 1:8),
  Tumor1  = c(5.6, 6.2, 5.6, 6.6, 1.3, 1.4, 2.1, 2.3),
  Tumor2  = c(5.4, 6.1, 5.8, 6.2, 1.3, 1.8, 2.2, 2.1),
  Normal1 = c(2.0, 2.5, 1.1, 2.8, 5.5, 5.8, 6.1, 5.7),
  Normal2 = c(2.1, 2.6, 1.3, 2.4, 5.7, 5.4, 5.8, 5.9)
)

# Show the wide table.
expData
## Gene Tumor1 Tumor2 Normal1 Normal2
## 1 G1 5.6 5.4 2.0 2.1
## 2 G2 6.2 6.1 2.5 2.6
## 3 G3 5.6 5.8 1.1 1.3
## 4 G4 6.6 6.2 2.8 2.4
## 5 G5 1.3 1.3 5.5 5.7
## 6 G6 1.4 1.8 5.8 5.4
## 7 G7 2.1 2.2 6.1 5.8
## 8 G8 2.3 2.1 5.7 5.9
Then we melt the data using the melt() function:
# Reshape from wide to long: Gene stays as the identifier, the sample
# column names go into `Sample`, and the measurements go into `value`.
expDataMelt <- melt(
  expData,
  id.vars = "Gene",
  variable.name = "Sample",
  value.name = "value"
)

# Show the long table.
expDataMelt
## Gene Sample value
## 1 G1 Tumor1 5.6
## 2 G2 Tumor1 6.2
## 3 G3 Tumor1 5.6
## 4 G4 Tumor1 6.6
## 5 G5 Tumor1 1.3
## 6 G6 Tumor1 1.4
## 7 G7 Tumor1 2.1
## 8 G8 Tumor1 2.3
## 9 G1 Tumor2 5.4
## 10 G2 Tumor2 6.1
## 11 G3 Tumor2 5.8
## 12 G4 Tumor2 6.2
## 13 G5 Tumor2 1.3
## 14 G6 Tumor2 1.8
## 15 G7 Tumor2 2.2
## 16 G8 Tumor2 2.1
## 17 G1 Normal1 2.0
## 18 G2 Normal1 2.5
## 19 G3 Normal1 1.1
## 20 G4 Normal1 2.8
## 21 G5 Normal1 5.5
## 22 G6 Normal1 5.8
## 23 G7 Normal1 6.1
## 24 G8 Normal1 5.7
## 25 G1 Normal2 2.1
## 26 G2 Normal2 2.6
## 27 G3 Normal2 1.3
## 28 G4 Normal2 2.4
## 29 G5 Normal2 5.7
## 30 G6 Normal2 5.4
## 31 G7 Normal2 5.8
## 32 G8 Normal2 5.9
# Heatmap: one tile per gene/sample pair, filled by expression value on
# a white-to-dark-green gradient.
ggplot(expDataMelt) +
  geom_tile(aes(x = Sample, y = Gene, fill = value)) +
  scale_fill_gradient(low = "white", high = "darkgreen")
Try different palette numbers here!
# Same heatmap, but the fill colours come from scale_fill_distiller();
# `palette` selects which palette of the given `type` is used.
ggplot(expDataMelt) +
  geom_tile(aes(x = Sample, y = Gene, fill = value)) +
  scale_fill_distiller(type = "div", palette = 2)
You can change labels using the labs() function and you can also use one theme as a starting point and modify it as you wish.
# Heatmap with custom labels. theme_minimal() is the starting point and
# the theme() call after it overrides a single property: the x-axis tick
# labels are rotated by 90 degrees.
ggplot(expDataMelt) +
  geom_tile(aes(x = Sample, y = Gene, fill = value)) +
  scale_fill_gradient(low = "white", high = "darkgreen") +
  labs(x = "Sample Label", fill = "Expression") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))
Let’s plot the expression values as a line plot. To do this we need to tell ggplot2 to draw one line per gene by grouping on Gene. Additionally, we want a facet for each gene.
# One line per gene across the samples (group = Gene), coloured by gene,
# with each gene in its own row of panels (`Gene ~ .`).
# The formula is passed positionally: the `facets =` argument name is
# deprecated in current ggplot2 releases, while the positional form is
# accepted by both old and new versions.
ggplot(expDataMelt) +
  geom_line(aes(x = Sample, y = value, color = Gene, group = Gene)) +
  facet_grid(Gene ~ .)
# NOTE(review): `drugmelt` is not created anywhere in this document; it is
# presumably a melted data frame loaded elsewhere — confirm before running.
#
# Rows kept: variable S6_pS235_S236, non-missing Stim, Cell BT20, any Drug
# other than DAS or MEKi, and Time below 720. The result is plotted as
# points and lines over Time, one colour/group per DS, in a Stim-by-Drug
# grid of panels with margin panels added.
#
# NOTE(review): the 720 axis break lies outside the `Time < 720` filter;
# confirm whether `<=` was intended.
ggplot(
  drugmelt[
    drugmelt$variable == "S6_pS235_S236" &
      !is.na(drugmelt$Stim) &
      drugmelt$Cell == "BT20" &
      !drugmelt$Drug %in% c("DAS", "MEKi") &
      drugmelt$Time < 720,
  ],
  aes(Time, value, group = DS, colour = DS)
) +
  geom_point() +
  geom_line() +
  # Spell out `margins = TRUE`: `margin` only worked through partial
  # argument matching, and `T` is an ordinary variable that can be
  # reassigned.
  facet_grid(Stim ~ Drug, margins = TRUE) +
  scale_x_continuous(breaks = c(30, 120, 480, 720)) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
ggplot2 is great for static graphics. But for interactive graphics that allow you to change parameters, you will need to learn ggvis.
The good news: ggvis uses a similar grammar. The bad news: everything has a different name. Learning how to parametrize a plot can be tricky.
This presentation is available here: http://bit.ly/1GJu8GV