---
title: "Code for generating the statistics and data figures presented in the publication 'Considerations for Implementing Electronic Laboratory Notebooks in an Academic Research Environment'"
author: Stuart G. Higgins
date: "`r format(Sys.time(), '%d %B %Y')`"
output:
  html_notebook:
    theme: sandstone
    toc: true
    toc_depth: 5
    toc_float: true
---
This R Notebook supports the electronic laboratory notebook (ELN) survey data shown in the publication: "Considerations for Implementing Electronic Laboratory Notebooks in an Academic Research Environment", S.G. Higgins, A.A. Nogiwa-Valdez, M.M. Stevens (2021).
# Configure environment
Load required packages:
```{r}
library(here)
library(tidyverse)
library(plotly)
library(htmlwidgets)
```
# Import data
Load the survey data from file, recode 'ongoing' tags in the `date_defunct` column to the year 2021, calculate the total number of years active, and create a logical column recording whether each ELN is defunct (`TRUE`) or still active (`FALSE`) in the year 2021. (Note: this Notebook expects the file 'ELN_Review_Higgins_2021_Survey.csv' to be present in the project root directory identified by the `here` package.)
```{r}
# Read the survey and derive the per-product fields used by every later chunk:
#   date_defunct_numeric - end year, with "ongoing" products counted up to 2021
#   years_active         - end year minus release year
#   defunct_in_2021      - FALSE only for products still tagged "ongoing"
#   row_number           - position of the product in the survey file
data <-
  here("ELN_Review_Higgins_2021_Survey.csv") %>%
  read_csv() %>%
  mutate(
    date_defunct_numeric =
      as.numeric(replace(date_defunct, date_defunct == "ongoing", 2021)),
    years_active = date_defunct_numeric - date_released
  ) %>%
  mutate(
    # Every entry that is not literally "ongoing" (a year, or a missing
    # value) is treated as defunct
    defunct_in_2021 = !(date_defunct %in% "ongoing"),
    row_number = row_number()
  )
```
# Generate statistics
How many ELNs were surveyed?
```{r}
# Total number of ELN products in the survey (one row per product)
count(data)
```
How many of the ELNs surveyed are active (FALSE) or defunct (TRUE) in 2021?
```{r}
# Number of products per value of the defunct_in_2021 flag
count(data, defunct_in_2021)
```
What is the average (and spread) of the lifetime (`years_active`) of the ELNs surveyed? (Note: the median absolute deviation calculated here uses the default scaling constant of 1.4826, so that it acts as a consistent estimator of the standard deviation for normally distributed data.)
```{r}
# Location (mean, median) and spread (SD, MAD, IQR, range) of ELN lifetimes
# across the whole survey
summarise(
  data,
  mean_years_active   = mean(years_active),
  sd_years_active     = sd(years_active),
  median_years_active = median(years_active),
  mad_years_active    = mad(years_active),
  iqr_years_active    = IQR(years_active),
  range_years_active  = max(years_active) - min(years_active)
)
```
What are the average and spread of the lifetimes of ELNs, sub-divided by codebase?
```{r}
# Same location and spread statistics as for the whole survey, but computed
# separately for each codebase
data %>%
  group_by(codebase) %>%
  summarise(
    mean_years_active   = mean(years_active),
    sd_years_active     = sd(years_active),
    median_years_active = median(years_active),
    mad_years_active    = mad(years_active),
    iqr_years_active    = IQR(years_active),
    range_years_active  = max(years_active) - min(years_active)
  )
```
How many of the ELNs surveyed have open-source or proprietary codebases?
```{r}
# Number of products per codebase
count(data, codebase)
```
Which are the longest running proprietary and open source ELNs (in the survey data)?
```{r}
# Within each codebase, keep the product(s) with the largest years_active
# and show only the identifying columns
data %>%
  group_by(codebase) %>%
  slice_max(order_by = years_active, n = 1) %>%
  select(
    product_name,
    manufacturer,
    years_active,
    date_defunct,
    codebase
  )
```
# Generate figures
Define a theme for plotting figures:
```{r}
# Shared ggplot2 theme: black-and-white base with a heavy panel outline,
# no grid lines, large text and short axis ticks.
mytheme <-
  theme_bw() +
  theme(
    # Root line element, named explicitly. Passed unnamed, it is only matched
    # to `line` by argument position, which depends on theme()'s signature.
    line = element_line(size = 2),
    panel.background = element_rect(fill = "white", colour = "black", size = 2),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 25, face = "plain", colour = "black"),
    axis.title.x = element_text(size = 25, face = "plain"),
    axis.title.y = element_text(size = 25),
    axis.ticks.length = unit(0.15, "cm")
  )
```
Define functions for customising the appearance of plotted figures:
```{r}
# Marker colour for a vector of flags: "grey" where the flag equals TRUE,
# "grey30" otherwise (NA stays NA).
get_point_colour <- function(x) {
  is_flagged <- x == TRUE
  ifelse(is_flagged, yes = "grey", no = "grey30")
}
# Line colour for a vector of codebase labels: "#CC79A7" for "opensource",
# "#0072B2" for any other label (NA stays NA).
get_line_colour <- function(x) {
  is_open_source <- x == "opensource"
  ifelse(is_open_source, yes = "#CC79A7", no = "#0072B2")
}
```
Produce the timeline plot featured in Figure 1 of the main manuscript:
```{r}
# Figure 1 timeline: one horizontal segment per ELN running from its release
# year to its end year, with an open circle at the release year and a small
# filled dot at the end year. Rows are ordered by codebase and, within a
# codebase, by years_active.
p_timeline <-
  data %>%
  mutate(row_number = as_factor(row_number)) %>%
  mutate(
    row_number =
      fct_reorder(
        fct_reorder(row_number, years_active, .desc = FALSE),
        codebase,
        .desc = FALSE
      )
  ) %>%
  mutate(
    # Vertical position = rank of the product in the reordered factor
    row_number_new = as.numeric(row_number),
    # Colours are derived inside the pipeline so they always belong to the
    # rows being plotted, rather than relying on the row order of the global
    # `data` object
    line_colour = get_line_colour(codebase),
    point_colour = get_point_colour(defunct_in_2021)
  ) %>%
  ggplot() +
  geom_segment(
    aes(
      x = date_released, xend = date_defunct_numeric,
      y = row_number_new, yend = row_number_new,
      colour = line_colour
    ),
    linetype = "solid",
    size = 0.5
  ) +
  geom_point(
    aes(x = date_released, y = row_number_new, colour = point_colour),
    shape = 1,
    size = 2
  ) +
  geom_point(
    aes(x = date_defunct_numeric, y = row_number_new, colour = point_colour),
    shape = 16,
    size = 0.5
  ) +
  # The colour columns already hold literal colour values; use them as-is
  scale_colour_identity() +
  scale_x_continuous(position = "bottom", breaks = c(seq(1980, 2021, 5))) +
  coord_cartesian(xlim = c(1980, 2021)) +
  theme_bw() +
  theme(
    plot.margin = margin(0.1, 0.1, 0.1, 0.1, "cm"),
    panel.border = element_blank(),
    panel.grid.major.y = element_line(colour = "grey95", size = 0.25),
    panel.grid.major.x = element_line(colour = "grey95", size = 0.25),
    panel.grid.minor.x = element_line(colour = "grey95", size = 0.25),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom"
  )

print(p_timeline)

# Export the figure as an 18 cm x 10 cm PDF next to the project root
ggsave(
  here("ELN_Review_Higgins_2021_Timeline.pdf"),
  plot = p_timeline,
  width = 18.0,
  height = 10,
  device = "pdf",
  dpi = 600,
  units = "cm"
)
```
Generate a separate interactive version of the timeline shown in Figure 1, and export as a standalone HTML file using `plotly` and `htmlwidgets`:
```{r}
# Interactive version of the Figure 1 timeline: one segment per ELN from its
# release year to its end year, coloured by codebase, with hoverable markers
# at both ends. Rows use the same ordering as the static timeline (by
# codebase, then by years_active).
p_interactive <-
  data %>%
  mutate(row_number = as_factor(row_number)) %>%
  mutate(
    row_number =
      fct_reorder(
        fct_reorder(row_number, years_active, .desc = FALSE),
        codebase,
        .desc = FALSE
      )
  ) %>%
  mutate(row_number_new = as.numeric(row_number)) %>%
  mutate(
    # Display names for the legend
    codebase_name = recode(codebase, proprietary = "Proprietary", opensource = "Open-source"),
    # Light grey for defunct products, dark grey for products still active
    marker_colour = if_else(defunct_in_2021 == TRUE, "#C0C0C0", "#4d4d4d"),
    # Hover label: product name, release date and end date.
    # NOTE(review): the separators below are literal newlines; confirm they
    # render as line breaks in the plotly hover label.
    hovertext =
      paste0(
        "",
        product_name,
        "
Date released: ",
        date_released,
        "
Date defunct: ",
        date_defunct,
        ""
      )
  ) %>%
  plot_ly() %>%
  add_segments(
    x = ~date_released, y = ~row_number_new,
    # `yend` is now passed by name; previously the `=` was missing, so a
    # two-sided formula was handed over as an unnamed positional argument
    xend = ~date_defunct_numeric, yend = ~row_number_new,
    color = ~factor(codebase_name),
    colors = c("#CC79A7", "#0072B2")
  ) %>%
  add_markers(
    x = ~date_released,
    y = ~row_number_new,
    text = ~product_name,
    name = "Date released",
    customdata = ~hovertext,
    hovertemplate = "%{customdata}",
    marker =
      list(
        symbol = "circle-open",
        size = 10,
        color = ~marker_colour
      ),
    inherit = FALSE
  ) %>%
  add_markers(
    x = ~date_defunct_numeric,
    y = ~row_number_new,
    text = ~product_name,
    name = "Date defunct",
    customdata = ~hovertext,
    hovertemplate = "%{customdata}",
    marker =
      list(
        size = 3,
        color = ~marker_colour
      )
  ) %>%
  layout(
    title = "How long do electronic laboratory notebooks last?",
    xaxis = list(title = "Timeline of ELN products"),
    yaxis = list(
      title = "Lifetimes of surveyed ELN products",
      showticklabels = FALSE,
      zeroline = FALSE,
      showline = FALSE
    )
  )

# Write the widget out as a single self-contained HTML file
saveWidget(
  partial_bundle(p_interactive),
  here("ELN_Review_Higgins_2021_Lifetimes_Interactive_Figure1.html"),
  selfcontained = TRUE,
  title = "ELN survey"
)
```
Generate a plot showing the total number of new proprietary and open-source ELNs per year, as featured in Figure 1: (Note: axes for this plot were manually appended later in graphics software)
```{r}
# Number of ELN products released per year and codebase
data_summarised <-
  data %>%
  group_by(date_released, codebase) %>%
  summarise(count = n(), .groups = "drop_last")

# Bubble strip for Figure 1: one row per codebase, one bubble per release
# year, bubble area scaled by the number of products released that year
p_releases <-
  data_summarised %>%
  ggplot(aes(x = date_released, y = as.factor(codebase), size = count)) +
  geom_point(
    shape = 21,
    fill = get_line_colour(data_summarised$codebase),
    alpha = 0.5
  ) +
  scale_size(range = c(1, 10)) +
  scale_x_continuous(position = "bottom", breaks = c(seq(1980, 2021, 5))) +
  coord_cartesian(xlim = c(1980, 2021)) +
  theme_bw() +
  theme(
    plot.margin = margin(0.1, 0.1, 0.1, 0.1, "cm"),
    legend.position = "none",
    panel.border = element_blank(),
    # Vertical guides only
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey95", size = 0.25),
    panel.grid.minor.x = element_line(colour = "grey95", size = 0.25),
    # Titles, ticks and y labels are suppressed
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

print(p_releases)

# Export as an 18 cm x 2.5 cm PDF strip
ggsave(
  here("ELN_Review_Higgins_2021_Releases-Per-Year.pdf"),
  plot = p_releases,
  width = 18.0,
  height = 2.5,
  device = "pdf",
  dpi = 600,
  units = "cm"
)
```
# Session information
```{r}
# Print the R version, platform and package versions in use when the
# notebook was run, for reproducibility
sessionInfo()
```