---
title: "Code for generating the statistics and data figures presented in the publication 'Considerations for Implementing Electronic Laboratory Notebooks in an Academic Research Environment'"
author: Stuart G. Higgins
date: "`r format(Sys.time(), '%d %B %Y')`"
output:
  html_notebook:
    theme: sandstone
    toc: true
    toc_depth: 5
    toc_float: true
---

This R Notebook supports the electronic laboratory notebook (ELN) survey data shown in the publication: "Considerations for Implementing Electronic Laboratory Notebooks in an Academic Research Environment", S.G. Higgins, A.A. Nogiwa-Valdez, M.M. Stevens (2021).

# Configure environment

Load required packages:

```{r}
library(here)
library(tidyverse)
library(plotly)
library(htmlwidgets)
```

# Import data

Load in survey data from file, recode 'ongoing' tags in the `date_defunct` column to the year 2021, calculate the total number of years active, and create a logical vector for each row determining whether the ELN is active or not in the year 2021:

(Note: this Notebook expects the file 'ELN_Review_Higgins_2021_Survey.csv' to be present in the directory identified by the `here` package)

```{r}
# Read the survey and derive the columns used by every later chunk.
data <- read_csv(here("ELN_Review_Higgins_2021_Survey.csv")) %>%
  mutate(
    # "ongoing" products are treated as ending in 2021 so that a numeric
    # lifetime can be computed for every row.
    date_defunct_numeric = as.numeric(
      replace(date_defunct, date_defunct == "ongoing", 2021)
    ),
    years_active = date_defunct_numeric - date_released,
    # Every row not tagged "ongoing" counts as defunct. `%in%` never returns
    # NA, so a missing `date_defunct` is reported as defunct (TRUE).
    defunct_in_2021 = !(date_defunct %in% "ongoing"),
    # Stable per-row identifier, used later to order the timeline plots.
    row_number = row_number()
  )
```

# Generate statistics

How many ELNs were surveyed?

```{r}
data %>%
  count()
```

How many of the ELNs surveyed are active (FALSE) or defunct (TRUE) in 2021?

```{r}
data %>%
  count(defunct_in_2021)
```

What is the average (and spread) of the lifetime (`years_active`) of the ELNs surveyed?

(Note: the median absolute deviation (`mad`) estimate here has a default scaling constant of 1.4826, so that it acts as a consistent estimator of the standard deviation)

```{r}
# Summarise the centre and spread of `years_active`.
#
# `df` is a data frame (optionally grouped) with a numeric `years_active`
# column. Returns one row per group (or a single row when ungrouped) holding
# the mean, standard deviation, median, MAD, IQR and range of the lifetimes.
summarise_years_active <- function(df) {
  df %>%
    summarise(
      mean_years_active = mean(years_active),
      sd_years_active = sd(years_active),
      median_years_active = median(years_active),
      mad_years_active = mad(years_active),
      iqr_years_active = IQR(years_active),
      range_years_active = diff(range(years_active))
    )
}

data %>%
  summarise_years_active()
```

What are the average and spread of the lifetimes of ELNs, sub-divided by codebase?

```{r}
data %>%
  group_by(codebase) %>%
  summarise_years_active()
```

How many of the ELNs surveyed have open-source or proprietary codebases?

```{r}
data %>%
  count(codebase)
```

Which are the longest running proprietary and open source ELNs (in the survey data)?

```{r}
# Longest-lived product within each codebase (ties are all returned).
data %>%
  group_by(codebase) %>%
  slice_max(years_active, n = 1) %>%
  ungroup() %>%
  select(product_name, manufacturer, years_active, date_defunct, codebase)
```

# Generate figures

Define a theme for plotting figures:

```{r}
mytheme <- theme_bw() +
  theme(
    # Named explicitly: an unnamed element_line() only reaches `line` through
    # positional argument matching.
    line = element_line(size = 2),
    panel.background = element_rect(fill = "white", colour = "black", size = 2),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 25, face = "plain", colour = "black"),
    axis.title.x = element_text(size = 25, face = "plain"),
    axis.title.y = element_text(size = 25),
    axis.ticks.length = unit(0.15, "cm")
  )
```

Define functions for customising the appearance and ordering of plotted figures:

```{r}
# Marker colour from a logical "defunct" flag: light grey for defunct (TRUE),
# dark grey for still active (FALSE).
get_point_colour <- function(x) {
  ifelse(x, "grey", "grey30")
}

# Line colour from the codebase label: pink for "opensource", blue otherwise.
get_line_colour <- function(x) {
  ifelse(x != "opensource", "#0072B2", "#CC79A7")
}

# Add `row_number_new`, the vertical plotting position of each product.
# Rows are ordered by lifetime and then by codebase, so that each codebase
# forms one contiguous band in the timeline. Row order of `df` is unchanged.
add_plot_order <- function(df) {
  df %>%
    mutate(row_number = as_factor(row_number)) %>%
    mutate(
      row_number = fct_reorder(
        fct_reorder(row_number, years_active, .desc = FALSE),
        codebase,
        .desc = FALSE
      )
    ) %>%
    mutate(row_number_new = as.numeric(row_number))
}
```

Produce the timeline plot featured in Figure 1 of the main manuscript:

```{r}
timeline_data <- add_plot_order(data)

# Colours are set as constants (outside aes()), so they are taken from the
# same data frame that is plotted to guarantee the rows line up.
p_timeline <- timeline_data %>%
  ggplot() +
  geom_segment(
    aes(
      x = date_released,
      xend = date_defunct_numeric,
      y = row_number_new,
      yend = row_number_new
    ),
    colour = get_line_colour(timeline_data$codebase),
    linetype = "solid",
    size = 0.5
  ) +
  geom_point(
    aes(x = date_released, y = row_number_new),
    colour = get_point_colour(timeline_data$defunct_in_2021),
    shape = 1,
    size = 2
  ) +
  geom_point(
    aes(x = date_defunct_numeric, y = row_number_new),
    colour = get_point_colour(timeline_data$defunct_in_2021),
    shape = 16,
    size = 0.5
  ) +
  scale_x_continuous(position = "bottom", breaks = c(seq(1980, 2021, 5))) +
  coord_cartesian(xlim = c(1980, 2021)) +
  theme_bw() +
  theme(
    plot.margin = margin(0.1, 0.1, 0.1, 0.1, "cm"),
    panel.border = element_blank(),
    panel.grid.major.y = element_line(colour = "grey95", size = 0.25),
    panel.grid.major.x = element_line(colour = "grey95", size = 0.25),
    panel.grid.minor.x = element_line(colour = "grey95", size = 0.25),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom"
  )

print(p_timeline)

ggsave(
  here("ELN_Review_Higgins_2021_Timeline.pdf"),
  plot = p_timeline,
  width = 18.0,
  height = 10,
  device = "pdf",
  dpi = 600,
  units = "cm"
)
```

Generate a separate interactive version of the timeline shown in Figure 1, and export as a standalone HTML file using `plotly` and `htmlwidgets`:

```{r}
p_interactive <- data %>%
  add_plot_order() %>%
  mutate(
    codebase_name = recode(
      codebase,
      proprietary = "Proprietary",
      opensource = "Open-source"
    ),
    marker_colour = if_else(defunct_in_2021, "#C0C0C0", "#4d4d4d"),
    # plotly hover labels break lines on <br>, not on newline characters.
    hovertext = paste0(
      product_name,
      "<br>Date released: ", date_released,
      "<br>Date defunct: ", date_defunct
    )
  ) %>%
  plot_ly() %>%
  add_segments(
    x = ~date_released,
    y = ~row_number_new,
    xend = ~date_defunct_numeric,
    yend = ~row_number_new,
    color = ~factor(codebase_name),
    # Factor levels sort as "Open-source", "Proprietary", so pink maps to
    # open-source and blue to proprietary, matching get_line_colour().
    colors = c("#CC79A7", "#0072B2")
  ) %>%
  add_markers(
    x = ~date_released,
    y = ~row_number_new,
    text = ~product_name,
    name = "Date released",
    customdata = ~hovertext,
    hovertemplate = "%{customdata}",
    marker = list(
      symbol = "circle-open",
      size = 10,
      color = ~marker_colour
    ),
    inherit = FALSE
  ) %>%
  add_markers(
    x = ~date_defunct_numeric,
    y = ~row_number_new,
    text = ~product_name,
    name = "Date defunct",
    customdata = ~hovertext,
    hovertemplate = "%{customdata}",
    marker = list(
      size = 3,
      color = ~marker_colour
    )
  ) %>%
  layout(
    title = "How long do electronic laboratory notebooks last?",
    xaxis = list(title = "Timeline of ELN products"),
    yaxis = list(
      title = "Lifetimes of surveyed ELN products",
      showticklabels = FALSE,
      zeroline = FALSE,
      showline = FALSE
    )
  )

saveWidget(
  partial_bundle(p_interactive),
  here("ELN_Review_Higgins_2021_Lifetimes_Interactive_Figure1.html"),
  selfcontained = TRUE,
  title = "ELN survey"
)
```

Generate a plot showing the total number of new proprietary and open-source ELNs per year, as featured in Figure 1:

(Note: axes for this plot were manually appended later in graphics software)

```{r}
# Number of products released per year and codebase (ungrouped result).
data_summarised <- data %>%
  count(date_released, codebase, name = "count")

p_releases <- data_summarised %>%
  ggplot(aes(x = date_released, y = as.factor(codebase), size = count)) +
  geom_point(
    shape = 21,
    fill = get_line_colour(data_summarised$codebase),
    alpha = 0.5
  ) +
  scale_size(range = c(1, 10)) +
  scale_x_continuous(position = "bottom", breaks = c(seq(1980, 2021, 5))) +
  coord_cartesian(xlim = c(1980, 2021)) +
  theme_bw() +
  theme(
    plot.margin = margin(0.1, 0.1, 0.1, 0.1, "cm"),
    panel.border = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey95", size = 0.25),
    panel.grid.minor.x = element_line(colour = "grey95", size = 0.25),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "none"
  )

print(p_releases)

ggsave(
  here("ELN_Review_Higgins_2021_Releases-Per-Year.pdf"),
  plot = p_releases,
  width = 18.0,
  height = 2.5,
  device = "pdf",
  dpi = 600,
  units = "cm"
)
```

# Session information

```{r}
sessionInfo()
```