Last updated on Wed Sep 9 11:33:34 2020.
How to re-use this work
If you use these figures in your own work, please cite this website: https://doi.org/10.5281/zenodo.3470119
# Load required packages
library(magrittr)
library(dplyr)
library(jsonlite)
library(forcats)
library(ggplot2)
library(plotly)
library(here)
library(lubridate)
# A single request to the PDBe search endpoint: entries whose title matches
# *SARS-CoV-2* and whose status is REL, restricted to the fields listed after
# `fl=` and returned as JSON. The URL is assembled from pieces only to keep
# the lines short; the resulting string is one contiguous URL.
pdb_query <- paste0(
  "https://www.ebi.ac.uk/pdbe/search/pdb/select",
  "?q=title:*SARS-CoV-2*%20AND%20status:REL",
  "&fl=pdb_id,deposition_date,citation_year,title,experimental_method,",
  "resolution,organism_scientific_name,molecule_name,molecule_type",
  "&rows=1000000",
  "&wt=json"
)
# Helper: take a parsed search reply, reach into its `response` element,
# then into `docs`, and hand the result back as a tibble.
dig_up_data <- function(value) {
  as_tibble(value$response$docs)
}
# Download the search results and coerce the columns used further down
pdb_data <- fromJSON(pdb_query) %>%
  dig_up_data() %>%
  bind_rows() %>%
  mutate(
    # Convert to character first, then to a factor
    experimental_method = as_factor(as.character(experimental_method)),
    # Parse the full timestamp, then keep only the calendar date
    deposition_date = as_date(ymd_hms(deposition_date)),
    citation_year = as.integer(citation_year),
    molecule_name = as.character(molecule_name),
    molecule_type = as_factor(as.character(molecule_type))
  ) %>%
  # One row per PDB entry, keeping all other columns of the retained row
  distinct(pdb_id, .keep_all = TRUE)
About this section
With the ongoing COVID-19 pandemic, new structures of proteins from SARS-CoV-2 are deposited to the PDB at a fast pace. The visualizations presented here won't help curb the pandemic (and are not medical advice, obviously), but I hope they will help curious and helpless minds (like mine) get a feel for the research effort currently being deployed against it.
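The following resources are a lot more relevant and actionable:
- Coronavirus Structural Task Force (https://insidecorona.net/)
- PDBe COVID-19 Data Portal (https://www.ebi.ac.uk/pdbe/covid-19)
- Crowdfight COVID-19 (https://crowdfightcovid19.org/)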
SARS-CoV-2 structures by deposition date
All figures are interactive (you can zoom in, and hovering over elements will show more information).
# Bar chart counting PDB entries per deposition date
sars_cov2_structures_year <- ggplot(
  data = pdb_data,
  mapping = aes(x = deposition_date)
) +
  geom_bar() +
  labs(
    title = "Structures of SARS-CoV-2 by deposition date",
    x = "Deposition date",
    y = "Number of PDB entries"
  ) +
  theme_bw()

# Render the static ggplot as an interactive plotly widget
ggplotly(sars_cov2_structures_year)
The two noticeable spikes (68 structures deposited on March 15th and 13 structures deposited on April 7th) come from a fragment screening experiment performed at Diamond Light Source. All these structures are of the same protein (the main protease) in complex with candidate molecules for drug design.
Download figure in SVG format
SARS-CoV-2 structures by experimental method
# Bar chart counting PDB entries per experimental method; each bar is also
# filled by method so the legend doubles as a colour key
sars_cov2_structures_method <- ggplot(
  data = pdb_data,
  mapping = aes(x = experimental_method, fill = experimental_method)
) +
  geom_bar() +
  guides(fill = guide_legend(title = "Experimental method")) +
  labs(
    title = "Structures of SARS-CoV-2 by experimental method",
    x = "",
    y = "Number of PDB entries"
  ) +
  theme_bw()

# Render the static ggplot as an interactive plotly widget
ggplotly(sars_cov2_structures_method)
Download figure in SVG format
Dataset
The graphs presented above are derived from the following dataset:
# Build the display table: keep four columns under human-readable headers,
# then list the most recent citation years first
pdb_table <- pdb_data %>%
  select(
    `PDB code` = pdb_id,
    `Citation year` = citation_year,
    `Experimental method` = experimental_method,
    Title = title
  ) %>%
  arrange(desc(`Citation year`))

pdb_table
Download raw dataset in JSON format
LS0tCnRpdGxlOiAiU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIgotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQobWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0UpCmBgYAoKKipMYXN0IHVwZGF0ZWQgb24gYHIgZGF0ZSgpYC4qKgoKIyMgSG93IHRvIHJlLXVzZSB0aGlzIHdvcmsKCklmIHlvdSB1c2UgdGhlc2UgZmlndXJlcyBpbiB5b3VyIG93biB3b3JrLCBwbGVhc2UgY2l0ZSB0aGlzIHdlYnNpdGU6CjxodHRwczovL2RvaS5vcmcvMTAuNTI4MS96ZW5vZG8uMzQ3MDExOT4KCmBgYHtyIExvYWQgcGFja2FnZXMsIGRvd25sb2FkIGRhdGEgYW5kIHByZXBhcmUgZGF0YXNldH0KIyBMb2FkIHJlcXVpcmVkIHBhY2thZ2VzCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoanNvbmxpdGUpCmxpYnJhcnkoZm9yY2F0cykKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeShoZXJlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKCiMgV2UgbmVlZCB0d28gcXVlcmllczogb25lIGZvciB1cHBlcmNhc2UgdGl0bGVzLCB0aGUgb3RoZXIgZm9yIGxvd2VyY2FzZSBvbmVzCnBkYl9xdWVyeSA8LSAnaHR0cHM6Ly93d3cuZWJpLmFjLnVrL3BkYmUvc2VhcmNoL3BkYi9zZWxlY3Q/cT10aXRsZToqU0FSUy1Db1YtMiolMjBBTkQlMjBzdGF0dXM6UkVMJmZsPXBkYl9pZCxkZXBvc2l0aW9uX2RhdGUsY2l0YXRpb25feWVhcix0aXRsZSxleHBlcmltZW50YWxfbWV0aG9kLHJlc29sdXRpb24sb3JnYW5pc21fc2NpZW50aWZpY19uYW1lLG1vbGVjdWxlX25hbWUsbW9sZWN1bGVfdHlwZSZyb3dzPTEwMDAwMDAmd3Q9anNvbicKCiMgVGhpcyBpcyBhIGhlbHBlciBwaXBlbGluZSB0byBleHRyYWN0IGRhdGEKZGlnX3VwX2RhdGEgPC0gLiAlPiUKICAgIC4kcmVzcG9uc2UgJT4lCiAgICAuJGRvY3MgJT4lCiAgICBhc190aWJibGUoKQoKIyBRdWVyeSB0aGUgUERCIGFuZCBjbGVhbiB1cCBkYXRhCnBkYl9kYXRhIDwtIHBkYl9xdWVyeSAlPiUgCiAgICBmcm9tSlNPTigpICU+JSAKICAgIGRpZ191cF9kYXRhKCkgJT4lIAogICAgYmluZF9yb3dzKCkgJT4lIAogICAgbXV0YXRlKAogICAgICBleHBlcmltZW50YWxfbWV0aG9kICAgPSBhc19mYWN0b3IoYXMuY2hhcmFjdGVyKGV4cGVyaW1lbnRhbF9tZXRob2QpKSwKICAgICAgZGVwb3NpdGlvbl9kYXRlICAgICAgID0gYXNfZGF0ZSh5bWRfaG1zKGRlcG9zaXRpb25fZGF0ZSkpLAogICAgICBjaXRhdGlvbl95ZWFyICAgICAgICAgPSBhcy5pbnRlZ2VyKGNpdGF0aW9uX3llYXIpLAogICAgICBtb2xlY3VsZV9uYW1lICAgICAgICAgPSBhcy5jaGFyYWN0ZXIobW9sZWN1bGVfbmFtZSksCiAgICAgIG1vbGVjdWxlX3R5cGUgICAgICAgICA9IGFzX2ZhY3Rvcihhcy5jaGFyYWN0ZXIobW9sZWN1bGVfdHlwZSkpCiAgICAgICkgJT4lIAogICAgZGlzdGluY3QocGRiX2lkLCAua2VlcF9hbGwgPSBUUlVFKQpgYGAKCiMjIEFib3V0IHRoaXMgc2Vj
dGlvbgoKV2l0aCB0aGUgb25nb2luZyBbQ09WSUQtMTkKcGFuZGVtaWNdKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0NPVklELTE5X3BhbmRlbWljKSwgbmV3IHN0cnVjdHVyZXMgb2YKcHJvdGVpbnMgZnJvbQpbU0FSUy1Db1YtMl0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvU2V2ZXJlX2FjdXRlX3Jlc3BpcmF0b3J5X3N5bmRyb21lX2Nvcm9uYXZpcnVzXzIpIGFyZSBkZXBvc2l0ZWQgdG8gdGhlIFBEQiBhdCBhIGZhc3QgcGFjZS4gVGhlIHZpc3VhbGl6YXRpb25zIHByZXNlbnRlZCBoZXJlCndvbid0IGhlbHAgY3VyYiB0aGUgcGFuZGVtaWMgKGFuZCBhcmUgbm90IG1lZGljYWwgYWR2aWNlLCBvYnZpb3VzbHkpLCBidXQgSSBob3BlCnRoZXkgd2lsbCBtYWtlIGN1cmlvdXMgYW5kIGhlbHBsZXNzIG1pbmRzIChsaWtlIG15c2VsZikgZ2V0IGEgZmVlbCBmb3IgdGhlCnJlc2VhcmNoIGVmZm9ydCBjdXJyZW50bHkgYmVpbmcgZGVwbG95ZWQgYWdhaW5zdCBpdC4KClRoZSBmb2xsb3dpbmcgcmVzb3VyY2VzIGFyZSBhIGxvdCBtb3JlIHJlbGV2YW50IGFuZCBhY3Rpb25hYmxlOgoKLSBbQ29yb25hdmlydXMgU3RydWN0dXJhbCBUYXNrIEZvcmNlXShodHRwczovL2luc2lkZWNvcm9uYS5uZXQvKQotIFtQREJlIENPVklELTE5IERhdGEgUG9ydGFsXShodHRwczovL3d3dy5lYmkuYWMudWsvcGRiZS9jb3ZpZC0xOSkKLSBbQ3Jvd2RmaWdodCBDT1ZJRC0xOV0oaHR0cHM6Ly9jcm93ZGZpZ2h0Y292aWQxOS5vcmcvKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGRlcG9zaXRpb24gZGF0ZQoKQWxsIGZpZ3VyZXMgYXJlIGludGVyYWN0aXZlICh5b3UgY2FuIHpvb20gaW4sIGFuZCBob3ZlcmluZyBvdmVyIGVsZW1lbnRzIHdpbGwKc2hvdyBtb3JlIGluZm9ybWF0aW9uKS4KCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSB5ZWFyfQpzYXJzX2NvdjJfc3RydWN0dXJlc195ZWFyIDwtIHBkYl9kYXRhICU+JSAKICAgIGdncGxvdCgpICsKICAgIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IGRlcG9zaXRpb25fZGF0ZSkpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBkZXBvc2l0aW9uIGRhdGUiKSArCiAgICB4bGFiKCJEZXBvc2l0aW9uIGRhdGUiKSArCiAgICB5bGFiKCJOdW1iZXIgb2YgUERCIGVudHJpZXMiKSArCiAgICB0aGVtZV9idygpCmdncGxvdGx5KHNhcnNfY292Ml9zdHJ1Y3R1cmVzX3llYXIpCmBgYAoKVGhlIHR3byBub3RpY2VhYmxlIHNwaWtlcyAoNjggc3RydWN0dXJlcyBkZXBvc2l0ZWQgb24gTWFyY2ggMTV0aCBhbmQgMTMKc3RydWN0dXJlcyBkZXBvc2l0ZWQgb24gQXByaWwgN3RoKSBjb21lIGZyb20gW2EgZnJhZ21lbnQgc2NyZWVuaW5nCmV4cGVyaW1lbnRdKGh0dHBzOi8vd3d3LmRpYW1vbmQuYWMudWsvY292aWQtMTkvZm9yLXNjaWVudGlzdHMvTWFpbi1wcm90ZWFzZS1zdHJ1Y3R1cmUtYW5kLVhDaGVtLmh0bWwpCnBlcmZvcm1lZCBhdCBEaWFtb25kIExpZ2h0
IFNvdXJjZS4gQWxsIHRoZXNlIHN0cnVjdHVyZXMgYXJlIHRoZSBzYW1lIHByb3RlaW4KKG1haW4gcHJvdGVhc2UpIGluIGNvbXBsZXggd2l0aCBjYW5kaWRhdGUgbW9sZWN1bGVzIGZvciBkcnVnIGRlc2lnbi4KCmBgYHtyIFNhdmUgU1ZHIGZpbGUgb2YgeWVhciBncmFwaCwgaW5jbHVkZT1GQUxTRX0KIyBTYXZlIGZpZ3VyZSBmb3IgZG93bmxvYWQKaWYgKCFkaXIuZXhpc3RzKGhlcmUoImZpZ3VyZXMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZmlndXJlcyIpKQp9Cmdnc2F2ZShmaWxlbmFtZSA9ICJzYXJzLWNvdjItc3RydWN0dXJlcy1ieS15ZWFyLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfeWVhciwKICAgICAgIGRldmljZSA9ICJzdmciLAogICAgICAgcGF0aCA9IGhlcmUoImZpZ3VyZXMiKSkKYGBgCgpbKipEb3dubG9hZCBmaWd1cmUgaW4gU1ZHIGZvcm1hdCoqXShmaWd1cmVzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLWJ5LXllYXIuc3ZnKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGV4cGVyaW1lbnRhbCBtZXRob2QKCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSBleHBlcmltZW50YWwgbWV0aG9kfQpzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QgPC0gcGRiX2RhdGEgJT4lIAogICAgZ2dwbG90KCkgKwogICAgZ2VvbV9iYXIobWFwcGluZyA9IGFlcyh4ID0gZXhwZXJpbWVudGFsX21ldGhvZCwgZmlsbCA9IGV4cGVyaW1lbnRhbF9tZXRob2QpKSArCiAgICBndWlkZXMoZmlsbCA9IGd1aWRlX2xlZ2VuZCh0aXRsZSA9ICJFeHBlcmltZW50YWwgbWV0aG9kIikpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBleHBlcmltZW50YWwgbWV0aG9kIikgKwogICAgeGxhYigiIikgKwogICAgeWxhYigiTnVtYmVyIG9mIFBEQiBlbnRyaWVzIikgKwogICAgdGhlbWVfYncoKQpnZ3Bsb3RseShzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QpCmBgYAoKYGBge3IgU2F2ZSBTVkcgZmlsZSBvZiBtZXRob2QgZ3JhcGgsIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBmaWd1cmUgZm9yIGRvd25sb2FkCmlmICghZGlyLmV4aXN0cyhoZXJlKCJmaWd1cmVzIikpKSB7CiAgICBkaXIuY3JlYXRlKGhlcmUoImZpZ3VyZXMiKSkKfQpnZ3NhdmUoZmlsZW5hbWUgPSAic2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfbWV0aG9kLAogICAgICAgZGV2aWNlID0gInN2ZyIsCiAgICAgICBwYXRoID0gaGVyZSgiZmlndXJlcyIpKQpgYGAKClsqKkRvd25sb2FkIGZpZ3VyZSBpbiBTVkcgZm9ybWF0KipdKGZpZ3VyZXMvc2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZykKCgojIyBEYXRhc2V0CgpUaGUgZ3JhcGhzIHByZXNlbnRlZCBhYm92ZSBhcmUgZGVyaXZlZCBmcm9tIHRoZSBmb2xsb3dpbmcgZGF0YXNldDoKCmBgYHtyIEVudGlyZSBkYXRhc2V0fQojIEZvcm1hdCB0YWJsZSBmb3IgZGlzcGxh
eQpwZGJfdGFibGUgPC0gcGRiX2RhdGEgJT4lIAogICAgYXJyYW5nZShkZXNjKGNpdGF0aW9uX3llYXIpKSAlPiUgCiAgICBzZWxlY3QoYFBEQiBjb2RlYCA9IHBkYl9pZCwKICAgICAgICAgICBgQ2l0YXRpb24geWVhcmAgPSBjaXRhdGlvbl95ZWFyLAogICAgICAgICAgIGBFeHBlcmltZW50YWwgbWV0aG9kYCA9IGV4cGVyaW1lbnRhbF9tZXRob2QsCiAgICAgICAgICAgVGl0bGUgPSB0aXRsZSkKcGRiX3RhYmxlCmBgYAoKYGBge3IgU2F2ZSBkYXRhc2V0IGFzIEpTT04sIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBkYXRhc2V0IGZvciBkb3dubG9hZAppZiAoIWRpci5leGlzdHMoaGVyZSgiZGF0YXNldHMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZGF0YXNldHMiKSkKfQoKd3JpdGVfanNvbihwZGJfZGF0YSwgaGVyZSgiZGF0YXNldHMiLCAic2Fycy1jb3YyLXN0cnVjdHVyZXMuanNvbiIpKQpgYGAKClsqKkRvd25sb2FkIHJhdyBkYXRhc2V0IGluIEpTT04gZm9ybWF0KipdKGRhdGFzZXRzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLmpzb24pCg==