{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2018-05-12T02:11:29.718225+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 12, "enabled": true, "entries": { "part-i-chemical-disease-path-theme-distributions.txt.gz": { "checksum": "md5:a36e2e9516ce1d3ebcfa921b5c10467a", "ext": "gz", "id": "6d8d8c65-b034-4b0f-92de-ac72b18b06ca", "key": "part-i-chemical-disease-path-theme-distributions.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 70016566 }, "part-i-chemical-gene-path-theme-distributions.txt.gz": { "checksum": "md5:4f02054eec8b92fafada5cb7098ac7dc", "ext": "gz", "id": "dca229c9-f7de-49bd-aa02-de2a59dfdeef", "key": "part-i-chemical-gene-path-theme-distributions.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 24784283 }, "part-i-gene-disease-path-theme-distributions.txt.gz": { "checksum": "md5:66949134a3fde6752e8e36d21bc8c2b7", "ext": "gz", "id": "23cafffa-085c-4dff-a147-85479705b4ef", "key": "part-i-gene-disease-path-theme-distributions.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 63732849 }, "part-i-gene-gene-path-theme-distributions.txt.gz": { "checksum": "md5:9be6e7eac7ea044ebaca06a6fa83c34c", "ext": "gz", "id": "39402e34-5bdc-4be3-bacd-aa34c00efe96", "key": "part-i-gene-gene-path-theme-distributions.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 53028496 }, "part-ii-dependency-paths-chemical-disease-sorted-with-themes.txt.gz": { "checksum": "md5:02ebd618f619fca5cadcaa527ad1549c", "ext": "gz", "id": "e140e7d0-5898-42f5-bbb3-2c9ee1449472", "key": "part-ii-dependency-paths-chemical-disease-sorted-with-themes.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 395887502 }, "part-ii-dependency-paths-chemical-disease-sorted.txt.gz": { "checksum": "md5:b947d498a7c54b051aab2e75b5cb173f", "ext": "gz", "id": 
"0cc7a114-da24-4ba2-9de3-3ddca2fdc781", "key": "part-ii-dependency-paths-chemical-disease-sorted.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 1427076854 }, "part-ii-dependency-paths-chemical-gene-sorted-with-themes.txt.gz": { "checksum": "md5:b4a43f02f9ecb6ea1737b025e9e445c8", "ext": "gz", "id": "e9d0aa9e-4659-48fe-9ac7-e404f298daff", "key": "part-ii-dependency-paths-chemical-gene-sorted-with-themes.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 149936059 }, "part-ii-dependency-paths-chemical-gene-sorted.txt.gz": { "checksum": "md5:51437d31c4f139e4778c15b452c08bbd", "ext": "gz", "id": "5253bb26-4a11-46ac-8447-688ab6d294ec", "key": "part-ii-dependency-paths-chemical-gene-sorted.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 847352230 }, "part-ii-dependency-paths-gene-disease-sorted-with-themes.txt.gz": { "checksum": "md5:4c949f854eb4a60c641a6e147dc81b8d", "ext": "gz", "id": "819582d3-a09c-4396-93f1-57897fc3ecb1", "key": "part-ii-dependency-paths-gene-disease-sorted-with-themes.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 312221666 }, "part-ii-dependency-paths-gene-disease-sorted.txt.gz": { "checksum": "md5:d210bb11a0dc39ab8d7500a457a147a7", "ext": "gz", "id": "15925b6c-aa55-4f82-ad1e-f89d962a4148", "key": "part-ii-dependency-paths-gene-disease-sorted.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 1080242193 }, "part-ii-dependency-paths-gene-gene-sorted-with-themes.txt.gz": { "checksum": "md5:614f7becd38ad4c4543aec5b2e8781b6", "ext": "gz", "id": "1689b857-aae6-4be2-836f-2494039ccda5", "key": "part-ii-dependency-paths-gene-gene-sorted-with-themes.txt.gz", "metadata": null, "mimetype": "application/gzip", "size": 382071196 }, "part-ii-dependency-paths-gene-gene-sorted.txt.gz": { "checksum": "md5:bdfd25cc6a272483d0c490f138bebeae", "ext": "gz", "id": "6b310b50-a452-44bf-9326-687de24afb78", "key": "part-ii-dependency-paths-gene-gene-sorted.txt.gz", "metadata": null, 
"mimetype": "application/gzip", "size": 2489250707 } }, "order": [], "total_bytes": 7295600601 }, "id": "1243969", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/1243969/access", "access_links": "https://zenodo.org/api/records/1243969/access/links", "access_request": "https://zenodo.org/api/records/1243969/access/request", "access_users": "https://zenodo.org/api/records/1243969/access/users", "archive": "https://zenodo.org/api/records/1243969/files-archive", "archive_media": "https://zenodo.org/api/records/1243969/media-files-archive", "communities": "https://zenodo.org/api/records/1243969/communities", "communities-suggestions": "https://zenodo.org/api/records/1243969/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.1243969", "draft": "https://zenodo.org/api/records/1243969/draft", "files": "https://zenodo.org/api/records/1243969/files", "latest": "https://zenodo.org/api/records/1243969/versions/latest", "latest_html": "https://zenodo.org/records/1243969/latest", "media_files": "https://zenodo.org/api/records/1243969/media-files", "parent": "https://zenodo.org/api/records/1035252", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.1035252", "parent_html": "https://zenodo.org/records/1035252", "requests": "https://zenodo.org/api/records/1243969/requests", "reserve_doi": "https://zenodo.org/api/records/1243969/draft/pids/doi", "self": "https://zenodo.org/api/records/1243969", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.1243969", "self_html": "https://zenodo.org/records/1243969", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:1243969/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:1243969/sequence/default", "versions": "https://zenodo.org/api/records/1243969/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Icahn School of Medicine at Mount Sinai" } 
], "person_or_org": { "family_name": "Percha", "given_name": "Bethany", "name": "Percha, Bethany", "type": "personal" } }, { "affiliations": [ { "name": "Stanford University" } ], "person_or_org": { "family_name": "Altman", "given_name": "Russ B.", "name": "Altman, Russ B.", "type": "personal" } } ], "description": "
This repository contains labeled, weighted networks of chemical-gene, gene-gene, gene-disease, and chemical-disease relationships based on single sentences in PubMed abstracts. All raw dependency paths are provided in addition to the labeled relationships.
\n\nPART I: Connects dependency paths to labels, or \"themes\". Each record contains a dependency path followed by its score for each theme, and indicators of whether or not the path is part of the flagship path set for each theme (meaning that it was manually reviewed and determined to reflect that theme). The themes themselves are listed below and are in our paper (reference below).
\n\nPART II: Connects sentences to dependency paths. It consists of sentences and associated metadata, entity pairs found in the sentences, and dependency paths connecting those entity pairs. Each record contains the following information:
\n\nThe \"with-themes.txt\" files only contain dependency paths with corresponding theme assignments from Part I. The plain \".txt\" files contain all dependency paths.
\n\nThis release contains the annotated network for the April 22, 2018 version of PubTator. The version discussed in our paper, below, is an older one - from April 30, 2016. If you're interested in that network, it can be found in Version 1 of this repository. We will be releasing updated networks periodically, as the PubTator community continues to release new versions of named entity annotations for Medline each month or so.
\n\n------------------------------------------------------------------------------------
\nREFERENCES
Percha B, Altman RB (2017) A global network of biomedical relationships derived from text. (In press at Bioinformatics.)
\nPercha B, Altman RB (2015) Learning the structure of biomedical relationships from unstructured text. PLoS Computational Biology, 11(7): e1004216.
This project depends on named entity annotations from the PubTator project:
\nhttps://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/
Reference:
\nWei CH et al., PubTator: a Web-based text mining tool for assisting Biocuration, Nucleic acids research, 2013, 41 (W1): W518-W522. doi: 10.1093/nar/gkt441
Dependency parsing was provided by the Stanford CoreNLP toolkit:
\nhttps://stanfordnlp.github.io/CoreNLP/index.html
Reference:
\nManning, Christopher D., Mihai Surdeanu, John Bauer, Jenny Finkel, Steven J. Bethard, and David McClosky. 2014. The Stanford CoreNLP Natural Language Processing Toolkit. In Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pp. 55-60.
------------------------------------------------------------------------------------
\nTHEMES
chemical-gene
\n(A+) agonism, activation
\n(A-) antagonism, blocking
\n(B) binding, ligand (esp. receptors)
\n(E+) increases expression/production
\n(E-) decreases expression/production
\n(E) affects expression/production (neutral)
\n(N) inhibits
gene-chemical
\n(O) transport, channels
\n(K) metabolism, pharmacokinetics
\n(Z) enzyme activity
chemical-disease
\n(T) treatment/therapy (including investigatory)
\n(C) inhibits cell growth (esp. cancers)
\n(Sa) side effect/adverse event
\n(Pr) prevents, suppresses
\n(Pa) alleviates, reduces
\n(J) role in disease pathogenesis
disease-chemical
\n(Mp) biomarkers (of disease progression)
gene-disease
\n(U) causal mutations
\n(Ud) mutations affecting disease course
\n(D) drug targets
\n(J) role in pathogenesis
\n(Te) possible therapeutic effect
\n(Y) polymorphisms alter risk
\n(G) promotes progression
disease-gene
\n(Md) biomarkers (diagnostic)
\n(X) overexpression in disease
\n(L) improper regulation linked to disease
gene-gene
\n(B) binding, ligand (esp. receptors)
\n(W) enhances response
\n(V+) activates, stimulates
\n(E+) increases expression/production
\n(E) affects expression/production (neutral)
\n(I) signaling pathway
\n(H) same protein or complex
\n(Rg) regulation
\n(Q) production by cell population