Dataset Open Access
{ "files": [ { "links": { "self": "https://zenodo.org/api/files/41dda5d0-badc-49d2-8d0a-5c8e14a4a93d/arxiv_v2.1.tar.gz" }, "checksum": "md5:ebdbfdcb65636a0c1d0ffae827dde8b0", "bucket": "41dda5d0-badc-49d2-8d0a-5c8e14a4a93d", "key": "arxiv_v2.1.tar.gz", "type": "gz", "size": 22891008306 } ], "owners": [ 57099 ], "doi": "10.5281/zenodo.2609187", "stats": { "version_unique_downloads": 3065.0, "unique_views": 493.0, "views": 556.0, "version_views": 3428.0, "unique_downloads": 902.0, "version_unique_views": 2746.0, "volume": 266497118698452.0, "version_downloads": 24194.0, "downloads": 11642.0, "version_volume": 494290492185778.0 }, "links": { "doi": "https://doi.org/10.5281/zenodo.2609187", "conceptdoi": "https://doi.org/10.5281/zenodo.2553522", "bucket": "https://zenodo.org/api/files/41dda5d0-badc-49d2-8d0a-5c8e14a4a93d", "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.2553522.svg", "html": "https://zenodo.org/record/2609187", "latest_html": "https://zenodo.org/record/4313164", "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.2609187.svg", "latest": "https://zenodo.org/api/records/4313164" }, "conceptdoi": "10.5281/zenodo.2553522", "created": "2019-03-30T09:28:29.813865+00:00", "updated": "2021-11-10T15:57:22.323231+00:00", "conceptrecid": "2553522", "revision": 20, "id": 2609187, "metadata": { "access_right_category": "success", "doi": "10.5281/zenodo.2609187", "description": "<p>We propose a new data set based on <strong>all publications from all scientific fields available on arXiv.org</strong>. Apart from providing the <strong>papers' plain text</strong>, <strong>in-text citations</strong> were annotated via global identifiers. As far as possible, cited publications were linked to the <strong>Microsoft Academic Graph</strong>. Our data set consists of <strong>over one million documents</strong> and <strong>29.2 million citation contexts</strong>. 
The data set, which is made freely available for research purposes, not only can enhance the future evaluation of research paper-based and citation context-based approaches but also serve as a basis for novel ideas to analyze papers.</p>\n\n<p>More information can be found in our paper <a href=\"http://ceur-ws.org/Vol-2345/paper2.pdf\">Bibliometric-Enhanced arXiv: A Data Set for Paper-Based and Citation-Based Tasks</a>.</p>\n\n<p>See <a href=\"https://github.com/IllDepence/unarXive\">https://github.com/IllDepence/unarXive</a> for the source code which has been used for creating the data set.</p>", "license": { "id": "other-at" }, "title": "Bibliometric-Enhanced arXiv: A Data Set for Paper-Based and Citation-Based Tasks", "relations": { "version": [ { "count": 4, "index": 1, "parent": { "pid_type": "recid", "pid_value": "2553522" }, "is_last": false, "last_child": { "pid_type": "recid", "pid_value": "4313164" } } ] }, "communities": [ { "id": "bibliometrics" }, { "id": "natural-language-processing" }, { "id": "scholarly-data" } ], "references": [], "keywords": [ "scholarly data", "citations", "papers", "arXiv.org", "digital libraries", "dataset" ], "publication_date": "2019-02-01", "creators": [ { "affiliation": "University of Freiburg", "name": "Saier, Tarek" }, { "orcid": "0000-0001-5458-8645", "affiliation": "University of Freiburg", "name": "F\u00e4rber, Michael" } ], "access_right": "open", "resource_type": { "type": "dataset", "title": "Dataset" }, "related_identifiers": [ { "scheme": "url", "identifier": "http://ceur-ws.org/Vol-2345/paper2.pdf", "relation": "isDocumentedBy" }, { "scheme": "doi", "identifier": "10.5281/zenodo.2553522", "relation": "isVersionOf" } ] } }
| | All versions | This version |
---|---|---|
Views | 3,428 | 556 |
Downloads | 24,194 | 11,642 |
Data volume | 494.3 TB | 266.5 TB |
Unique views | 2,746 | 493 |
Unique downloads | 3,065 | 902 |