There is a newer version of this record available.

Dataset Open Access

DOIBoost Dataset Dump

La Bruzzo, Sandro; Manghi, Paolo; Mannocci, Andrea


JSON-LD (schema.org) Export

{
  "inLanguage": {
    "alternateName": "eng", 
    "@type": "Language", 
    "name": "English"
  }, 
  "description": "<p>&nbsp;</p>\n\n<p>Research in information science and scholarly communication strongly relies on the availability of openly accessible datasets of metadata and, where possible, their relative payloads. To this end, CrossRef plays a pivotal role by providing free access to its entire metadata collection, and allowing other initiatives to link and enrich its information. Therefore, a number of key pieces of information result scattered across diverse datasets and resources freely available online. As a result of this fragmentation, researchers in this domain end up struggling with daily integration problems producing a plethora of ad-hoc datasets, therefore incurring in a waste of time, resources, and infringing open science best practices.&nbsp;DOIBoost is&nbsp;a metadata collection that enriches CrossRef with inputs from Microsoft Academic Graph, ORCID, and Unpaywall for the purpose of supporting high-quality and robust research experiments, saving times to researchers and enabling their comparison.</p>\n\n<p>This entry consists of two files: <strong>doiBoost.tar.gz</strong> (which contains a set of part.gz files, each one containing the JSON files realtive to the enriched CrossRef records) and <strong>termsOfUse.doc&nbsp;</strong>(which contains details on the terms of use of DOIBoost).</p>\n\n<p>Note that this records comes with two relationships to other results of this experiment:&nbsp;</p>\n\n<ol>\n\t<li>link to the data paper: for more information on how the dataset is (and can be) generated;</li>\n\t<li>link to the software: to repeat the experiment&nbsp; .</li>\n</ol>\n\n<p>&nbsp;</p>", 
  "license": "https://creativecommons.org/licenses/by/4.0/legalcode", 
  "creator": [
    {
      "affiliation": "Institute of Information Science and Technology - CNR", 
      "@id": "https://orcid.org/0000-0003-2855-1245", 
      "@type": "Person", 
      "name": "La Bruzzo, Sandro"
    }, 
    {
      "affiliation": "Institute of Information Science and Technology - CNR", 
      "@id": "https://orcid.org/0000-0001-7291-3210", 
      "@type": "Person", 
      "name": "Manghi, Paolo"
    }, 
    {
      "affiliation": "Knowledge Media Institute - Open University", 
      "@id": "https://orcid.org/0000-0002-5193-7851", 
      "@type": "Person", 
      "name": "Mannocci, Andrea"
    }
  ], 
  "url": "https://zenodo.org/record/1438356", 
  "datePublished": "2018-09-28", 
  "version": "2.0", 
  "keywords": [
    "dataset", 
    "CrossRef", 
    "Microsoft Academic Graph", 
    "Unpaywall", 
    "Spark", 
    "aggregation", 
    "metadata", 
    "enrichment", 
    "ORCID"
  ], 
  "@context": "https://schema.org/", 
  "distribution": [
    {
      "contentUrl": "https://zenodo.org/api/files/ac731fe5-16bc-48e6-a162-f67cf0dc8ff2/doiBoost.tar.gz", 
      "encodingFormat": "gz", 
      "@type": "DataDownload"
    }, 
    {
      "contentUrl": "https://zenodo.org/api/files/ac731fe5-16bc-48e6-a162-f67cf0dc8ff2/schemaAndSample.zip", 
      "encodingFormat": "zip", 
      "@type": "DataDownload"
    }, 
    {
      "contentUrl": "https://zenodo.org/api/files/ac731fe5-16bc-48e6-a162-f67cf0dc8ff2/termsOfUse_dataset.docx", 
      "encodingFormat": "docx", 
      "@type": "DataDownload"
    }
  ], 
  "identifier": "https://doi.org/10.5281/zenodo.1438356", 
  "@id": "https://doi.org/10.5281/zenodo.1438356", 
  "@type": "Dataset", 
  "name": "DOIBoost Dataset Dump"
}
2,444 views
3,563 downloads
Metric | All versions | This version
Views | 2,444 | 1,000
Downloads | 3,563 | 617
Data volume | 172.3 TB | 21.7 TB
Unique views | 2,073 | 835
Unique downloads | 735 | 220

Share

Cite as