There is a newer version of this record available.

Dataset Open Access

BIP4COVID19: Impact metrics and indicators for coronavirus related publications

Thanasis Vergoulis; Ilias Kanellos; Serafeim Chatzopoulos; Danae Pla Karidi; Theodore Dalamagas


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:cnt="http://www.w3.org/2011/content#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3922999">
    <!-- Typing and identity of the described resource: it is declared both as
         a dcat:Dataset and as the DCMI "Dataset" type; the DOI URL is given as
         an xsd:anyURI identifier literal and as the foaf:page target. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3922999</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3922999"/>
    <!-- Creators: one dct:creator per author. Each author is a foaf:Agent
         identified by an ORCID URI (rdf:about), carrying a foaf:name and an
         org:memberOf link to an affiliation. The affiliation is an anonymous
         foaf:Organization (no rdf:about), so it is repeated inside every
         creator rather than shared between them. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-0555-4128">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Thanasis Vergoulis</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Athena Research Center</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-2146-3795">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Ilias Kanellos</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Athena Research Center</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-1714-5225">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Serafeim Chatzopoulos</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Athena Research Center</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-3154-6212">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Danae Pla Karidi</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Athena Research Center</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-5002-7901">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Theodore Dalamagas</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Athena Research Center</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher, issue date and keywords. -->
    <dct:title>BIP4COVID19: Impact metrics and indicators for coronavirus related publications</dct:title>
    <!-- The publisher is an anonymous foaf:Agent identified only by name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued is stated twice in this group: here as a year-only
         xsd:gYear value, and after the keywords as a full xsd:date. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- One dcat:keyword element per free-text keyword. -->
    <dcat:keyword>COVID-19</dcat:keyword>
    <dcat:keyword>coronavirus</dcat:keyword>
    <dcat:keyword>scientometrics</dcat:keyword>
    <dcat:keyword>bibliometrics</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-06-29</dct:issued>
    <!-- Alternate identity: the Zenodo record URL is asserted to denote the
         same resource as the DOI (owl:sameAs) and is also attached as an
         adms:Identifier whose skos:notation is typed as xsd:anyURI. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3922999"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3922999</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <!-- Related resources: the CORD-19 dataset page, the bioRxiv preprint for
         this work, and the PaperRanking library repository. -->
    <dct:relation rdf:resource="https://pages.semanticscholar.org/coronavirus-research"/>
    <!-- The preprint is referenced by its bioRxiv URL directly. It is not a
         Handle System identifier, so it must not be routed through the
         hdl.handle.net proxy (that form does not resolve). -->
    <dct:relation rdf:resource="https://www.biorxiv.org/content/10.1101/2020.04.11.037093v2"/>
    <dct:relation rdf:resource="https://github.com/diwis/PaperRanking"/>
    <!-- Versioning: this record is a version of the resource at the DOI below
         (presumably the DOI covering all versions; confirm against the
         record), and owl:versionInfo carries this version's label. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3723281"/>
    <!-- Zenodo communities the record is part of. -->
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/covid-19"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/zenodo"/>
    <owl:versionInfo>7</owl:versionInfo>
    <dct:description>&lt;p&gt;This dataset contains impact metrics and indicators for a set of publications that are related to the &lt;a href="https://en.wikipedia.org/wiki/Coronavirus_disease_2019"&gt;COVID-19 infectious disease&lt;/a&gt; and the coronavirus that causes it. It is based on:&lt;/p&gt; &lt;ol&gt; &lt;li&gt;&amp;Tau;he &lt;a href="https://pages.semanticscholar.org/coronavirus-research"&gt;CORD-19 dataset&lt;/a&gt; released by the team of &lt;a href="https://www.semanticscholar.org/"&gt;Semantic Scholar&lt;/a&gt;&lt;sup&gt;1&lt;/sup&gt; and&lt;/li&gt; &lt;li&gt;&amp;Tau;he curated data provided by the &lt;a href="https://www.ncbi.nlm.nih.gov/research/coronavirus/"&gt;LitCovid hub&lt;/a&gt;&lt;sup&gt;2&lt;/sup&gt;.&lt;/li&gt; &lt;/ol&gt; &lt;p&gt;These data have been cleaned and integrated with data from &lt;a href="https://github.com/echen102/COVID-19-TweetIDs"&gt;COVID-19-TweetIDs&lt;/a&gt; and from other sources (e.g., PMC). The result was dataset of 125,480 unique articles along with relevant metadata (e.g., the underlying citation network). We utilized this dataset to produce, for each article, the values of the following impact measures:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;&lt;em&gt;&lt;strong&gt;Influence:&lt;/strong&gt;&lt;/em&gt; Citation-based measure reflecting the total impact of an article. This is based on the PageRank&lt;sup&gt;3&lt;/sup&gt; network analysis method. In the context of citation networks, it estimates the importance of each article based on its centrality in the whole network. This measure was calculated using the PaperRanking (&lt;a href="https://github.com/diwis/PaperRanking"&gt;https://github.com/diwis/PaperRanking&lt;/a&gt;) library&lt;sup&gt;4&lt;/sup&gt;.&lt;/li&gt; &lt;li&gt;&lt;em&gt;&lt;strong&gt;Popularity:&lt;/strong&gt;&lt;/em&gt; Citation-based measure reflecting the current impact of an article. This is based on the RAM&lt;sup&gt;5&lt;/sup&gt; citation network analysis method. 
Methods like PageRank are biased against recently published articles (new articles need time to receive their first citations). RAM alleviates this problem using an approach known as &amp;quot;time-awareness&amp;quot;. This is why it is more suitable to capture the current &amp;quot;hype&amp;quot; of an article. This measure was calculated using the PaperRanking (&lt;a href="https://github.com/diwis/PaperRanking"&gt;https://github.com/diwis/PaperRanking&lt;/a&gt;) library&lt;sup&gt;4&lt;/sup&gt;.&lt;/li&gt; &lt;li&gt;&lt;em&gt;&lt;strong&gt;Social Media Attention: &lt;/strong&gt;&lt;/em&gt;The number of tweets related to this article. Relevant data were collected from the &lt;a href="https://github.com/echen102/COVID-19-TweetIDs"&gt;COVID-19-TweetIDs&lt;/a&gt; dataset. In this version, only tweets between Jan. 21&lt;sup&gt;st&lt;/sup&gt; and Mar. 31&lt;sup&gt;st&lt;/sup&gt; have been considered from the previous dataset. The rest will be included during next updates.&amp;nbsp;&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;We provide three CSV files, all containing the same information, however each having its entries ordered by a different impact measure. All CSV files are tab separated and have the same columns (PubMed_id, PMC_id, DOI, popularity_score, influence_score, tweets count).&lt;/p&gt; &lt;p&gt;The work is based on the following publications:&lt;/p&gt; &lt;blockquote&gt; &lt;ol&gt; &lt;li&gt;COVID-19 Open Research Dataset (CORD-19). 2020. Version 2020-06-21 Retrieved from https://pages.semanticscholar.org/coronavirus-research. Accessed 2020-06-21. doi:10.5281/zenodo.3715506&lt;/li&gt; &lt;li&gt;Chen Q, Allot A, &amp;amp; Lu Z. (2020) Keep up with the latest coronavirus research, Nature 579:193 (version 2020-06-22)&lt;/li&gt; &lt;li&gt;R. Motwani L. Page, S. Brin and T. Winograd. 1999. The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.&lt;/li&gt; &lt;li&gt;I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. 
Vassiliou: Impact-Based Ranking of Scientific Publications: A Survey and Experimental Evaluation. TKDE 2019&lt;/li&gt; &lt;li&gt;Rumi Ghosh, Tsung-Ting Kuo, Chun-Nan Hsu, Shou-De Lin, and Kristina Lerman. 2011. Time-Aware Ranking in Dynamic Citation Networks. In Data Mining Workshops (ICDMW). 373&amp;ndash;380&lt;/li&gt; &lt;/ol&gt; &lt;/blockquote&gt; &lt;p&gt;A Web user interface that uses these data to facilitate the COVID-19 literature exploration, can be found &lt;a href="https://bip.covid19.athenarc.gr"&gt;here&lt;/a&gt;. More details in our preprint &lt;a href="https://www.biorxiv.org/content/10.1101/2020.04.11.037093v2"&gt;here&lt;/a&gt;.&lt;/p&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;Terms of use:&lt;/strong&gt;&lt;/em&gt; These data are provided &amp;quot;as is&amp;quot;, without any warranties of any kind. The data are provided under the Creative Commons Attribution 4.0 International license.&lt;/p&gt;</dct:description>
    <!-- Funding acknowledgement, stored as an additional dct:description.
         xml:lang is explicitly empty, i.e. no language is declared for the
         literal. -->
    <dct:description xml:lang="">We acknowledge support of this work by the project "Moving from Big Data Management to Data Science" (MIS 5002437/3) which is implemented under the Action "Reinforcement of the Research and Innovation Infrastructure", funded by the Operational Programme "Competitiveness, Entrepreneurship and Innovation" (NSRF 2014-2020) and co-financed by Greece and the European Union (European Regional Development Fund).</dct:description>
    <!-- Reference list, serialized as a JSON object with a single
         "references" array inside a plain literal. A consumer has to
         XML-unescape the text first and then JSON-decode it; the literal
         contains JSON string escapes such as \u2013. -->
    <dct:description xml:lang="">{"references": ["COVID-19 Open Research Dataset (CORD-19). 2020. Version 2020-06-21. Retrieved from https://pages.semanticscholar.org/coronavirus-research. Accessed 2020-06-21.", "I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. Vassiliou: Impact-Based Ranking of Scientific Publications: A Survey and Experimental Evaluation. TKDE 2019", "Rumi Ghosh, Tsung-Ting Kuo, Chun-Nan Hsu, Shou-De Lin, and Kristina Lerman. 2011. Time-Aware Ranking in Dynamic Citation Networks. In Data Mining Workshops (ICDMW). 373\u2013380", "R. Motwani L. Page, S. Brin and T. Winograd. 1999. The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.", "Chen Q, Allot A, &amp; Lu Z. (2020) Keep up with the latest coronavirus research, Nature 579:193 (version 2020-06-22)"]}</dct:description>
    <!-- Access rights are stated twice: once as a reference to the
         publications.europa.eu access-right authority entry "PUBLIC", and
         once as a dct:RightsStatement identified by
         info:eu-repo/semantics/openAccess with the label "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Single anonymous distribution: its rights statement points at the
         Creative Commons Attribution 4.0 legal code, and its access URL is the
         record's DOI URL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:rights>
          <dct:RightsStatement rdf:about="https://creativecommons.org/licenses/by/4.0/legalcode">
            <rdfs:label>Creative Commons Attribution 4.0 International</rdfs:label>
          </dct:RightsStatement>
        </dct:rights>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3922999"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
6,368
3,959
views
downloads
All versions This version
Views 6,368179
Downloads 3,95925
Data volume 12.2 GB217.9 MB
Unique views 5,074172
Unique downloads 3,06919

Share

Cite as