There is a newer version of this record available.

Dataset Open Access

BIP! Finder DB: A Dataset of Impact Measures for Scientific Publications

Thanasis Vergoulis; Ilias Kanellos; Claudio Atzori; Andrea Mannocci; Sandro La Bruzzo; Natalia Manola; Paolo Manghi


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4386935">
    <!-- The described resource is typed twice: as a DCAT Dataset (rdf:type) and as a DCMI Type "Dataset" (dct:type). -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <!-- The DOI URL used as the subject IRI is repeated as an xsd:anyURI identifier literal and as the foaf:page of the dataset. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4386935</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4386935"/>
    <!-- Creator identified by an ORCID IRI; written as a typed node (foaf:Agent), which yields the same rdf:type triple as rdf:Description plus an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-0555-4128">
        <foaf:name>Thanasis Vergoulis</foaf:name>
        <!-- Bare ORCID (without the URL prefix) as a string-typed identifier. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-0555-4128</dct:identifier>
        <!-- Affiliation is an anonymous (blank-node) organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>IMSI, ATHENA RC</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by an ORCID IRI; written as a typed node (foaf:Agent), which yields the same rdf:type triple as rdf:Description plus an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-2146-3795">
        <foaf:name>Ilias Kanellos</foaf:name>
        <!-- Bare ORCID (without the URL prefix) as a string-typed identifier. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-2146-3795</dct:identifier>
        <!-- Affiliation is an anonymous (blank-node) organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>IMSI, ATHENA RC</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator without an ORCID: an anonymous (blank-node) foaf:Agent, written as a typed node instead of rdf:Description plus rdf:type. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Claudio Atzori</foaf:name>
        <!-- Affiliation is an anonymous organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>CNR</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator without an ORCID: an anonymous (blank-node) foaf:Agent, written as a typed node instead of rdf:Description plus rdf:type. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Andrea Mannocci</foaf:name>
        <!-- Affiliation is an anonymous organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>CNR</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator without an ORCID: an anonymous (blank-node) foaf:Agent, written as a typed node instead of rdf:Description plus rdf:type. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Sandro La Bruzzo</foaf:name>
        <!-- Affiliation is an anonymous organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>CNR</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator without an ORCID: an anonymous (blank-node) foaf:Agent, written as a typed node instead of rdf:Description plus rdf:type. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Natalia Manola</foaf:name>
        <!-- Affiliation is an anonymous organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>OpenAIRE</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by an ORCID IRI; written as a typed node (foaf:Agent), which yields the same rdf:type triple as rdf:Description plus an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-7291-3210">
        <foaf:name>Paolo Manghi</foaf:name>
        <!-- Bare ORCID (without the URL prefix) as a string-typed identifier. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-7291-3210</dct:identifier>
        <!-- Affiliation is an anonymous (blank-node) organization carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>CNR</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Human-readable title of the dataset. -->
    <dct:title>BIP! Finder DB: A Dataset of Impact Measures for Scientific Publications</dct:title>
    <!-- Publisher is an anonymous foaf:Agent that carries only a name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued appears twice in this record: here as a year-only value (xsd:gYear) and further down as a full date (xsd:date). -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- Free-text keywords, one dcat:keyword element per keyword. -->
    <dcat:keyword>Scientometrics</dcat:keyword>
    <dcat:keyword>Research assessment</dcat:keyword>
    <dcat:keyword>Research impact</dcat:keyword>
    <!-- Second dct:issued value, this time as a full calendar date. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-12-23</dct:issued>
    <!-- The Zenodo record URL is declared to denote the same resource as the DOI subject IRI. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4386935"/>
    <!-- Alternate identifier: the Zenodo record URL as an xsd:anyURI notation, with the scheme agency recorded as the literal "url". -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4386935</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Points at a different DOI (ending in 4386934) than this record's own DOI (ending in 4386935). NOTE(review): presumably the version-independent concept DOI; confirm against the record. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4386934"/>
    <!-- Version label of this record as a plain literal. -->
    <owl:versionInfo>0.1</owl:versionInfo>
    <dct:description>&lt;p&gt;This dataset contains impact measures (metrics/indicators) for 104,769,307 scientific articles. In particular, for each article we have calculated the following measures:&lt;/p&gt; &lt;ul&gt; &lt;li&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;PageRank score:&lt;/strong&gt;&lt;/em&gt; This is a citation-based measure reflecting the influence (i.e., the total impact) of an article. It is based on the PageRank&lt;sup&gt;1&lt;/sup&gt; network analysis method. In the context of citation networks, PageRank estimates the importance of each article based on its centrality in the whole network.&amp;nbsp;&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;RAM score:&lt;/strong&gt;&lt;/em&gt; This is a citation-based measure reflecting the popularity (i.e., the current impact) of an article. It is based on the RAM&lt;sup&gt;2&lt;/sup&gt; citation network analysis method. Methods like PageRank are biased against recently published articles (new articles need time to receive their first citations). RAM alleviates this problem using an approach known as &amp;ldquo;time-awareness&amp;rdquo;. This is why it is more suitable to capture the current &amp;ldquo;hype&amp;rdquo; of an article.&amp;nbsp;&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;AttRank score:&lt;/strong&gt;&lt;/em&gt; This is a citation-based measure reflecting the popularity (i.e., the current impact) of an article. It is based on the AttRank&lt;sup&gt;3&lt;/sup&gt; citation network analysis method. Methods like PageRank are biased against recently published articles (new articles need time to receive their first citations). AttRank alleviates this problem incorporating an attention-based mechanism, akin to a time-restricted version of preferential attachment, to explicitly capture a researcher&amp;rsquo;s preference to read papers which received a lot of attention recently. 
This is why it is more suitable to capture the current &amp;ldquo;hype&amp;rdquo; of an article.&lt;/p&gt; &lt;/li&gt; &lt;/ul&gt; &lt;p&gt;We provide three compressed CSV files (one for each measure/score provided) having lines of the form &amp;ldquo;DOI \t score&amp;rdquo;. The configuration of each measure has been captured in the corresponding filename. Regarding the different measures/scores, you can find more intuition inside a previous extensive experimental study&lt;sup&gt;4&lt;/sup&gt;.&amp;nbsp;&lt;/p&gt; &lt;p&gt;The data of the citation network used to produce this dataset have been gathered from (a) the OpenCitations&amp;rsquo; COCI dataset (Sep-2020 version), (b) a &lt;a href="https://aka.ms/msracad"&gt;MAG&lt;/a&gt;&lt;sup&gt;5,6&lt;/sup&gt; snapshot from Aug-2020, and (c) a Crossref snapshot from Mar-2020. The union of all distinct DOI-to-DOI citations that could be found in these sources have been considered (entries without a DOI were omitted).&amp;nbsp;&lt;/p&gt; &lt;p&gt;The work is based on the following publications:&lt;/p&gt; &lt;ol&gt; &lt;li&gt; &lt;p&gt;R. Motwani L. Page, S. Brin and T. Winograd. 1999. The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;Rumi Ghosh, Tsung-Ting Kuo, Chun-Nan Hsu, Shou-De Lin, and Kristina Lerman. 2011. Time-Aware Ranking in Dynamic Citation Networks. In Data Mining Workshops (ICDMW). 373&amp;ndash;380&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. Vassiliou: Ranking Papers by their Short-Term Scientific Impact. CoRR abs/2006.00951 (2020)&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. Vassiliou: Impact-Based Ranking of Scientific Publications: A Survey and Experimental Evaluation. 
TKDE 2019 (early access)&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;Arnab Sinha, Zhihong Shen, Yang Song, Hao Ma, Darrin Eide, Bo-June (Paul) Hsu, and Kuansan Wang. 2015. An Overview of Microsoft Academic Service (MA) and Applications. In Proceedings of the 24th International Conference on World Wide Web (WWW &amp;#39;15 Companion). ACM, New York, NY, USA, 243-246. DOI=http://dx.doi.org/10.1145/2740908.2742839&lt;/p&gt; &lt;/li&gt; &lt;li&gt; &lt;p&gt;K. Wang et al., &amp;ldquo;A Review of Microsoft Academic Services for Science of Science Studies&amp;rdquo;, Frontiers in Big Data, 2019, doi: 10.3389/fdata.2019.00045&lt;/p&gt; &lt;/li&gt; &lt;/ol&gt; &lt;p&gt;A Web user interface that uses these data to facilitate literature exploration, can be found&lt;a href="https://bip.imsi.athenarc.gr/"&gt; here&lt;/a&gt;. Moreover, the exact same scores can be gathered through BIP! Finder&amp;rsquo;s API.&amp;nbsp;&lt;/p&gt; &lt;p&gt;&lt;em&gt;Terms of use:&lt;/em&gt; These data are provided &amp;quot;as is&amp;quot;, without any warranties of any kind. The data are provided under the Creative Commons Attribution 4.0 International license.&lt;/p&gt;</dct:description>
    <dct:description>{"references": ["R. Motwani L. Page, S. Brin and T. Winograd. 1999. The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.", "Rumi Ghosh, Tsung-Ting Kuo, Chun-Nan Hsu, Shou-De Lin, and Kristina Lerman. 2011. Time-Aware Ranking in Dynamic Citation Networks. In Data Mining Workshops (ICDMW). 373\u2013380", "I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. Vassiliou: Ranking Papers by their Short-Term Scientific Impact. CoRR abs/2006.00951 (2020)", "I. Kanellos, T. Vergoulis, D. Sacharidis, T. Dalamagas, Y. Vassiliou: Impact-Based Ranking of Scientific Publications: A Survey and Experimental Evaluation. TKDE 2019 (early access)", "Arnab Sinha, Zhihong Shen, Yang Song, Hao Ma, Darrin Eide, Bo-June (Paul) Hsu, and Kuansan Wang. 2015. An Overview of Microsoft Academic Service (MA) and Applications. In Proceedings of the 24th International Conference on World Wide Web (WWW '15 Companion). ACM, New York, NY, USA, 243-246. DOI=http://dx.doi.org/10.1145/2740908.2742839", "K. Wang et al., \"A Review of Microsoft Academic Services for Science of Science Studies\", Frontiers in Big Data, 2019, doi: 10.3389/fdata.2019.00045"]}</dct:description>
    <!-- Access rights are stated twice: first as a reference to an EU Publications Office authority IRI (access-right/PUBLIC) ... -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <!-- ... and second as a dct:RightsStatement identified by an info:eu-repo IRI, labelled "Open Access". -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the license (Creative Commons BY 4.0 legal code IRI) and the DOI as access URL; it has no downloadURL or byteSize. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4386935"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the AttRank scores file (.gz). accessURL and downloadURL are given as IRI references (rdf:resource) rather than plain literals: DCAT defines both as resource-valued properties, and the license-bearing distribution of this dataset already uses rdf:resource for accessURL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4386935"/>
        <!-- File size in bytes. NOTE(review): left as an untyped literal; DCAT expects a typed numeric literal, confirm the target DCAT version before adding rdf:datatype. -->
        <dcat:byteSize>1158194090</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4386935/files/AttRank_graph_universe2_1.txt_a0.2_b0.5_g0.3_rho-0.16_year2017-2020_error1e-12.gz"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the PageRank ("PR") scores file (.gz). accessURL and downloadURL are given as IRI references (rdf:resource) rather than plain literals: DCAT defines both as resource-valued properties, and the license-bearing distribution of this dataset already uses rdf:resource for accessURL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4386935"/>
        <!-- File size in bytes. NOTE(review): left as an untyped literal; DCAT expects a typed numeric literal, confirm the target DCAT version before adding rdf:datatype. -->
        <dcat:byteSize>1379670999</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4386935/files/PR_graph_universe2_1.txt_a0.5_error1e-12.gz"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the RAM scores file (.gz). accessURL and downloadURL are given as IRI references (rdf:resource) rather than plain literals: DCAT defines both as resource-valued properties, and the license-bearing distribution of this dataset already uses rdf:resource for accessURL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4386935"/>
        <!-- File size in bytes. NOTE(review): left as an untyped literal; DCAT expects a typed numeric literal, confirm the target DCAT version before adding rdf:datatype. -->
        <dcat:byteSize>1036847381</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4386935/files/RAM_graph_universe2_1.txt_c0.6_year2020.gz"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
363 views
52 downloads
All versions This version
Views 363 217
Downloads 52 17
Data volume 57.5 GB 20.0 GB
Unique views 334 205
Unique downloads 20 7

Share

Cite as