Poster Open Access

Detecting Informal Data Use in Literature

Sara Lafia; Elizabeth Moss; Andrea Thomer; Libby Hemphill


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- DCAT metadata record, serialized as RDF/XML, for the resource identified by
     https://doi.org/10.5281/zenodo.5748382 (a poster titled
     "Detecting Informal Data Use in Literature"). -->
<!-- All namespace prefixes are declared once on the root element. Only rdf, adms,
     dct, dcat, foaf, org, owl, rdfs and skos are used by elements in this record;
     the remaining prefixes (dc, dctype, duv, frapo, geo, gsp, locn, prov, schema,
     vcard, wdrs) are declared but unused here. Presumably a fixed preamble
     emitted by the exporter; confirm before pruning. -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- The described resource. Its subject URI, dct:identifier, foaf:page and the
       dcat:accessURL of both distributions are all the same DOI URL. -->
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.5748382">
    <!-- Typed as a dcat:Dataset whose DCMI resource type is Text. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.5748382</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.5748382"/>
    <!-- Four creators follow, all with the same shape: a foaf:Agent whose subject
         is an ORCID URI, the bare ORCID iD repeated as a string dct:identifier,
         a display name, and an affiliation given as an anonymous
         foaf:Organization linked through org:memberOf.
         NOTE(review): the ORCID URIs use the http scheme; confirm whether
         consumers expect the https form before changing, since it alters the
         node identity. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-5896-7295">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-5896-7295</dct:identifier>
        <foaf:name>Sara Lafia</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Michigan, USA</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0001-5464-8716">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-5464-8716</dct:identifier>
        <foaf:name>Elizabeth Moss</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Michigan, USA</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0001-6238-3498">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-6238-3498</dct:identifier>
        <foaf:name>Andrea Thomer</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Michigan, USA</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-3793-7281">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-3793-7281</dct:identifier>
        <foaf:name>Libby Hemphill</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Michigan, USA</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>Detecting Informal Data Use in Literature</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued occurs twice in this record: here as an xsd:gYear and again
         below (after the keywords) as a full xsd:date. The two values agree.
         NOTE(review): application profiles that cap dct:issued at one value
         would reject this; confirm against the target profile before removing
         either. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <dcat:keyword>data archive</dcat:keyword>
    <dcat:keyword>data citation</dcat:keyword>
    <dcat:keyword>data discovery</dcat:keyword>
    <dcat:keyword>natural language processing</dcat:keyword>
    <dcat:keyword>scholarly communication</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-12-01</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- The zenodo.org record URL is given twice: as an owl:sameAs link and as an
         adms:Identifier whose skos:notation is that URL and whose scheme agency
         is the literal "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/5748382"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5748382</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- dct:isVersionOf targets a different DOI (suffix 5748381, not 5748382);
         presumably the identifier covering all versions of this record;
         verify against the repository. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.5748381"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/force2021"/>
    <owl:versionInfo>1</owl:versionInfo>
    <!-- The description literal contains HTML paragraph tags stored as escaped
         text, so consumers receive markup characters inside the string rather
         than child elements. -->
    <dct:description>&lt;p&gt;The Inter-university Consortium for Political and Social Research (ICPSR) is developing a computational approach to detect informal data use and construct reliable data impact metrics. Formal data citations that use unique identifiers are readily discoverable; however, informal references made to data are challenging to infer and detect as they are described in many ways and tend to occur in article footnotes, tables, figures, or elsewhere where they are not indexed for search. Identifying data citations is an essential step toward characterizing the impact of research data (i.e., who reuses research data and for what purposes). We use features of text including the presence of indicator terms, sections of articles, and frequency of acronyms, to predict the portions of articles that are likely to indicate data use. We then use a natural language processing (NLP) pipeline to extract candidate data references. In production, our model will support the review of publications to ingest into the ICPSR Bibliography of Data-related Literature as part of a broader effort to measure the impact of research data.&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as the EU Publications Office
         access-right concept PUBLIC, and once as a dct:RightsStatement
         identified by info:eu-repo/semantics/openAccess and labelled
         "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- First distribution: carries only the licence (CC BY 4.0 legal code) and
         the DOI as access URL; it has no download URL, size or media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5748382"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Second distribution: the downloadable PDF file, with its size and media
         type; it carries no dct:license of its own.
         NOTE(review): dcat:byteSize is a plain literal with no rdf:datatype,
         and dcat:mediaType is a plain string rather than a resource; the DCAT
         specification describes typed values for both. Confirm what
         downstream validators require before changing the emitted form. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5748382"/>
        <dcat:byteSize>956706</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5748382/files/FORCE-11-poster-2021.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
104 views
54 downloads
All versions | This version
Views: 104 | 104
Downloads: 54 | 54
Data volume: 51.7 MB | 51.7 MB
Unique views: 91 | 91
Unique downloads: 51 | 51

Share

Cite as