Dataset Open Access

Pilot study: Ranking of textual snippets based on the writing style

Andi Rexha; Mark Kröll; Hermann Ziak; Roman Kern


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.437461">
    <!-- Typing and identification of the record: it is declared both a dcat:Dataset and a DCMI Dataset,
         and the DOI URI is repeated as the dct:identifier literal and as the foaf:page target. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.437461</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.437461"/>
    <!-- Creators: one anonymous foaf:Agent per author, written as typed-node elements
         (equivalent to rdf:Description plus an explicit rdf:type of foaf:Agent).
         Each agent is linked through org:memberOf to an anonymous foaf:Organization
         whose foaf:name carries the affiliation string exactly as supplied. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Andi Rexha</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Research Assistant at TU Graz</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Mark Kröll</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Post Doc at Know-Center GmbH</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Hermann Ziak</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Research Assistant at Know-Center GmbH</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Roman Kern</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Head of Knowledge Discovery at Know-Center GmbH</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher, issue date, keywords and the alternate record URL. -->
    <dct:title>Pilot study: Ranking of textual snippets based on the writing style</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued appears twice: year-only here (xsd:gYear) and as a full xsd:date after the keywords. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2017</dct:issued>
    <dcat:keyword>authorship attribution</dcat:keyword>
    <dcat:keyword>stylometry</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-03-22</dct:issued>
    <!-- The Zenodo record URL is given both as an owl:sameAs alias of the DOI
         and as an adms:Identifier whose scheme agency is the string "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/437461"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/437461</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Two dct:description literals follow. The first holds HTML paragraph markup escaped as
         character data (&lt;p&gt; ... &lt;/p&gt;), so an XML parser delivers it as plain text rather
         than child elements. The second is the acknowledgements text. -->
    <dct:description>&lt;p&gt;In this pilot study, we tried to capture humans' behavior when identifying authorship of text snippets. At first, we selected textual snippets from the introduction of scientific articles written by single authors. Later, we presented to the evaluators a source and four target snippets, and then, ask them to rank the target snippets from the most to the least similar from the writing style.&lt;/p&gt; &lt;p&gt;The dataset is composed by 66 experiments manually checked for not having any clear hint during the ranking for the evaluators. For each experiment, we have evaluations from three different evaluators.&lt;/p&gt; &lt;p&gt;We present each experiment in a single line (in the CSV file), where, at first we present the metadata of the Source-Article (Journal, Title, Authorship, Snippet), and the metadata for the 4 target snippets (Journal, Title, Authorship, Snippet, Written From the same Author, Published in the same Journal) and the ranking given by each evaluator. This task was performed in the open source platform, Crowd Flower. &lt;/p&gt; &lt;p&gt;The headers of the CSV are self-explained. In the TXT file, you can find a human-readable version of the experiment. &lt;/p&gt; &lt;p&gt;For more information about the extraction of the data, please consider reading our paper: "Extending Scientific Literature Search by Including the Author’s Writing Style" @BIR: http://www.gesis.org/en/services/events/events-archive/conferences/ecir-workshops/ecir-workshop-2017 &lt;/p&gt;</dct:description>
    <dct:description>Acknowledgements: The Know-Center is funded within the Austrian COMET Program under the auspices of the Austrian Ministry of Transport, Innovation and Technology, the Austrian Ministry of Economics and Labour and by the State of Styria. COMET is managed by the Austrian Research Promotion Agency FFG.</dct:description>
    <!-- Access rights are stated twice: as the EU Publications Office access-right term PUBLIC
         and as an info:eu-repo openAccess rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution node that carries the licence (Creative Commons BY 4.0 legal code URI)
         and uses the dataset DOI as its access URL; it names no individual file. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.437461"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: the CSV rendition of the results (media type text/csv, size in bytes). -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL and downloadURL are resource-valued in DCAT, so both are expressed with
             rdf:resource. A property element may not combine rdf:resource with text content,
             hence downloadURL is an empty element. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.437461"/>
        <dcat:byteSize>225559</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/437461/files/result-pilot-study.csv"/>
        <dcat:mediaType>text/csv</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: the plain-text rendition of the results (media type text/plain, size in bytes). -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL and downloadURL are resource-valued in DCAT, so both are expressed with
             rdf:resource. A property element may not combine rdf:resource with text content,
             hence downloadURL is an empty element. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.437461"/>
        <dcat:byteSize>285281</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/437461/files/result-pilot-study.txt"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
755 views
227 downloads
Statistic | All versions | This version
Views | 755 | 756
Downloads | 227 | 227
Data volume | 53.7 MB | 53.7 MB
Unique views | 744 | 745
Unique downloads | 200 | 200

Share

Cite as