Dataset Restricted Access

Data for PAN at SemEval 2019 Task 4: Hyperpartisan News Detection

Johannes Kiesel; Maria Mestre; Rishabh Shukla; Emmanuel Vincent; David Corney; Payam Adineh; Benno Stein; Martin Potthast


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- RDF/XML document root: every namespace prefix is declared once here, one per line. -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:adms="http://www.w3.org/ns/adms#"
         xmlns:cnt="http://www.w3.org/2011/content#"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dctype="http://purl.org/dc/dcmitype/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:duv="http://www.w3.org/ns/duv#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:frapo="http://purl.org/cerif/frapo/"
         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:gsp="http://www.opengis.net/ont/geosparql#"
         xmlns:locn="http://www.w3.org/ns/locn#"
         xmlns:org="http://www.w3.org/ns/org#"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:prov="http://www.w3.org/ns/prov#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:schema="http://schema.org/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
         xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- The dataset record. Its subject IRI is the DOI URL. -->
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.1489920">
    <!-- Typed both as a dcat:Dataset and with the DCMI type "Dataset". -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <!-- The same DOI URL is repeated as an xsd:anyURI identifier literal and as the foaf:page link. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.1489920</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.1489920"/>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI; written as a typed node (same triples as rdf:Description + rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-1617-6508">
        <foaf:name>Johannes Kiesel</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifier IRI (anonymous node), written as a typed foaf:Agent node. -->
      <foaf:Agent>
        <foaf:name>Maria Mestre</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Factmata Ltd.</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifier IRI (anonymous node), written as a typed foaf:Agent node. -->
      <foaf:Agent>
        <foaf:name>Rishabh Shukla</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Factmata Ltd.</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifier IRI (anonymous node), written as a typed foaf:Agent node. -->
      <foaf:Agent>
        <foaf:name>Emmanuel Vincent</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Factmata Ltd.</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator with a name only: no identifier IRI and no affiliation is stated. -->
      <foaf:Agent>
        <foaf:name>David Corney</foaf:name>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifier IRI (anonymous node), written as a typed foaf:Agent node. -->
      <foaf:Agent>
        <foaf:name>Payam Adineh</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI; written as a typed node (same triples as rdf:Description + rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-9033-2217">
        <foaf:name>Benno Stein</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI; written as a typed node (same triples as rdf:Description + rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-2451-0665">
        <foaf:name>Martin Potthast</foaf:name>
        <!-- Affiliation, carried as an anonymous organization node. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Leipzig University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Core descriptive metadata. Sibling order carries no meaning in RDF, so related statements are grouped. -->
    <dct:title>Data for PAN at SemEval 2019 Task 4: Hyperpartisan News Detection</dct:title>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- The issue date is stated twice: once as a full xsd:date and once as the xsd:gYear alone. -->
    <!-- NOTE(review): DCAT-AP appears to allow at most one dct:issued per dataset; confirm whether the gYear value is still needed by consumers. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2018-11-22</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2018</dct:issued>
    <!-- Publisher, given as an anonymous agent with a name only. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Free-text keywords, one dcat:keyword statement per term. -->
    <dcat:keyword>Hyperpartisan news</dcat:keyword>
    <dcat:keyword>SemEval</dcat:keyword>
    <dcat:keyword>SemEval 2019</dcat:keyword>
    <dcat:keyword>SemEval 2019 Task 4</dcat:keyword>
    <dcat:keyword>Biased news</dcat:keyword>
    <dcat:keyword>News bias</dcat:keyword>
    <dcat:keyword>Hyperpartisan</dcat:keyword>
    <dcat:keyword>Hyperpartisanship</dcat:keyword>
    <!-- Alternate identifier: the Zenodo record URL, stated both as an ADMS identifier and as owl:sameAs. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/1489920</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <owl:sameAs rdf:resource="https://zenodo.org/record/1489920"/>
    <!-- Relations to other resources: the DOI this record is a version of, and a page that references it. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.1310145"/>
    <dct:isReferencedBy rdf:resource="https://pan.webis.de/semeval19/semeval19-web/"/>
    <!-- Zenodo communities this record is part of. -->
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/pan"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/webis"/>
    <!-- Human-readable version label of this record. -->
    <owl:versionInfo>Training and validation v1</owl:versionInfo>
    <!-- The description is HTML markup carried as escaped text: HTML tags appear as &lt;p&gt; etc., and the HTML's own quote entities appear as &amp;quot;. Keep it on one line, since whitespace inside the element is part of the literal. -->
    <dct:description>&lt;p&gt;Training and validation data for the &lt;a href="https://webis.de/events/semeval-19/"&gt;PAN @ SemEval 2019 Task 4: Hyperpartisan News Detection&lt;/a&gt;.&lt;/p&gt; &lt;p&gt;The data is split into multiple files. The articles are contained in the files with names starting with &amp;quot;articles-&amp;quot; (which validate against the XML schema article.xsd). The ground-truth information is contained in the files with names starting with &amp;quot;ground-truth-&amp;quot; (which validate against the XML schema ground-truth.xsd).&lt;/p&gt; &lt;p&gt;The first part of the data (filename contains &amp;quot;bypublisher&amp;quot;) is labeled by the overall bias of the publisher as provided by BuzzFeed journalists or MediaBiasFactCheck.com. It contains a total of 750,000 articles, half of which (375,000) are hyperpartisan and half of which are not. Half of the articles that are hyperpartisan (187,500) are on the left side of the political spectrum, half are on the right side. This data is split into a training set (80%, 600,000 articles) and a validation set (20%, 150,000 articles), where &lt;strong&gt;no&lt;/strong&gt; publisher that occurs in the training set also occurs in the validation set. Similarly, none of the publishers in those sets will occur in the test set.&lt;/p&gt; &lt;p&gt;The second part of the data (filename contains &amp;quot;byarticle&amp;quot;) is labeled through crowdsourcing on an article basis. The data contains only articles for which a consensus among the crowdsourcing workers existed. It contains a total of 645 articles. Of these, 238 (37%) are hyperpartisan and 407 (63%) are not, We will use a similar (but balanced!) test set. Again, none of the publishers in this set will occur in the test set.&lt;/p&gt; &lt;p&gt;Note that article IDs are only unique within the parts.&lt;/p&gt; &lt;p&gt;Acknowledgements: Thanks to Jonathan Miller for his assistance in cleaning the data!&lt;/p&gt;</dct:description>
    <!-- Single distribution, whose access URL is the DOI URL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.1489920"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Access rights are stated twice: as a labelled info:eu-repo rights statement and as an EU Publications Office authority IRI. -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/restrictedAccess">
        <rdfs:label>Restricted Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/RESTRICTED"/>
  </rdf:Description>
</rdf:RDF>
13,923 views
5,944 downloads
All versions | This version
Views: 13,923 | 5,192
Downloads: 5,944 | 3,751
Data volume: 1.9 TB | 755.3 GB
Unique views: 11,895 | 4,039
Unique downloads: 1,467 | 867

Share

Cite as