Dataset Open Access

Webis Clickbait Corpus 2017 (Webis-Clickbait-17)

Potthast, Martin; Gollub, Tim; Wiegmann, Matti; Stein, Benno; Hagen, Matthias; Komlossy, Kristof; Schuster, Sebstian; Fernandez, Erika P. Garces


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3346491">
    <!-- Dataset typing and identity: the record is typed as dcat:Dataset and as the DCMI type Dataset; its DOI is given both as an xsd:anyURI identifier literal and as a foaf:page link. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3346491</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-2451-0665">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-2451-0665</dct:identifier>
        <foaf:name>Potthast, Martin</foaf:name>
        <foaf:givenName>Martin</foaf:givenName>
        <foaf:familyName>Potthast</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-1737-6517">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-1737-6517</dct:identifier>
        <foaf:name>Gollub, Tim</foaf:name>
        <foaf:givenName>Tim</foaf:givenName>
        <foaf:familyName>Gollub</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-3911-0456">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-3911-0456</dct:identifier>
        <foaf:name>Wiegmann, Matti</foaf:name>
        <foaf:givenName>Matti</foaf:givenName>
        <foaf:familyName>Wiegmann</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-9033-2217">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-9033-2217</dct:identifier>
        <foaf:name>Stein, Benno</foaf:name>
        <foaf:givenName>Benno</foaf:givenName>
        <foaf:familyName>Stein</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator identified by an ORCID IRI, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-9733-2890">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-9733-2890</dct:identifier>
        <foaf:name>Hagen, Matthias</foaf:name>
        <foaf:givenName>Matthias</foaf:givenName>
        <foaf:familyName>Hagen</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifying IRI: a blank node, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent>
        <foaf:name>Komlossy, Kristof</foaf:name>
        <foaf:givenName>Kristof</foaf:givenName>
        <foaf:familyName>Komlossy</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifying IRI: a blank node, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <!-- NOTE(review): the given name "Sebstian" looks like a misspelling of "Sebastian"; kept as exported, confirm against the upstream record before changing. -->
      <foaf:Agent>
        <foaf:name>Schuster, Sebstian</foaf:name>
        <foaf:givenName>Sebstian</foaf:givenName>
        <foaf:familyName>Schuster</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator without an identifying IRI: a blank node, written as a typed foaf:Agent node (same triples as rdf:Description plus rdf:type). -->
      <foaf:Agent>
        <foaf:name>Fernandez, Erika P. Garces</foaf:name>
        <foaf:givenName>Erika P. Garces</foaf:givenName>
        <foaf:familyName>Fernandez</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata for the dataset: title, publisher, issue dates, keywords and language. -->
    <dct:title>Webis Clickbait Corpus 2017 (Webis-Clickbait-17)</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued appears twice in this record: here as an xsd:gYear and again after the keywords as a full xsd:date. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2018</dct:issued>
    <dcat:keyword>clickbait</dcat:keyword>
    <dcat:keyword>click</dcat:keyword>
    <dcat:keyword>bait</dcat:keyword>
    <dcat:keyword>detection</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2018-06-11</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- Alternate identifier: the zenodo.org record URL, stated both as owl:sameAs and as an adms:Identifier with scheme agency "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3346491"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3346491</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Relations: dct:isVersionOf points at a second DOI (zenodo.3346490); dct:isPartOf points at the "webis" community page. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3346490"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/webis"/>
    <!-- Free-text description carried as entity-escaped HTML inside a single literal; the line break inside the literal is part of the exported value. -->
    <dct:description>&lt;p&gt;The Webis Clickbait Corpus 2017 (Webis-Clickbait-17) comprises a total of 38,517 Twitter posts from 27 major US news publishers. In addition to the posts, information about the articles linked in the posts is included. The posts had been published between November 2016 and June 2017. To avoid publisher and topical biases, a maximum of ten posts per day and publisher were sampled. All posts were annotated on a 4-point scale [not click baiting (0.0), slightly click baiting (0.33), considerably click baiting (0.66), heavily click baiting (1.0)] by five annotators from Amazon Mechanical Turk. A total of 9,276 posts are considered clickbait by the majority of annotators. In terms of its size, this corpus outranges the &lt;a href="https://doi.org/10.5281/zenodo.3251557"&gt;Webis Clickbait Corpus 2016&lt;/a&gt; by one order of magnitude. The corpus is divided into two logical parts, a training and a test dataset. The training dataset has been released in the course of the &lt;a href="http://www.clickbait-challenge.org"&gt;Clickbait Challenge&lt;/a&gt; and a download link is provided below. To allow for an objective evaluation of clickbait detection systems, the test dataset is available only through the Evaluation-as-a-Service platform &lt;a href="http://www.tira.io/"&gt;TIRA&lt;/a&gt; at the moment. On TIRA, developers can deploy clickbait detection systems and execute them against the test dataset. The performance of the submitted systems can be viewed on the &lt;a href="http://www.tira.io/task/clickbait-detection/dataset/clickbait17-test-170720/"&gt;TIRA page of the Clickbait Challenge&lt;/a&gt;.&lt;/p&gt; &lt;p&gt;To make working with the Webis Clickbait Corpus 2017 convenient, and to allow for its validation and replication, we are developing and sharing a number of software tools:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;&lt;a href="https://github.com/webis-de/corpus-viewer"&gt;Corpus Viewer&lt;/a&gt;. 
Our Django web service for exploring corpora. For importing the Webis Clickbait Corpus 2017 into the corpus viewer, we provide an appropriate &lt;a href="https://webis.de/data/clickbait17-corpus-viewer-config.py"&gt;configuration file&lt;/a&gt;.&lt;/li&gt; &lt;li&gt;&lt;a href="https://github.com/webis-de/mturk-manager"&gt;MTurk Manager&lt;/a&gt;. Our Django web service for conducting sophisticated crowd sourcing tasks on Amazon Mechanical Turk. The service allows to manage projects, upload batches of HITS, apply custom reviewing interfaces, and more. To make the clickbait crowd-sourcing task replicable, we share the &lt;a href="https://webis.de/data/clickbait17-worker-template.html"&gt; worker template&lt;/a&gt; that we used to instruct the workers and to display the tweets. Also shared is a &lt;a href="https://webis.de/data/clickbait17-review-template.html"&gt;reviewing template&lt;/a&gt; that can be used to accept/reject assignments and to assess the quality of the received annotations quickly.&lt;/li&gt; &lt;li&gt;&lt;a href="https://github.com/webis-de/webis-web-archiver"&gt;Web Archiver&lt;/a&gt;. Software for archiving web pages as WARC files and reproducing them later on. This software can be used to open the WARC archives provided above.&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;In addition to the corpus &amp;quot;clickbait17-train-170630.zip&amp;quot;, we provide the original WARC archives of the articles that are linked in the posts. They are split in 5 archives that can be extracted separately.&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as a publications.europa.eu access-right authority IRI (PUBLIC) and once as an info:eu-repo openAccess rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the license (Creative Commons BY 4.0 legal code) with the DOI as its access URL; it lists no file. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distributions: five WARC archive bundles followed by the corpus zip. -->
    <!-- dcat:accessURL and dcat:downloadURL are IRI references (rdf:resource), not text literals, because DCAT defines both as resource-valued. Square brackets in the archive file names are percent-encoded (%5B, %5D) since they are not permitted in an IRI path. -->
    <!-- NOTE(review): dcat:byteSize and dcat:mediaType are kept as the untyped literals of the export; confirm which DCAT version consumers target before adding a datatype or a media-type IRI. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>20589882119</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/archives-clickbait17-train-170630-part%5B00-19%5D.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>20625578485</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/archives-clickbait17-train-170630-part%5B20-39%5D.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>20853102573</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/archives-clickbait17-train-170630-part%5B40-59%5D.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>20427747249</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/archives-clickbait17-train-170630-part%5B60-79%5D.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>19623971113</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/archives-clickbait17-train-170630-part%5B80-99%5D.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3346491"/>
        <dcat:byteSize>937094590</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3346491/files/clickbait17-train-170630.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
1,082
9,736
views
downloads
All versions This version
Views 1,082 1,090
Downloads 9,736 9,745
Data volume 196.2 TB 196.3 TB
Unique views 913 921
Unique downloads 1,227 1,229

Share

Cite as