Dataset Open Access

Webis EditorialSum Corpus 2020

Syed, Shahbaz; El Baff, Roxanne; Al-Khatib, Khalid; Kiesel, Johannes; Stein, Benno; Potthast, Martin


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4105765">
    <!-- Types the resource both as a dcat:Dataset and as a DCMI Type Dataset. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <!-- The DOI URL is repeated here as a typed identifier literal and as the foaf:page;
         it is also the subject IRI given in rdf:about on the enclosing rdf:Description. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4105765</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4105765"/>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>Syed, Shahbaz</foaf:name>
        <foaf:givenName>Shahbaz</foaf:givenName>
        <foaf:familyName>Syed</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Leipzig University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>El Baff, Roxanne</foaf:name>
        <foaf:givenName>Roxanne</foaf:givenName>
        <foaf:familyName>El Baff</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>German Aerospace Centre (DLR)</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>Al-Khatib, Khalid</foaf:name>
        <foaf:givenName>Khalid</foaf:givenName>
        <foaf:familyName>Al-Khatib</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus Universität, Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>Kiesel, Johannes</foaf:name>
        <foaf:givenName>Johannes</foaf:givenName>
        <foaf:familyName>Kiesel</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus Universität, Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>Stein, Benno</foaf:name>
        <foaf:givenName>Benno</foaf:givenName>
        <foaf:familyName>Stein</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus Universität, Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Typed node element: yields the same triples as rdf:Description plus
           rdf:type foaf:Agent, and matches the form used for dct:publisher. -->
      <foaf:Agent>
        <foaf:name>Potthast, Martin</foaf:name>
        <foaf:givenName>Martin</foaf:givenName>
        <foaf:familyName>Potthast</foaf:familyName>
        <!-- Affiliation, given as an anonymous organization identified by name only. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Leipzig University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Dataset title as a plain literal (no xml:lang tag is set). -->
    <dct:title>Webis EditorialSum Corpus 2020</dct:title>
    <!-- Publisher is an anonymous foaf:Agent identified only by its name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Publication year (xsd:gYear). A second dct:issued with the full date follows the keywords.
         NOTE(review): two dct:issued values on one dataset may trip validators that expect at
         most one; confirm whether the year-only value is still needed. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- Free-text keywords, one dcat:keyword per term. -->
    <dcat:keyword>editorial summarization</dcat:keyword>
    <dcat:keyword>argumentation summarization</dcat:keyword>
    <dcat:keyword>extractive summarization</dcat:keyword>
    <!-- Full publication date (xsd:date); same year as the gYear value above. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-10-19</dct:issued>
    <!-- Language given as an EU Publications Office authority IRI (ENG). -->
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- The Zenodo record URL is declared equivalent to the DOI subject, and is repeated
         below as an adms:Identifier whose notation is that same URL. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4105765"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4105765</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Points at a DOI whose number is one lower than this record's own (…4105764 vs …4105765);
         presumably the DOI that covers all versions; confirm. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4105764"/>
    <!-- Zenodo community the record is part of. -->
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/webis"/>
    <!-- The description literal is HTML carried as escaped XML text: tags appear as &lt;p&gt; etc.,
         and HTML character references are escaped once more (e.g. &amp;nbsp;, &amp;quot;).
         The literal contains a line break before the quality-groups.csv paragraph, so that
         text starts at column 0; re-indenting it would change the literal's whitespace. -->
    <dct:description>&lt;p&gt;The Webis EditorialSum Corpus consists of 1330 manually curated extractive summaries for 266 news editorials spanning three diverse portals: Al-Jazeera, Guardian and Fox News. Each editorial has 5 summaries, each labeled for overall quality and fine grained properties such as thesis-relevance, persuasiveness, reasonableness, self-containedness.&lt;/p&gt; &lt;p&gt;The files are organized as follows:&lt;/p&gt; &lt;p&gt;&lt;br&gt; &lt;em&gt;corpus.csv&lt;/em&gt; - &lt;strong&gt;Contains all the editorials and their acquired summaries&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;&lt;br&gt; Note: (X = [1,5] for five summaries)&lt;/p&gt; &lt;p&gt;- article_id : Article ID in the corpus&lt;br&gt; - title : Title of the editorial&lt;br&gt; - article_text : Plain text of the editorial&lt;br&gt; - summary_{X}_text : Plain text of the corresponding summary&lt;br&gt; - thesis_{X}_text : Plain text of the thesis from the corresponding summary&lt;br&gt; - lead : top 15% of the editorial&amp;#39;s segments&lt;br&gt; - body : segments between lead and conclusion sections&lt;br&gt; - conclusion : bottom 15% of the editorial&amp;#39;s segments&lt;br&gt; - article_segments: Collection of paragraphs, each further divided into collection of segments containing:&lt;br&gt; &amp;nbsp;{ &amp;quot;number&amp;quot;: segment order in the editorial,&lt;br&gt; &amp;nbsp;&amp;nbsp; &amp;quot;text&amp;quot; : segment text,&lt;br&gt; &amp;nbsp;&amp;nbsp; &amp;quot;label&amp;quot;: ADU type&lt;br&gt; &amp;nbsp;}&lt;br&gt; - summary_{X}_segments: Collection of summary segments containing:&lt;br&gt; { &amp;quot;number&amp;quot;: segment order in the editorial,&lt;br&gt; &amp;nbsp; &amp;quot;text&amp;quot; : segment text,&lt;br&gt; &amp;nbsp; &amp;quot;adu_label&amp;quot;: ADU type from the editorial,&lt;br&gt; &amp;nbsp; &amp;quot;summary_label&amp;quot;: can be &amp;#39;thesis&amp;#39; or &amp;#39;justification&amp;#39;&lt;br&gt; }&lt;/p&gt; &lt;p&gt;&lt;br&gt; 
&lt;em&gt;quality-groups.csv&lt;/em&gt; - &lt;strong&gt;Contains the IDs for high(and low)-quality summaries for each quality dimension per editorial&lt;/strong&gt;&lt;br&gt; &lt;br&gt; For example: article_id 2 has four high_quality summaries (summary_1, summary_2, summary_3, summary_4) and one low_quality summary (summary_5) in terms of overall quality.&lt;br&gt; The summary texts can be obtained from corpus.csv respectively.&lt;/p&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as an EU Publications Office authority IRI (PUBLIC)
         and once as an info:eu-repo rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the licence (CC BY 4.0 legal code) together with the DOI as
         access URL; it has no download URL, size or media type.
         NOTE(review): the per-file distributions in this document carry no dct:license of
         their own; confirm consumers pick the licence up from this entry. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4105765"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File-level distribution for corpus.csv: access URL (the dataset DOI), direct
         download link, size in bytes and media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4105765"/>
        <!-- Typed explicitly so RDF consumers read a number rather than a plain string.
             DCAT 3 gives xsd:nonNegativeInteger as the range (DCAT 2 used xsd:decimal);
             the lexical value is unchanged. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">10733231</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4105765/files/corpus.csv"/>
        <!-- NOTE(review): DCAT recommends an IANA media-type IRI here instead of a plain
             literal; left as-is so consumers reading the text value keep working. -->
        <dcat:mediaType>text/csv</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File-level distribution for quality-groups.csv: access URL (the dataset DOI),
         direct download link, size in bytes and media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4105765"/>
        <!-- Typed explicitly so RDF consumers read a number rather than a plain string.
             DCAT 3 gives xsd:nonNegativeInteger as the range (DCAT 2 used xsd:decimal);
             the lexical value is unchanged. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">94974</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4105765/files/quality-groups.csv"/>
        <!-- NOTE(review): DCAT recommends an IANA media-type IRI here instead of a plain
             literal; left as-is so consumers reading the text value keep working. -->
        <dcat:mediaType>text/csv</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
56 views
63 downloads
                   All versions   This version
Views              56             56
Downloads          63             63
Data volume        548.5 MB       548.5 MB
Unique views       48             48
Unique downloads   38             38

Share

Cite as