Dataset Open Access

Webis-Simple-Sentences-17 Corpus

Kiesel, Johannes; Stein, Benno; Lucks, Stefan


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.205950">
    <!-- Core identity of the record: it is typed as a DCAT Dataset and as a DCMI Dataset,
         and the DOI is reused as the identifier literal and as the foaf:page target. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.205950</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.205950"/>
    <!-- Creator "Kiesel, Johannes": an agent named by an ORCID URI, with the bare ORCID iD
         repeated as a string identifier and the affiliation given as an unnamed organization node. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-1617-6508">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-1617-6508</dct:identifier>
        <foaf:name>Kiesel, Johannes</foaf:name>
        <foaf:givenName>Johannes</foaf:givenName>
        <foaf:familyName>Kiesel</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator "Stein, Benno": an agent named by an ORCID URI, with the bare ORCID iD
         repeated as a string identifier and the affiliation given as an unnamed organization node. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-9033-2217">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-9033-2217</dct:identifier>
        <foaf:name>Stein, Benno</foaf:name>
        <foaf:givenName>Benno</foaf:givenName>
        <foaf:familyName>Stein</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator "Lucks, Stefan": no URI or identifier is given for this agent,
         so the node stays anonymous (a blank node). -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Lucks, Stefan</foaf:name>
        <foaf:givenName>Stefan</foaf:givenName>
        <foaf:familyName>Lucks</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Bauhaus-Universität Weimar</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher, issue date(s), free-text keywords,
         and an owl:sameAs link from the DOI to the zenodo.org record URL. -->
    <dct:title>Webis-Simple-Sentences-17 Corpus</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- NOTE(review): dct:issued is stated twice on this dataset, here as xsd:gYear and
         further down as xsd:date. Consumers that expect a single issue date should
         presumably prefer the full date; confirm before removing either value. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2017</dct:issued>
    <dcat:keyword>Web Crawl</dcat:keyword>
    <dcat:keyword>Sentence</dcat:keyword>
    <dcat:keyword>Readability</dcat:keyword>
    <dcat:keyword>Password</dcat:keyword>
    <dcat:keyword>Password Mnemonic</dcat:keyword>
    <dcat:keyword>Mnemonic</dcat:keyword>
    <dcat:keyword>Web</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
    <owl:sameAs rdf:resource="https://zenodo.org/record/205950"/>
    <!-- Secondary identifier: the zenodo.org record URL as an anyURI notation, labelled "url".
         NOTE(review): the ADMS vocabulary appears to spell this property adms:schemaAgency;
         verify which spelling downstream consumers expect before changing it. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/205950</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Links to other resources: three untyped dct:relation targets (all DOIs),
         a second foaf:page pointing at the corpus page on uni-weimar.de,
         and membership in the "webis" Zenodo community via dct:isPartOf. -->
    <dct:relation rdf:resource="https://doi.org/10.14722/ndss.2017.23077"/>
    <foaf:page rdf:resource="http://www.uni-weimar.de/en/media/chairs/webis/corpora/corpus-webis-sentences-17/"/>
    <dct:relation rdf:resource="https://doi.org/10.5281/zenodo.398838"/>
    <dct:relation rdf:resource="https://doi.org/10.5281/zenodo.398837"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/webis"/>
    <!-- Two dct:description literals follow. The first is the abstract, stored as escaped HTML
         markup (paragraph tags and an escaped non-breaking-space entity are part of the text).
         The second is a JSON object with a "references" list serialized into the literal.
         Consumers must unescape or parse these strings themselves. -->
    <dct:description>&lt;p&gt;A corpus of 471,085,690 English sentences extracted from the ClueWeb12 Web Crawl. The sentences were sampled from a larger corpus to achieve a level of sentence complexity similar to the one of sentences that humans make up as a memory aid for remembering passwords. Sentence complexity was determined by syllables per word.&lt;/p&gt; &lt;p&gt;The corpus is split in training and test set as it is used in the associated publication.&amp;nbsp; The test set is extracted from part 00 of the ClueWeb12, while the training set is extracted from the other parts.&lt;/p&gt; &lt;p&gt;More information on the corpus can be found on the corpus web page at our university (listed under documented by).&lt;/p&gt;</dct:description>
    <dct:description>{"references": ["Johannes Kiesel, Benno Stein, and Stefan Lucks (2017). A Large-scale Analysis of the Mnemonic Password Advice. In Proceedings of the 24th Annual Network and Distributed System Security Symposium (NDSS 17)."]}</dct:description>
    <!-- Access rights are stated twice: once as the PUBLIC term of the
         publications.europa.eu access-right authority, and once as an
         info:eu-repo openAccess rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries only the licence (CC BY 4.0 legal code) and the DOI
         as access URL; it has no download URL, size, or media type of its own. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.205950"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the file "webis-simple-sentences-17-corpus-test.txt.gz":
         accessed through the dataset DOI, downloadable directly from zenodo.org. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.205950"/>
        <!-- Size in bytes. Typed explicitly, like the other typed literals in this record,
             so consumers receive a number rather than an untyped plain string. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">1062566945</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/205950/files/webis-simple-sentences-17-corpus-test.txt.gz"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the file "webis-simple-sentences-17-corpus-training.txt.gz":
         accessed through the dataset DOI, downloadable directly from zenodo.org. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.205950"/>
        <!-- Size in bytes. Typed explicitly, like the other typed literals in this record,
             so consumers receive a number rather than an untyped plain string. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">11588106487</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/205950/files/webis-simple-sentences-17-corpus-training.txt.gz"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
579
324
views
downloads
All versions This version
Views 579 580
Downloads 324 324
Data volume 2.0 TB 2.0 TB
Unique views 529 530
Unique downloads 208 208

Share

Cite as