Dataset Open Access

Webis-Simple-Sentences-17 Corpus

Kiesel, Johannes; Stein, Benno; Lucks, Stefan


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nmm##2200000uu#4500</leader>
  <datafield tag="999" ind1="C" ind2="5">
    <subfield code="x">Johannes Kiesel, Benno Stein, and Stefan Lucks (2017). A Large-scale Analysis of the Mnemonic Password Advice. In Proceedings of the 24th Annual Network and Distributed System Security Symposium (NDSS 17).</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Web Crawl</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Sentence</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Readability</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Password</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Password Mnemonic</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Mnemonic</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Web</subfield>
  </datafield>
  <controlfield tag="005">20200124192605.0</controlfield>
  <controlfield tag="001">205950</controlfield>
  <datafield tag="711" ind1=" " ind2=" ">
    <subfield code="d">February 26 - March 1, 2017</subfield>
    <subfield code="g">NDSS 2017</subfield>
    <subfield code="a">Network and Distributed System Security Symposium 2017</subfield>
    <subfield code="c">San Diego, California.</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0001-9033-2217</subfield>
    <subfield code="a">Stein, Benno</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="a">Lucks, Stefan</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1062566945</subfield>
    <subfield code="z">md5:06099a2ec0e941080c37c8cf12bd7f75</subfield>
    <subfield code="u">https://zenodo.org/record/205950/files/webis-simple-sentences-17-corpus-test.txt.gz</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">11588106487</subfield>
    <subfield code="z">md5:7b3047871ad00bb2a83a5402f8237445</subfield>
    <subfield code="u">https://zenodo.org/record/205950/files/webis-simple-sentences-17-corpus-training.txt.gz</subfield>
  </datafield>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="y">Conference website</subfield>
    <subfield code="u">http://www.internetsociety.org/events/ndss-symposium/ndss-symposium-2017</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2017-02-27</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="p">user-webis</subfield>
    <subfield code="o">oai:zenodo.org:205950</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0002-1617-6508</subfield>
    <subfield code="a">Kiesel, Johannes</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Webis-Simple-Sentences-17 Corpus</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-webis</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;A corpus of 471,085,690 English sentences extracted from the ClueWeb12 Web Crawl. The sentences were sampled from a larger corpus to achieve a level of sentence complexity similar to the one of sentences that humans make up as a memory aid for remembering passwords. Sentence complexity was determined by syllables per word.&lt;/p&gt;

&lt;p&gt;The corpus is split into a training set and a test set, as used in the associated publication.&amp;nbsp; The test set is extracted from part 00 of the ClueWeb12, while the training set is extracted from the other parts.&lt;/p&gt;

&lt;p&gt;More information on the corpus can be found on the corpus web page at our university (listed under documented by).&lt;/p&gt;</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isCompiledBy</subfield>
    <subfield code="a">10.14722/ndss.2017.23077</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">isDocumentedBy</subfield>
    <subfield code="a">http://www.uni-weimar.de/en/media/chairs/webis/corpora/corpus-webis-sentences-17/</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isSupplementedBy</subfield>
    <subfield code="a">10.5281/zenodo.398838</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isSupplementedBy</subfield>
    <subfield code="a">10.5281/zenodo.398837</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.205950</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
</record>
579 views
324 downloads
Statistic | All versions | This version
Views | 579 | 580
Downloads | 324 | 324
Data volume | 2.0 TB | 2.0 TB
Unique views | 529 | 530
Unique downloads | 208 | 208

Share

Cite as