There is a newer version of this record available.

Dataset Open Access

Webis Query Spelling Corpus 2017 (Webis-QSpell-17)

Hagen, Matthias; Potthast, Martin; Stein, Benno; Gohsen, Marcel; Rathgeber, Anja


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<!--
  MARC21 "slim" (MARCXML) record describing the dataset
  "Webis Query Spelling Corpus 2017 (Webis-QSpell-17)", control number 3256201.
  Fields appear in the order they were exported; they are not sorted by tag.
  Field names in the comments below follow the MARC 21 bibliographic format;
  9XX tags are locally defined, so their meaning is inferred from content only.
-->
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nmm##2200000uu#4500</leader>
  <!-- 999 C5 (local tag): free-text citation of the accompanying SIGIR 2017 paper.
       NOTE(review): presumably the repository's "references" field; confirm against the exporter. -->
  <datafield tag="999" ind1="C" ind2="5">
    <subfield code="x">Matthias Hagen, Martin Potthast, Marcel Gohsen, Anja Rathgeber, and Benno Stein. A Large-Scale Query Spelling Correction Corpus. In Noriko Kando et al, editors, 40th International ACM Conference on Research and Development in Information Retrieval (SIGIR 2017), pages 1261-1264, August 2017. ACM. ISBN 978-1-4503-5022-8.</subfield>
  </datafield>
  <!-- 041: language code of the resource. -->
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <!-- 653 (repeated): uncontrolled index terms, one keyword per datafield. -->
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">queries</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">AOL</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">spelling</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">dictionaries</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">search engines</subfield>
  </datafield>
  <!-- 005: date and time of latest transaction, format yyyymmddhhmmss.f (here 2020-01-24 19:22:25). -->
  <controlfield tag="005">20200124192225.0</controlfield>
  <!-- 001: control number; the same number appears in the DOI (024), OAI id (909) and file URL (856). -->
  <controlfield tag="001">3256201</controlfield>
  <!-- 711: meeting name; $a is the full conference title, $g the short form. -->
  <datafield tag="711" ind1=" " ind2=" ">
    <subfield code="g">SIGIR 2017</subfield>
    <subfield code="a">40th International ACM Conference on Research and Development in Information Retrieval</subfield>
  </datafield>
  <!-- 700 (repeated): additional authors. $a name, $u affiliation,
       $0 ORCID identifier where one is recorded (the last two authors have none).
       The first author is carried separately in field 100. -->
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0003-2451-0665</subfield>
    <subfield code="a">Potthast, Martin</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0001-9033-2217</subfield>
    <subfield code="a">Stein, Benno</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="a">Gohsen, Marcel</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="a">Rathgeber, Anja</subfield>
  </datafield>
  <!-- 856: electronic location of the corpus archive. $u download URL, $z checksum
       (prefixed with its algorithm, md5), $s file size (presumably in bytes; confirm). -->
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1073998</subfield>
    <subfield code="z">md5:cda2d9bf28ff94a2f17dc97a642b8b84</subfield>
    <subfield code="u">https://zenodo.org/record/3256201/files/corpus-webis-qspell-17.zip</subfield>
  </datafield>
  <!-- 542: copyright / access status; "open" matches the page's "Open Access" label. -->
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <!-- 260 $c: publication date in ISO 8601 form. -->
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2017-08-11</subfield>
  </datafield>
  <!-- 909 CO (local tag): OAI-PMH information. $o is the OAI identifier;
       the $p values look like OAI set names (TODO confirm). -->
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="p">user-webis</subfield>
    <subfield code="o">oai:zenodo.org:3256201</subfield>
  </datafield>
  <!-- 100: main entry, i.e. the first author; same subfield layout as the 700 fields. -->
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0002-9733-2890</subfield>
    <subfield code="a">Hagen, Matthias</subfield>
  </datafield>
  <!-- 245: title of the dataset. -->
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Webis Query Spelling Corpus 2017 (Webis-QSpell-17)</subfield>
  </datafield>
  <!-- 980 (local tag, repeated further down): collection membership.
       "user-webis" matches one of the $p values in 909. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-webis</subfield>
  </datafield>
  <!-- 540: terms governing use; $a licence name, $u link to the licence text. -->
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <!-- 650 with ind2="7": topical term whose source vocabulary is named in $2;
       here it carries the short licence identifier. -->
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <!-- 520: summary. The subfield holds HTML (paragraphs and a link) stored as escaped text,
       so consumers must unescape it before rendering. The line breaks inside the subfield
       are part of the text value; do not re-indent it. -->
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;The Webis Query Spelling Corpus 2017 (Webis-QSpell-17) contains 54,772 web queries that were manually spell-checked; for 9,171 queries alternative spelling variants are contained.&lt;/p&gt;

&lt;p&gt;As for segmentations of many of the queries (i.e., tagged concepts and phrases), please refer to the companion corpus &lt;a href="https://webis.de/data/webis-qsec-10.html"&gt;Webis-QSeC-10&lt;/a&gt;.&lt;/p&gt;</subfield>
  </datafield>
  <!-- 773: related item. $i relation type, $n identifier scheme, $a identifier.
       This record is a version of DOI 10.5281/zenodo.3256200. -->
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="a">10.5281/zenodo.3256200</subfield>
  </datafield>
  <!-- 024: standard identifier of this record itself; $2 names the scheme (doi). -->
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.3256201</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <!-- 980 (second occurrence): the value "dataset" agrees with the "Dataset" label on the record page. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
</record>
270 views
76 downloads
Statistic | All versions | This version
Views | 270 | 214
Downloads | 76 | 49
Data volume | 114.3 MB | 52.6 MB
Unique views | 238 | 202
Unique downloads | 61 | 41

Share

Cite as