Dataset Open Access

Palmetto position storing Lucene index of Dutch Wikipedia

van der Zwaan, Janneke M.; Marx, Maarten; Kamps, Jaap


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <!-- MARC21 (slim schema) bibliographic record for the Zenodo dataset with
       control number 46377. Fields appear in export order, not in ascending
       tag order; consumers should select fields by tag rather than position. -->
  <leader>00000nmm##2200000uu#4500</leader>
  <!-- 540: terms governing use; $u = licence URI, $a = licence name. -->
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">http://creativecommons.org/licenses/by-sa/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution Share Alike 4.0 International</subfield>
  </datafield>
  <!-- 260: publication information; $c = publication date. -->
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2016-02-22</subfield>
  </datafield>
  <!-- 005: date and time of the latest transaction (yyyymmddhhmmss.f). -->
  <controlfield tag="005">20190410034400.0</controlfield>
  <!-- 001: control number; matches the record id used in the URLs and DOI below. -->
  <controlfield tag="001">46377</controlfield>
  <!-- 909 (locally defined 9XX field): $o holds the OAI identifier;
       $p is presumably the OAI set name (confirm against the exporter). -->
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="o">oai:zenodo.org:46377</subfield>
  </datafield>
  <!-- 520: summary. The value is entity-escaped HTML (paragraphs, emphasis,
       and a doubly escaped en dash), so it must be unescaped once to obtain
       the HTML and the line breaks inside it are part of the data. -->
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;Dutch language resource for calculating topic coherence with Palmetto [1, 2]. The dataset is a position storing Lucene index of the Dutch Wikipedia [3]. It was created in the context of the Netherlands eScience Center Dilipad project [4]. The pdf file contains the results of a case study that shows best topic coherence measure for topics consisting of Dutch nouns is NPMI.&lt;/p&gt;

&lt;p&gt;More details can be found in the README.&lt;/p&gt;

&lt;p&gt;[1] M. Roeder, A. Both, and A. Hinneburg. Exploring the space of topic coherence measures. In &lt;em&gt;Proceedings of the Eighth ACM International Conference on Web Search and Data Mining&lt;/em&gt;, pages 399&amp;ndash;408, 2015.&lt;/p&gt;

&lt;p&gt;[2] http://aksw.org/Projects/Palmetto.html&lt;/p&gt;

&lt;p&gt;[3] https://dumps.wikimedia.org/nlwiki/20151102/&lt;/p&gt;

&lt;p&gt;[4] https://www.esciencecenter.nl/project/dilipad&lt;/p&gt;</subfield>
  </datafield>
  <!-- 700 (repeatable): additional authors; $a = name, $u = affiliation. -->
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">University of Amsterdam</subfield>
    <subfield code="a">Marx, Maarten</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">University of Amsterdam</subfield>
    <subfield code="a">Kamps, Jaap</subfield>
  </datafield>
  <!-- 856 (repeatable): electronic location of each file in the record;
       $u = download URL, $s = file size (presumably bytes), $z = note
       carrying the file's md5 checksum. -->
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">146622</subfield>
    <subfield code="z">md5:11b81cdd6ed9520fbc46ada4bf0012b5</subfield>
    <subfield code="u">https://zenodo.org/record/46377/files/case_study.pdf</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1777</subfield>
    <subfield code="z">md5:17f782f72275d98e71f4eb901ae26146</subfield>
    <subfield code="u">https://zenodo.org/record/46377/files/README.md</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">658182830</subfield>
    <subfield code="z">md5:c7762b00271203e5fde48816cf1f9f03</subfield>
    <subfield code="u">https://zenodo.org/record/46377/files/nlwiki-palmetto.tar.gz</subfield>
  </datafield>
  <!-- 542: copyright status information; $l = status (here the access right). -->
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <!-- 980 (locally defined 9XX field): $a carries the resource type. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
  <!-- 100: first author; $a = name, $u = affiliation.
       NOTE(review): $a contains two spaces after the comma, unlike the 700
       entries; looks like a data-entry artefact, confirm upstream before
       normalising. -->
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Netherlands eScience Center</subfield>
    <subfield code="a">van der Zwaan,  Janneke M.</subfield>
  </datafield>
  <!-- 653 (repeatable): uncontrolled index terms, one keyword per field. -->
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">topic modeling</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">topic coherence</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Palmetto</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Dutch</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Wikipedia</subfield>
  </datafield>
  <!-- 024: other standard identifier; $2 names the scheme (doi), $a the value. -->
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.46377</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <!-- 245: title statement; $a = title. -->
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Palmetto position storing Lucene index of Dutch Wikipedia</subfield>
  </datafield>
  <!-- 650: topical subject term; ind2="7" means the vocabulary is named in $2.
       NOTE(review): the term is "cc-by" while field 540 states CC BY-SA 4.0;
       possibly a coarse licence family id, confirm with the exporter. -->
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
</record>
3,390
49
views
downloads
All versions This version
Views 3,390 3,390
Downloads 49 49
Data volume 6.6 GB 6.6 GB
Unique views 3,376 3,376
Unique downloads 37 37

Share

Cite as