Dataset Open Access

A part-of-speech (POS) lexicon of Classical Tibetan for NLP

Hill, Nathan W.; Garrett, Edward


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<!--
  MARC21 (MARCXML "slim") export of Zenodo record 574876:
  "A part-of-speech (POS) lexicon of Classical Tibetan for NLP".

  Control fields are grouped directly after the leader and ahead of all
  data fields, following the element order of the MARC21 slim schema's
  record content model (leader, controlfield*, datafield*).
  Data fields keep their original relative order.
-->
<record xmlns="http://www.loc.gov/MARC21/slim">
  <!-- NOTE(review): the leader uses '#' as a placeholder character; confirm
       whether downstream consumers expect blanks here instead. -->
  <leader>00000nmm##2200000uu#4500</leader>
  <controlfield tag="001">574876</controlfield>
  <controlfield tag="005">20210429142804.0</controlfield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Tibetan language</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Natural language processing</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">part-of-speech tagging</subfield>
  </datafield>
  <datafield tag="500" ind1=" " ind2=" ">
    <subfield code="a">funded by the UK's Arts and Humanities Research Council (grant code: AH/J00152X/1)</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">SOAS, University of London</subfield>
    <subfield code="0">(orcid)0000-0001-8875-7654</subfield>
    <subfield code="a">Garrett, Edward</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">88095</subfield>
    <subfield code="z">md5:021d0e1089f91ef7cc65d42dbf21518c</subfield>
    <subfield code="u">https://zenodo.org/record/574876/files/Lexicons.zip</subfield>
  </datafield>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2017-05-11</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="p">user-tibnlp</subfield>
    <subfield code="o">oai:zenodo.org:574876</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <!-- Affiliation spelling corrected ("Univeristy" -> "University") to match the 700 field. -->
    <subfield code="u">SOAS, University of London</subfield>
    <subfield code="0">(orcid)0000-0001-6423-017X</subfield>
    <subfield code="a">Hill, Nathan W.</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">A part-of-speech (POS) lexicon of Classical Tibetan for NLP</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-tibnlp</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <!-- The description is carried as escaped HTML; it is kept on one line because whitespace inside text content is data. -->
    <subfield code="a">&lt;p&gt;This part-of-speech (POS) lexicon of Classical Tibetan was prepared in the course of the research project &amp;#39;Tibetan in Digital Communication&amp;#39; (2012-2015) hosted at SOAS, University of London and funded by the UK&amp;#39;s Arts and Humanities Research Council (grant code: AH/J00152X/1). The data for verbs comes from a digitized version of &lt;em&gt;A Lexicon of Tibetan Verb Stems as Reported by the Grammatical Tradition&lt;/em&gt; (Munich: Bayerische Akademie der Wissenschaften, 2010) by Nathan W. Hill. Otherwise data comes from the manually part-of-speech tagged training data produced by the corpus and a few lexical items specifically added by hand to improve rule based tagging.&lt;/p&gt;</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.574876</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
</record>
278 views
93 downloads
                   All versions   This version
Views              278            278
Downloads          93             93
Data volume        8.2 MB         8.2 MB
Unique views       258            258
Unique downloads   92             92

Share

Cite as