Dataset Open Access

A part-of-speech (POS) lexicon of Classical Tibetan for NLP

Hill, Nathan W.; Garrett, Edward


JSON Export

{
  "files": [
    {
      "links": {
        "self": "https://zenodo.org/api/files/41c0dc18-29ec-4345-b387-7434a2db6bdd/Lexicons.zip"
      }, 
      "checksum": "md5:021d0e1089f91ef7cc65d42dbf21518c", 
      "bucket": "41c0dc18-29ec-4345-b387-7434a2db6bdd", 
      "key": "Lexicons.zip", 
      "type": "zip", 
      "size": 88095
    }
  ], 
  "owners": [
    30511
  ], 
  "doi": "10.5281/zenodo.574876", 
  "stats": {
    "version_unique_downloads": 92.0, 
    "unique_views": 258.0, 
    "views": 278.0, 
    "version_views": 278.0, 
    "unique_downloads": 92.0, 
    "version_unique_views": 258.0, 
    "volume": 8192835.0, 
    "version_downloads": 93.0, 
    "downloads": 93.0, 
    "version_volume": 8192835.0
  }, 
  "links": {
    "doi": "https://doi.org/10.5281/zenodo.574876", 
    "latest_html": "https://zenodo.org/record/574876", 
    "bucket": "https://zenodo.org/api/files/41c0dc18-29ec-4345-b387-7434a2db6bdd", 
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.574876.svg", 
    "html": "https://zenodo.org/record/574876", 
    "latest": "https://zenodo.org/api/records/574876"
  }, 
  "created": "2017-05-11T18:27:43.401198+00:00", 
  "updated": "2021-04-29T14:28:04.549442+00:00", 
  "conceptrecid": "778165", 
  "revision": 6, 
  "id": 574876, 
  "metadata": {
    "access_right_category": "success", 
    "doi": "10.5281/zenodo.574876", 
    "description": "<p>This part-of-speech (POS) lexicon of Classical Tibetan was prepared in the course of the research project &#39;Tibetan in Digital Communication&#39; (2012-2015) hosted at SOAS, University of London and funded by the UK&#39;s Arts and Humanities Research Council (grant code: AH/J00152X/1). The data for verbs comes from a digitized version of <em>A Lexicon of Tibetan Verb Stems as Reported by the Grammatical Tradition</em> (Munich: Bayerische Akademie der Wissenschaften, 2010) by Nathan W. Hill. Otherwise data comes from the manually part-of-speech tagged training data produced by the corpus and a few lexical items specifically added by hand to improve rule based tagging.</p>", 
    "license": {
      "id": "CC-BY-4.0"
    }, 
    "title": "A part-of-speech (POS) lexicon of Classical Tibetan for NLP", 
    "notes": "funded by the UK's Arts and Humanities Research Council (grant code: AH/J00152X/1)", 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "778165"
          }, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "574876"
          }
        }
      ]
    }, 
    "communities": [
      {
        "id": "tibnlp"
      }
    ], 
    "keywords": [
      "Tibetan language", 
      "Natural language processing", 
      "part-of-speech tagging"
    ], 
    "publication_date": "2017-05-11", 
    "creators": [
      {
        "orcid": "0000-0001-6423-017X", 
        "affiliation": "SOAS, University of London", 
        "name": "Hill, Nathan W."
      }, 
      {
        "orcid": "0000-0001-8875-7654", 
        "affiliation": "SOAS, University of London", 
        "name": "Garrett, Edward"
      }
    ], 
    "access_right": "open", 
    "resource_type": {
      "type": "dataset", 
      "title": "Dataset"
    }
  }
}
278
93
views
downloads
All versions This version
Views 278 278
Downloads 93 93
Data volume 8.2 MB 8.2 MB
Unique views 258 258
Unique downloads 92 92

Share

Cite as