Software Open Access
{ "files": [ { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_BuddhFoundCorpusNgramsRedux.csv" }, "checksum": "md5:ebeceb54230207b55968c113486c979f", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_BuddhFoundCorpusNgramsRedux.csv", "type": "csv", "size": 192850 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_BuddhSktSegmenterLemmatiser2019.R" }, "checksum": "md5:432313287a5a2d084ac64b70b14b9a2a", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_BuddhSktSegmenterLemmatiser2019.R", "type": "r", "size": 244573 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_CL2019_BuddhistSanskritSegmenterPresentation.pptx" }, "checksum": "md5:394107767ce92f6be6b64e9c8cec9923", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_CL2019_BuddhistSanskritSegmenterPresentation.pptx", "type": "pptx", "size": 9160483 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_FiveTextsSegmentedTokensDFWithCleanFreq.csv" }, "checksum": "md5:37ce1893b1f8a9f0aa55b3f6a850e3f0", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_FiveTextsSegmentedTokensDFWithCleanFreq.csv", "type": "csv", "size": 212507 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_GretilBuddhRelLit_NgramsRedux.csv" }, "checksum": "md5:5e169775fc20db5ea684bb35015fe11a", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_GretilBuddhRelLit_NgramsRedux.csv", "type": "csv", "size": 473717 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_GretilBuddhSastraSastra_NgramsRedux.csv" }, "checksum": "md5:63a22b8b1d18c08c6b12506d90c3fc16", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_GretilBuddhSastraSastra_NgramsRedux.csv", "type": "csv", "size": 311888 }, { "links": { "self": 
"https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_NonStemmedWordlist.csv" }, "checksum": "md5:baee76cc1ec672d92cdb8deb6ba52a51", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_NonStemmedWordlist.csv", "type": "csv", "size": 3037610 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_Segmenter_Eva_AllGoldSent.csv" }, "checksum": "md5:56a7ab6ba81ceac3954c38c5ad6a7525", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_Segmenter_Eva_AllGoldSent.csv", "type": "csv", "size": 75297 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_SegmenterEva_RawOneSentencePerLine.zip" }, "checksum": "md5:4aa0a4e3672b4ce21e927a7420b07e5f", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_SegmenterEva_RawOneSentencePerLine.zip", "type": "zip", "size": 34650 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_WordlistNoA_June2019.csv" }, "checksum": "md5:d64b2a4b9e10dd9e45e95d4f2f701648", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_WordlistNoA_June2019.csv", "type": "csv", "size": 1105346 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Lugli_Wordlist_ReadMe.html" }, "checksum": "md5:a48598508f02a794ee6fd021c937962c", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Lugli_Wordlist_ReadMe.html", "type": "html", "size": 62929 }, { "links": { "self": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5/Seq2Seq_segmentertest-full-vocabulary_GeoffroyNoel.txt" }, "checksum": "md5:5d507c0ac8219998e5150944db8461e5", "bucket": "108636c3-5e93-4821-b43f-62158b06f7a5", "key": "Seq2Seq_segmentertest-full-vocabulary_GeoffroyNoel.txt", "type": "txt", "size": 25325 } ], "owners": [ 76604 ], "doi": "10.5281/zenodo.3459219", "stats": { "version_unique_downloads": 448.0, "unique_views": 30.0, "views": 33.0, 
"version_views": 111.0, "unique_downloads": 406.0, "version_unique_views": 101.0, "volume": 383306571.0, "version_downloads": 557.0, "downloads": 463.0, "version_volume": 577167458.0 }, "links": { "doi": "https://doi.org/10.5281/zenodo.3459219", "conceptdoi": "https://doi.org/10.5281/zenodo.3459218", "bucket": "https://zenodo.org/api/files/108636c3-5e93-4821-b43f-62158b06f7a5", "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.3459218.svg", "html": "https://zenodo.org/record/3459219", "latest_html": "https://zenodo.org/record/3526469", "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.3459219.svg", "latest": "https://zenodo.org/api/records/3526469" }, "conceptdoi": "10.5281/zenodo.3459218", "created": "2019-10-06T21:23:31.879418+00:00", "updated": "2020-01-25T07:26:51.611242+00:00", "conceptrecid": "3459218", "revision": 5, "id": 3459219, "metadata": { "access_right_category": "success", "doi": "10.5281/zenodo.3459219", "description": "<p>This folder contains R code for a rule-based Buddhist Sanskrit Segmenter and Lemmatiser, as well as data necessary to use and evaluate the Segmenter and explanatory materials.</p>\n\n<p>The segmenter has been tested on 639 sentences from 13 Buddhist texts (9 s\u016btras, 4 \u015b\u0101stras) and has been evaluated as achieving 97% accuracy.</p>\n\n<p>The code and materials contained in this folder have been developed as part of a Newton International Fellowship at King's College London, funded by the British Academy (NF161436)</p>\n\n<p> </p>\n\n<p><strong>Contents</strong></p>\n\n<p>R code for segmentation, lemmatisation and evaluation (includes instructions to run code)</p>\n\n<p>powerpoint presentation with background and explanation of project</p>\n\n<p>Wordlists and Wordlists documentation</p>\n\n<p>ngrams and stems frequency tables necessary for segmentation</p>\n\n<p>gold standard set of manually segmented and stemmed sentences for evaluation</p>\n\n<p>set of raw sentences for evaluation</p>\n\n<p>evaluation of 
Krishna et al. seq2seq segmenter on Buddhist sentences for reference purposes</p>\n\n<p> </p>\n\n<p>This segmenter has been used to prepare the Sanskrit Corpus at DOI 10.5281/zenodo.3457822</p>", "language": "eng", "title": "Buddhist Sanskrit Segmenter", "license": { "id": "CC-BY-4.0" }, "relations": { "version": [ { "count": 2, "index": 0, "parent": { "pid_type": "recid", "pid_value": "3459218" }, "is_last": false, "last_child": { "pid_type": "recid", "pid_value": "3526469" } } ] }, "version": "1", "keywords": [ "Buddhist Sanskrit", "Natural Language Processing" ], "publication_date": "2019-09-24", "creators": [ { "orcid": "0000-0003-0473-4290", "affiliation": "King's College London", "name": "Ligeia Lugli" } ], "access_right": "open", "resource_type": { "type": "software", "title": "Software" }, "related_identifiers": [ { "scheme": "doi", "identifier": "10.5281/zenodo.3459218", "relation": "isVersionOf" } ] } }
| | All versions | This version |
---|---|---|
Views | 111 | 33 |
Downloads | 557 | 463 |
Data volume | 577.2 MB | 383.3 MB |
Unique views | 101 | 30 |
Unique downloads | 448 | 406 |