Software Open Access
Amrith Krishna; Bishal Santra; Sasi Prasanth Bandaru; Gaurav Sahu; Vishnu Dutt Sharma; Pavankumar Satuluri; Pawan Goyal
{ "files": [ { "links": { "self": "https://zenodo.org/api/files/4f81d490-746b-4892-9433-5ed6dd3c5be8/dir.zip" }, "checksum": "md5:016462cbd311404a6c9fb9af950d38a5", "bucket": "4f81d490-746b-4892-9433-5ed6dd3c5be8", "key": "dir.zip", "type": "zip", "size": 453229783 }, { "links": { "self": "https://zenodo.org/api/files/4f81d490-746b-4892-9433-5ed6dd3c5be8/README.md" }, "checksum": "md5:c0163b57ec0ab0013603d017556e2f2b", "bucket": "4f81d490-746b-4892-9433-5ed6dd3c5be8", "key": "README.md", "type": "md", "size": 2418 }, { "links": { "self": "https://zenodo.org/api/files/4f81d490-746b-4892-9433-5ed6dd3c5be8/wordsegmentation.rar" }, "checksum": "md5:6339b68e76df5aab37d2850fccf68c98", "bucket": "4f81d490-746b-4892-9433-5ed6dd3c5be8", "key": "wordsegmentation.rar", "type": "rar", "size": 41733267455 } ], "owners": [ 37333 ], "doi": "10.5281/zenodo.1035413", "stats": { "version_unique_downloads": 248.0, "unique_views": 610.0, "views": 654.0, "version_views": 653.0, "unique_downloads": 248.0, "version_unique_views": 609.0, "volume": 12983506289201.0, "version_downloads": 531.0, "downloads": 531.0, "version_volume": 12983506289201.0 }, "links": { "doi": "https://doi.org/10.5281/zenodo.1035413", "conceptdoi": "https://doi.org/10.5281/zenodo.1035412", "bucket": "https://zenodo.org/api/files/4f81d490-746b-4892-9433-5ed6dd3c5be8", "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.1035412.svg", "html": "https://zenodo.org/record/1035413", "latest_html": "https://zenodo.org/record/1035413", "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.1035413.svg", "latest": "https://zenodo.org/api/records/1035413" }, "conceptdoi": "10.5281/zenodo.1035412", "created": "2018-08-23T08:10:34.250406+00:00", "updated": "2020-01-25T07:25:05.287678+00:00", "conceptrecid": "1035412", "revision": 6, "id": 1035413, "metadata": { "access_right_category": "success", "doi": "10.5281/zenodo.1035413", "description": "<p>This is the repository for word segmentation in sanskrit using energy 
based models.</p>\n\n<p> </p>\n\n<p># Word Segmentation in Sanskrit Using Energy Based Models<br>\n<br>\n <br>\n## Getting Started<br>\n <br>\nPlease download the 2 compressed files 'dir.zip' and 'wordsegmentation.rar' to your working directory and extract them into folders named 'dir' and 'wordsegmentation' respectively.<br>\n <br>\nYour working directory should be as follows<br>\n* Working Directory<br>\n * wordsegmentation<br>\n * skt_dcs_DS.bz2_4K_bigram_mir_10K<br>\n * skt_dcs_DS.bz2_4K_bigram_mir_heldout<br>\n * dir<br>\n <br>\n## Prerequisites<br>\n* Python3<br>\n * scipy<br>\n * numpy<br>\n * csv<br>\n * pickle<br>\n * multiprocessing<br>\n * bz2<br>\n## Instructions for Training<br>\nChange your current directory to 'dir'<br>\n <br>\nRun the file Train_clique.py by using the following command<br>\n <br>\n* python Train_clique.py<br>\n <br>\nTo train on different input features like BM2,BM3,BR2,BR3,PM2,PM3,PR2,PR3 please modify the bz2_input_folder value in the main function before beginning the training.<br>\n <br>\nFeature | bz2_input_folder<br>\n------------- | -------------<br>\nBM2 | wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/<br>\nBM3 | wordsegmentation/skt_dcs_DS.bz2_1L_bigram_mir_10K<br>\nBR2 | wordsegmentation/skt_dcs_DS.bz2_4K_bigram_rfe_10K/<br>\nBR3 | wordsegmentation/skt_dcs_DS.bz2_1L_bigram_rfe_10K/<br>\nPM2 | wordsegmentation/skt_dcs_DS.bz2_4K_pmi_mir_10K/<br>\nPM3 | wordsegmentation/skt_dcs_DS.bz2_1L_pmi_mir_10K2/<br>\nPR2 | wordsegmentation/skt_dcs_DS.bz2_4K_pmi_rfe_10K/<br>\nPR3 | wordsegmentation/skt_dcs_DS.bz2_1L_pmi_rfe_10K/<br>\n <br>\n## Instructions for Testing<br>\n <br>\nAfter training, please modify the 'modelList' dictionary in 'test_clique.py' with the name of the neural network that has been saved during training. 
While testing for a feature, please provide the name of the neural net which was trained for the same feature.<br>\n <br>\nWe only provide the trained model for the feature BM2 which was our best performing feature. If the name of the neural net is not changed, then the testing will be performed on the pre-trained model for BM2 provided in outputs/train_t7978754709018<br>\n <br>\nTo test with a particular feature vector use the tag of the feature while execution<br>\n <br>\n* python test_clique.py -t <tag><br>\n <br>\nFor example: <br>\n * python test_clique.py -t BM2<br>\n <br>\nAfter finishing the testing please run the following command to see the precision and recall values for both the word and word++ prediction tasks<br>\n <br>\n* python evaluate.py <tag><br>\n <br>\nFor example: <br>\n * python evaluate.py BM2</p>", "license": { "id": "CC-BY-4.0" }, "title": "Word Segmentation in Sanskrit Using Energy Based Models", "relations": { "version": [ { "count": 1, "index": 0, "parent": { "pid_type": "recid", "pid_value": "1035412" }, "is_last": true, "last_child": { "pid_type": "recid", "pid_value": "1035413" } } ] }, "subjects": [], "communities": [ { "id": "cnerg" } ], "publication_date": "2018-08-23", "creators": [ { "affiliation": "IIT Kharagpur", "name": "Amrith Krishna" }, { "affiliation": "IIT Kharagpur", "name": "Bishal Santra" }, { "affiliation": "IIT Kharagpur", "name": "Sasi Prasanth Bandaru" }, { "affiliation": "IIT Kharagpur", "name": "Gaurav Sahu" }, { "affiliation": "American Express", "name": "Vishnu Dutt Sharma" }, { "affiliation": "Chinmya Visvavidyapeeth", "name": "Pavankumar Satuluri" }, { "affiliation": "IIT Kharagpur", "name": "Pawan Goyal" } ], "meeting": { "acronym": "EMNLP", "url": "http://emnlp2018.org/", "dates": "October 31\u2013November 4 2018", "place": "Brussels, Belgium", "title": "Conference on Empirical Methods in Natural Language Processing" }, "access_right": "open", "resource_type": { "type": "software", "title": "Software" }, 
"related_identifiers": [ { "scheme": "doi", "identifier": "10.5281/zenodo.1035412", "relation": "isVersionOf" } ] } }
All versions | This version | |
---|---|---|
Views | 653 | 654 |
Downloads | 531 | 531 |
Data volume | 13.0 TB | 13.0 TB |
Unique views | 609 | 610 |
Unique downloads | 248 | 248 |