{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2021-07-30T14:30:07.230122+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "CT3-dataset-20210729.zip": { "checksum": "md5:f0bd6c8b0e13ccf1ef039f46640a6c5a", "ext": "zip", "id": "c4aba83a-a44b-4b8a-a942-37e55952db42", "key": "CT3-dataset-20210729.zip", "metadata": null, "mimetype": "application/zip", "size": 1050354220 } }, "order": [], "total_bytes": 1050354220 }, "id": "5148586", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/5148586/access", "access_links": "https://zenodo.org/api/records/5148586/access/links", "access_request": "https://zenodo.org/api/records/5148586/access/request", "access_users": "https://zenodo.org/api/records/5148586/access/users", "archive": "https://zenodo.org/api/records/5148586/files-archive", "archive_media": "https://zenodo.org/api/records/5148586/media-files-archive", "communities": "https://zenodo.org/api/records/5148586/communities", "communities-suggestions": "https://zenodo.org/api/records/5148586/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.5148586", "draft": "https://zenodo.org/api/records/5148586/draft", "files": "https://zenodo.org/api/records/5148586/files", "latest": "https://zenodo.org/api/records/5148586/versions/latest", "latest_html": "https://zenodo.org/records/5148586/latest", "media_files": "https://zenodo.org/api/records/5148586/media-files", "parent": "https://zenodo.org/api/records/5148585", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.5148585", "parent_html": "https://zenodo.org/records/5148585", "requests": "https://zenodo.org/api/records/5148586/requests", "reserve_doi": "https://zenodo.org/api/records/5148586/draft/pids/doi", "self": "https://zenodo.org/api/records/5148586", "self_doi": 
"https://zenodo.org/doi/10.5281/zenodo.5148586", "self_html": "https://zenodo.org/records/5148586", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:5148586/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:5148586/sequence/default", "versions": "https://zenodo.org/api/records/5148586/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Heidelberg University" } ], "person_or_org": { "family_name": "Le", "given_name": "Kim Tuyen", "name": "Le, Kim Tuyen", "type": "personal" } }, { "affiliations": [ { "name": "Heidelberg University" } ], "person_or_org": { "family_name": "Rashidi", "given_name": "Gabriel", "name": "Rashidi, Gabriel", "type": "personal" } }, { "affiliations": [ { "name": "Heidelberg University" } ], "person_or_org": { "family_name": "Andrzejak", "given_name": "Artur", "name": "Andrzejak, Artur", "type": "personal" } } ], "description": "
Code Token Type Taxonomy (CT3) is a methodology for refined evaluation of ML-based code completion approaches.
\n\nWe published the CT3-enhanced dataset with pre-computed token types for each token in the Python150k dataset.
\n\nThe dataset was obtained from the empirical study described in the following paper:
\n\nKim Tuyen Le, Gabriel Rashidi, and Artur Andrzejak. A Methodology for Refined Evaluation of ML-based Code Completion Approaches. In KDD Workshop on Programming Language Processing (PLP), August 14-18, 2021 (Virtual).
\n\nPlease read the README.txt file for detailed information on the structure of the enhanced dataset.
", "publication_date": "2021-07-30", "publisher": "Zenodo", "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "subjects": [ { "subject": "code completion" }, { "subject": "accuracy evaluation" }, { "subject": "code token types" } ], "title": "A Code Token Type Taxonomy-enhanced dataset with pre-computed token types for Python150k" }, "parent": { "access": { "owned_by": { "user": 222463 } }, "communities": {}, "id": "5148585", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.5148585", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.5148586", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:5148586", "provider": "oai" } }, "revision_id": 3, "stats": { "all_versions": { "data_volume": 4241202565.0, "downloads": 4, "unique_downloads": 4, "unique_views": 251, "views": 263 }, "this_version": { "data_volume": 1050354220.0, "downloads": 1, "unique_downloads": 1, "unique_views": 107, "views": 112 } }, "status": "published", "updated": "2021-11-28T21:24:46.506531+00:00", "versions": { "index": 1, "is_latest": false } }