{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2019-11-01T12:50:40.210079+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "IWSLT2019_paper_18.pdf": { "checksum": "md5:92181f24bdccacf2a7f0f42c7b27fee9", "ext": "pdf", "id": "35cc48e2-03dd-4de9-b9d7-08879db81c29", "key": "IWSLT2019_paper_18.pdf", "metadata": null, "mimetype": "application/pdf", "size": 428595 } }, "order": [], "total_bytes": 428595 }, "id": "3525003", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/3525003/access", "access_links": "https://zenodo.org/api/records/3525003/access/links", "access_request": "https://zenodo.org/api/records/3525003/access/request", "access_users": "https://zenodo.org/api/records/3525003/access/users", "archive": "https://zenodo.org/api/records/3525003/files-archive", "archive_media": "https://zenodo.org/api/records/3525003/media-files-archive", "communities": "https://zenodo.org/api/records/3525003/communities", "communities-suggestions": "https://zenodo.org/api/records/3525003/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.3525003", "draft": "https://zenodo.org/api/records/3525003/draft", "files": "https://zenodo.org/api/records/3525003/files", "latest": "https://zenodo.org/api/records/3525003/versions/latest", "latest_html": "https://zenodo.org/records/3525003/latest", "media_files": "https://zenodo.org/api/records/3525003/media-files", "parent": "https://zenodo.org/api/records/3525002", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.3525002", "parent_html": "https://zenodo.org/records/3525002", "requests": "https://zenodo.org/api/records/3525003/requests", "reserve_doi": "https://zenodo.org/api/records/3525003/draft/pids/doi", "self": "https://zenodo.org/api/records/3525003", "self_doi": 
"https://zenodo.org/doi/10.5281/zenodo.3525003", "self_html": "https://zenodo.org/records/3525003", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:3525003/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:3525003/sequence/default", "versions": "https://zenodo.org/api/records/3525003/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Department of Computer Science, University of Sheffield, Sheffield S1 4DP, UK" } ], "person_or_org": { "family_name": "Scarton", "given_name": "Scarton", "name": "Scarton, Scarton", "type": "personal" } }, { "affiliations": [ { "name": "Dept. Llenguatges i Sist. Inform., Universitat d'Alacant, 03690 St. Vicent del Raspeig, Spain" } ], "person_or_org": { "family_name": "Forcada", "given_name": "Mikel L.", "name": "Forcada, Mikel L.", "type": "personal" } }, { "affiliations": [ { "name": "Dept. Llenguatges i Sist. Inform., Universitat d'Alacant, 03690 St. Vicent del Raspeig, Spain" } ], "person_or_org": { "family_name": "Espl\u00e0-Gomis", "given_name": "Miquel", "name": "Espl\u00e0-Gomis, Miquel", "type": "personal" } }, { "affiliations": [ { "name": "Department of Computer Science, University of Sheffield, Sheffield S1 4DP, UK & Department of Computing, Imperial College London, London SW7 2AZ, UK" } ], "person_or_org": { "family_name": "Specia", "given_name": "Lucia", "name": "Specia, Lucia", "type": "personal" } } ], "description": "
Devising metrics to assess translation quality has always been at the core of machine translation (MT) research. Traditional automatic reference-based metrics, such as BLEU, have shown correlations with human judgements of adequacy and fluency and have been paramount for the advancement of MT system development. Crowd-sourcing has popularised and enabled the scalability of metrics based on human judgments, such as subjective direct assessments (DA) of adequacy, that are believed to be more reliable than reference-based automatic metrics. Finally, task-based measurements, such as post-editing time, are expected to provide a more detailed evaluation of the usefulness of translations for a specific task. Therefore, while DA averages adequacy judgements to obtain an appraisal of (perceived) quality independently of the task, and reference-based automatic metrics try to objectively estimate quality also in a task-independent way, task-based metrics are measurements obtained either during or after performing a specific task. In this paper we argue that, although expensive, task-based measurements are the most reliable when estimating MT quality in a specific task; in our case, this task is post-editing. To that end, we report experiments on a dataset with newly-collected post-editing indicators and show their usefulness when estimating post-editing effort. Our results show that task-based metrics comparing machine-translated and post-edited versions are the best at tracking post-editing effort, as expected. These metrics are followed by DA, and then by metrics comparing the machine-translated version and independent references. We suggest that MT practitioners should be aware of these differences and acknowledge their implications when deciding how to evaluate MT for post-editing purposes.
", "languages": [ { "id": "eng", "title": { "en": "English" } } ], "publication_date": "2019-11-02", "publisher": "Zenodo", "resource_type": { "id": "publication-conferencepaper", "title": { "de": "Konferenzbeitrag", "en": "Conference paper" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "title": "Estimating post-editing effort: a study on human judgements, task-based and reference-based metrics of MT quality" }, "parent": { "access": { "owned_by": { "user": 50447 } }, "communities": { "default": "db2a5261-ee99-4982-9879-e8274ea4f1af", "entries": [ { "access": { "member_policy": "open", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2019-11-01T07:43:01.061176+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "db2a5261-ee99-4982-9879-e8274ea4f1af", "links": {}, "metadata": { "curation_policy": "Papers published at 16th International Workshop on Spoken Language Translation 2019 in Hong Kong, as maintained by the program and evaluation chairs.
\r\n", "description": "", "page": "", "title": "16th International Workshop on Spoken Language Translation 2019" }, "revision_id": 0, "slug": "iwslt2019", "updated": "2019-11-01T07:43:01.127561+00:00" } ], "ids": [ "db2a5261-ee99-4982-9879-e8274ea4f1af" ] }, "id": "3525002", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.3525002", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.3525003", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:3525003", "provider": "oai" } }, "revision_id": 3, "stats": { "all_versions": { "data_volume": 76718505.0, "downloads": 179, "unique_downloads": 156, "unique_views": 218, "views": 239 }, "this_version": { "data_volume": 76289910.0, "downloads": 178, "unique_downloads": 155, "unique_views": 213, "views": 234 } }, "status": "published", "updated": "2020-01-20T17:34:40.544496+00:00", "versions": { "index": 1, "is_latest": true } }