{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2018-11-21T09:33:16.382785+00:00", "custom_fields": { "imprint:imprint": { "isbn": "978-3-96110-123-8", "pages": "319-342", "place": "Berlin", "title": "Multiword expressions at length and in depth" } }, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "12.pdf": { "checksum": "md5:d09bf62ef775f660252d4045cd854eb6", "ext": "pdf", "id": "2dca90ed-9770-4e6e-850e-422038509b89", "key": "12.pdf", "metadata": null, "mimetype": "application/pdf", "size": 255190 } }, "order": [], "total_bytes": 255190 }, "id": "1469571", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/1469571/access", "access_links": "https://zenodo.org/api/records/1469571/access/links", "access_request": "https://zenodo.org/api/records/1469571/access/request", "access_users": "https://zenodo.org/api/records/1469571/access/users", "archive": "https://zenodo.org/api/records/1469571/files-archive", "archive_media": "https://zenodo.org/api/records/1469571/media-files-archive", "communities": "https://zenodo.org/api/records/1469571/communities", "communities-suggestions": "https://zenodo.org/api/records/1469571/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.1469571", "draft": "https://zenodo.org/api/records/1469571/draft", "files": "https://zenodo.org/api/records/1469571/files", "latest": "https://zenodo.org/api/records/1469571/versions/latest", "latest_html": "https://zenodo.org/records/1469571/latest", "media_files": "https://zenodo.org/api/records/1469571/media-files", "parent": "https://zenodo.org/api/records/1469570", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.1469570", "parent_html": "https://zenodo.org/records/1469570", "requests": "https://zenodo.org/api/records/1469571/requests", "reserve_doi": 
"https://zenodo.org/api/records/1469571/draft/pids/doi", "self": "https://zenodo.org/api/records/1469571", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.1469571", "self_html": "https://zenodo.org/records/1469571", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:1469571/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:1469571/sequence/default", "versions": "https://zenodo.org/api/records/1469571/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "person_or_org": { "family_name": "Marcos Garcia", "name": "Marcos Garcia", "type": "personal" } } ], "description": "
This chapter introduces a strategy for the automatic extraction of multilingual collocation equivalents which takes advantage of parallel corpora to train bilingual word embeddings. First, monolingual collocation candidates are retrieved using syntactic dependencies and standard association measures. Then, the distributional models are applied to search for equivalents of the elements of each collocation in the target languages. The proposed method extracts not only collocation equivalents with direct translations between languages, but also other cases where the collocations in the two languages are not literal translations of each other. Several experiments – evaluating collocations with five syntactic patterns – in English, Spanish, and Portuguese show that this approach can effectively extract large sets of bilingual equivalents with an average precision of about 85%. Moreover, preliminary results on comparable corpora suggest that the distributional models can be applied for identifying new bilingual collocations in different domains. This strategy is compared to both hand-crafted bilingual dictionaries and to probabilistic translation dictionaries learned from the same resources as the bilingual word embeddings, showing that it achieves much larger recall values while keeping high precision results.
", "languages": [ { "id": "eng", "title": { "en": "English" } } ], "publication_date": "2018-10-23", "publisher": "Language Science Press", "resource_type": { "id": "publication-section", "title": { "de": "Buchkapitel", "en": "Book chapter" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "title": "Comparing bilingual word embeddings to translation dictionaries for extracting multilingual collocation equivalents" }, "parent": { "access": { "owned_by": { "user": 17163 } }, "communities": { "default": "981948e2-0cc3-4d74-9a79-3288b9cb2300", "entries": [ { "access": { "member_policy": "open", "members_visibility": "public", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2015-08-18T15:09:51+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "981948e2-0cc3-4d74-9a79-3288b9cb2300", "links": {}, "metadata": { "curation_policy": "Uploads must be books, book parts or auxiliary material connected to a publication accepted by one of the LangSci series
\r\n", "description": "Language Science Press publishes high quality, peer-reviewed open-access books in linguistics.", "page": "", "title": "Language Science Press" }, "revision_id": 0, "slug": "langscipress", "updated": "2017-12-08T14:57:39.545445+00:00" } ], "ids": [ "981948e2-0cc3-4d74-9a79-3288b9cb2300" ] }, "id": "1469570", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.1469570", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.1469571", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:1469571", "provider": "oai" } }, "revision_id": 4, "stats": { "all_versions": { "data_volume": 9952410.0, "downloads": 39, "unique_downloads": 38, "unique_views": 125, "views": 125 }, "this_version": { "data_volume": 9952410.0, "downloads": 39, "unique_downloads": 38, "unique_views": 124, "views": 124 } }, "status": "published", "updated": "2020-01-20T17:33:52.194351+00:00", "versions": { "index": 1, "is_latest": true } }