{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2019-01-17T14:31:07.045874+00:00", "custom_fields": { "legacy:subjects": [ { "identifier": "http://id.loc.gov/authorities/subjects/sh88002425", "scheme": "url", "term": "Natural language processing" } ] }, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "Embeddings_2019-01-01.zip": { "checksum": "md5:e7a3dce00bcc156e150d45ae85e02be9", "ext": "zip", "id": "693274d3-0e2e-44ec-a866-2f6bc5fb67c3", "key": "Embeddings_2019-01-01.zip", "metadata": null, "mimetype": "application/zip", "size": 8635749386 } }, "order": [], "total_bytes": 8635749386 }, "id": "2542722", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/2542722/access", "access_links": "https://zenodo.org/api/records/2542722/access/links", "access_request": "https://zenodo.org/api/records/2542722/access/request", "access_users": "https://zenodo.org/api/records/2542722/access/users", "archive": "https://zenodo.org/api/records/2542722/files-archive", "archive_media": "https://zenodo.org/api/records/2542722/media-files-archive", "communities": "https://zenodo.org/api/records/2542722/communities", "communities-suggestions": "https://zenodo.org/api/records/2542722/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.2542722", "draft": "https://zenodo.org/api/records/2542722/draft", "files": "https://zenodo.org/api/records/2542722/files", "latest": "https://zenodo.org/api/records/2542722/versions/latest", "latest_html": "https://zenodo.org/records/2542722/latest", "media_files": "https://zenodo.org/api/records/2542722/media-files", "parent": "https://zenodo.org/api/records/2542721", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.2542721", "parent_html": "https://zenodo.org/records/2542721", "requests": "https://zenodo.org/api/records/2542722/requests", 
"reserve_doi": "https://zenodo.org/api/records/2542722/draft/pids/doi", "self": "https://zenodo.org/api/records/2542722", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.2542722", "self_html": "https://zenodo.org/records/2542722", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:2542722/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:2542722/sequence/default", "versions": "https://zenodo.org/api/records/2542722/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "additional_descriptions": [ { "description": "Funded by the Plan de Impulso de las Tecnolog\u00edas del Lenguaje (Plan TL).", "type": { "id": "notes", "title": { "de": "Anmerkungen", "en": "Notes" } } } ], "creators": [ { "affiliations": [ { "name": "BSC" } ], "person_or_org": { "family_name": "Felipe Soares", "name": "Felipe Soares", "type": "personal" } }, { "person_or_org": { "family_name": "Marta Villegas", "name": "Marta Villegas", "type": "personal" } }, { "person_or_org": { "family_name": "Aitor Gonzalez-Agirre", "name": "Aitor Gonzalez-Agirre", "type": "personal" } }, { "person_or_org": { "family_name": "Jordi Armengol-Estap\u00e9", "name": "Jordi Armengol-Estap\u00e9", "type": "personal" } }, { "person_or_org": { "family_name": "Martin Krallinger", "identifiers": [ { "identifier": "0000-0002-2646-8782", "scheme": "orcid" } ], "name": "Martin Krallinger", "type": "personal" } } ], "description": "
This version throws an error while loading the file. There is a newer version of this record available.
\n\n[Plan TL/medicine/word embeddings] Word embeddings generated from Spanish corpora that include: (a) the full-text in Spanish available in Scielo.org (until December/2018), (b) all articles from the following Wikipedia categories: Pharmacology, Pharmacy, Medicine and Biology (during December/2018) and (c) the concatenation of the previous two corpora.
\n\nTo generate the word embeddings, two different approaches were used: Word2Vec and fastText.
\n\nFor more information, we refer to the corresponding article: https://www.aclweb.org/anthology/W19-1916/
", "languages": [ { "id": "spa", "title": { "en": "Spanish" } } ], "publication_date": "2019-01-17", "publisher": "Zenodo", "references": [ { "reference": "Soares F, Villegas M, Gonzalez-Agirre A, Krallinger M, Armengol-Estap\u00e9 J. Medical Word Embeddings for Spanish: Development and Evaluation. InProceedings of the 2nd Clinical Natural Language Processing Workshop 2019 Jun (pp. 124-133)." } ], "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "subjects": [ { "subject": "PlanTL, Natural Language Processing, Word Embeddings" } ], "title": "FastText and Word2Vec Spanish Medical Embeddings", "version": "2019-01-01" }, "parent": { "access": { "owned_by": { "user": 55928 } }, "communities": { "default": "629a6594-ebed-4f9d-aa2a-16766d76d068", "entries": [ { "access": { "member_policy": "open", "members_visibility": "public", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2018-11-20T09:59:47.825777+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "629a6594-ebed-4f9d-aa2a-16766d76d068", "links": {}, "metadata": { "description": "Freely accessible collection of diverse resources including corpora, guidelines, publications, gazetteers and lexical resources related to health and biomedical/clinical natural language processing, text mining and language models.", "organizations": [ { "id": "05sd8tv96" } ], "page": "A considerable amount of medically relevant information is hidden in large unstructured heterogeneous data 
collections, such as the medical literature, medicinal patents, electronic health records or specialized web-content (health blogs, patient forums or information generated by scientific and medical societies). To process medical big data more efficiently, there is a growing interest in exploiting natural language processing and text mining approaches, in particular deep learning and artificial intelligence-based strategies.
\n\nA considerable amount of medically relevant information is hidden in large unstructured heterogeneous data collections, such as the medical literature, medicinal patents, electronic health records or specialized web-content (health blogs, patient forums or information generated by scientific and medical societies). To process medical big data more efficiently, there is a growing interest in exploiting natural language processing and text mining approaches, in particular deep learning and artificial intelligence-based strategies.
\n\nThe aim of the Plan de Impulso de las Tecnologías del Lenguaje (Plan TL), the Spanish national Plan for the Advancement of Language Technology, is to promote the development of resources of critical importance for processing textual data in Spanish as well as Catalan, Basque and Galician. The health and biomedical domain constitutes one of the flagship topics of the Spanish Plan TL.
\n\nTo promote the development of health-related language technology applications, the Plan TL is both developing and identifying resources of key relevance including individual components/libraries, terminological resources, annotated corpora and annotation guidelines, as well as document collections and language models.
", "title": "Medical NLP (maintained by NLP4BIA unit at BSC)\u2013 language technology resources for clinical and biomedical documents in multiple languages", "type": { "id": "topic" }, "website": "https://www.bsc.es/discover-bsc/organisation/research-departments/nlp-biomedical-information-analysis" }, "revision_id": 1, "slug": "medicalnlp", "updated": "2023-11-07T07:23:33.103958+00:00" } ], "ids": [ "629a6594-ebed-4f9d-aa2a-16766d76d068" ] }, "id": "2542721", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.2542721", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.2542722", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:2542722", "provider": "oai" } }, "revision_id": 20, "stats": { "all_versions": { "data_volume": 696356031017534.0, "downloads": 17396, "unique_downloads": 5209, "unique_views": 4295, "views": 4748 }, "this_version": { "data_volume": 3238406019750.0, "downloads": 375, "unique_downloads": 294, "unique_views": 1522, "views": 1683 } }, "status": "published", "updated": "2022-11-05T07:17:38.581354+00:00", "versions": { "index": 1, "is_latest": false } }