Conference paper Open Access

Linking Named Entities across Languages using Multilingual Word Embeddings

Linhares Pontes, Elvys; Doucet, Antoine; Moreno, Jose G.


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://zenodo.org/record/4059778">
    <!-- Record identity: the same landing-page URL is given as the dct:identifier
         literal (typed xsd:anyURI) and as the foaf:page resource. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4059778</dct:identifier>
    <foaf:page rdf:resource="https://zenodo.org/record/4059778"/>
    <!-- Creators of the record. Each one is an anonymous foaf:Agent node written
         as a typed-node element, which is the RDF/XML shorthand for an
         rdf:Description carrying an rdf:type of foaf:Agent. Every agent has a
         full name, its given/family parts, and an org:memberOf affiliation
         expressed as an anonymous foaf:Organization with only a name. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Linhares Pontes, Elvys</foaf:name>
        <foaf:givenName>Elvys</foaf:givenName>
        <foaf:familyName>Linhares Pontes</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of La Rochelle, La Rochelle, France</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Doucet, Antoine</foaf:name>
        <foaf:givenName>Antoine</foaf:givenName>
        <foaf:familyName>Doucet</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of La Rochelle, La Rochelle, France</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Moreno, Jose G.</foaf:name>
        <foaf:givenName>Jose G.</foaf:givenName>
        <foaf:familyName>Moreno</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Toulouse, Toulouse, France</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher (an anonymous foaf:Agent named
         Zenodo), publication year and free-text keywords. -->
    <dct:title>Linking Named Entities across Languages using Multilingual Word Embeddings</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Year-only issue date (xsd:gYear). A second dct:issued with the full
         xsd:date appears further down in this same record description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- One dcat:keyword element per keyword; the literals carry no language tag. -->
    <dcat:keyword>Cross-Lingual Named Entity Linking</dcat:keyword>
    <dcat:keyword>Multilingual Word Embeddings</dcat:keyword>
    <dcat:keyword>Digital Library</dcat:keyword>
    <dcat:keyword>Indexing</dcat:keyword>
    <!-- Funding. frapo:isFundedBy points at the grant by its info:eu-repo URI;
         the same URI is the rdf:about of the foaf:Project node near the end of
         this document. -->
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/825153/"/>
    <!-- The funder is an anonymous foaf:Organization identified by a string
         literal. NOTE(review): 10.13039/... looks like a funder-registry DOI;
         confirm against the exporter before relying on that. -->
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <!-- Full issue date (xsd:date); the year-only dct:issued (xsd:gYear) is
         emitted earlier in this record description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-09-30</dct:issued>
    <!-- Language is given as a resource under the publications.europa.eu
         language authority rather than as a literal code. -->
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- This owl:sameAs targets the same URI as the enclosing rdf:about, so the
         statement is reflexive. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4059778"/>
    <!-- ADMS identifier node: the record URL as skos:notation (xsd:anyURI) with
         the literal "url" as adms:schemeAgency. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4059778</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Second owl:sameAs: links the record to a doi.org URI. -->
    <owl:sameAs rdf:resource="https://doi.org/10.1145/3383583.3398597"/>
    <!-- The record is declared part of the Zenodo community at this URL. -->
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/embeddia"/>
    <!-- Abstract. The literal contains HTML paragraph tags escaped as character
         entities, so consumers receive a string with markup in it, not XML
         child elements. -->
    <dct:description>&lt;p&gt;Digital libraries are online collections of digital objects that can include text, images, audio, or videos in several languages. It has long been observed that named entities (NEs) are key to the access to digital library portals as they are contained in most user queries. However, NEs can have different spellings for each language which reduces the performance of user queries to retrieve documents across languages. Cross-lingual named entity linking (XEL) connects NEs from documents in a source language to external knowledge bases in another (target) language. The XEL task is especially challenging due to the diversity of NEs across languages and contexts. This paper describes a XEL system applied and evaluated with several languages pairs including English and various low-resourced languages of different linguistic families such as Croatian, Finnish, Estonian and Slovenian. We tested this approach to analyze documents and NEs in low-resourced languages and link them to the English version of Wikipedia. We present the resulting study of this analysis and the challenges involved in the case of degraded documents from digital libraries. Further works will make an extensive analysis of the impact of our approach on the XEL task with OCRed documents&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as a publications.europa.eu
         access-right authority resource, and once as a dct:RightsStatement
         identified by an info:eu-repo URI with a human-readable label. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Licence is referenced by the URL of the Creative Commons legal code. -->
    <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
    <!-- Distribution: an anonymous dcat:Distribution holding the access and
         download locations, the size literal and the media type of the file. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL and downloadURL are resource-valued, so the target is
             carried only by rdf:resource. RDF/XML does not permit a property
             element to have both rdf:resource and text content; the elements
             are therefore empty. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.1145/3383583.3398597"/>
        <dcat:byteSize>502231</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4059778/files/LinharesPontes2020.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <!-- Grant node. Its rdf:about is the same info:eu-repo URI that the record's
       frapo:isFundedBy statement references. It carries the grant number as a
       string identifier, the project title, and the awarding organization,
       which repeats the name and identifier used for the record's
       schema:funder node. -->
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/825153/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">825153</dct:identifier>
    <dct:title>Cross-Lingual Embeddings for Less-Represented Languages in European News Media</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
13
55
views
downloads
Views 13
Downloads 55
Data volume 27.6 MB
Unique views 10
Unique downloads 53

Share

Cite as