Conference paper Open Access

Multilingual Epidemiological Text Classification: A Comparative Study

Mutuvi, Stephen; Boros, Emanuela; Doucet, Antoine; Lejeune, Gael; Jatowt, Adam; Odeo, Moses


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4476039">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4476039</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4476039"/>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Mutuvi, Stephen</foaf:name>
        <foaf:givenName>Stephen</foaf:givenName>
        <foaf:familyName>Mutuvi</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Multimedia University Kenya</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Boros, Emanuela</foaf:name>
        <foaf:givenName>Emanuela</foaf:givenName>
        <foaf:familyName>Boros</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of La Rochelle, L3i</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Doucet, Antoine</foaf:name>
        <foaf:givenName>Antoine</foaf:givenName>
        <foaf:familyName>Doucet</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of La Rochelle, L3i</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Lejeune, Gael</foaf:name>
        <foaf:givenName>Gael</foaf:givenName>
        <foaf:familyName>Lejeune</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Sorbonne University France</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Jatowt, Adam</foaf:name>
        <foaf:givenName>Adam</foaf:givenName>
        <foaf:familyName>Jatowt</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Kyoto University Japan</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Odeo, Moses</foaf:name>
        <foaf:givenName>Moses</foaf:givenName>
        <foaf:familyName>Odeo</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Multimedia University Kenya</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>Multilingual Epidemiological Text Classification: A Comparative Study</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/770299/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-01-28</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/4476039"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4476039</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4476038"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/newseye"/>
    <dct:description>&lt;p&gt;In this paper, we approach the multilingual text classification task in the context of the epidemiological field. Multilingual text classification models tend to perform differently across different languages (low- or high-resourced), more particularly when the dataset is highly imbalanced, which is the case for epidemiological datasets. We conduct a comparative study of different machine and deep learning text classification models using a dataset comprising news articles related to epidemic outbreaks from six languages, four low-resourced and two high-resourced, in order to analyze the influence of the nature of the language, the structure of the document, and the size of the data. Our findings indicate that the performance of the models based on fine-tuned language models exceeds by more than 50% the chosen baseline models that include a specialized epidemiological news surveillance system and several machine learning models. Also, low-resource languages are highly influenced not only by the typology of the languages on which the models have been pre-trained or/and fine-tuned but also by their size. Furthermore, we discover that the beginning and the end of documents provide the most salient features for this task and, as expected, the performance of the models was proportionate to the training data size.&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4476039"/>
        <dcat:byteSize>172095</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4476039/files/coling_2020_multilingual_epidemiological_text_classification__a_comparative_study.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/770299/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">770299</dct:identifier>
    <dct:title>NewsEye: A Digital Investigator for Historical Newspapers</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
34
35
views
downloads
All versions This version
Views 3434
Downloads 3535
Data volume 6.0 MB6.0 MB
Unique views 3232
Unique downloads 3535

Share

Cite as