Conference paper Open Access

The need of standardised metadata to encode causal relationships: Towards safer data-driven machine learning biological solutions

Beatriz Garcia Santa Cruz; Carlos Vega; Frank Hertel


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.5729350">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.5729350</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.5729350"/>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-0939-4443">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-0939-4443</dct:identifier>
        <foaf:name>Beatriz Garcia Santa Cruz</foaf:name>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-7979-9921">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-7979-9921</dct:identifier>
        <foaf:name>Carlos Vega</foaf:name>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Frank Hertel</foaf:name>
      </rdf:Description>
    </dct:creator>
    <dct:title>The need of standardised metadata to encode causal relationships: Towards safer data-driven machine learning biological solutions</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <dcat:keyword>confounders</dcat:keyword>
    <dcat:keyword>causality</dcat:keyword>
    <dcat:keyword>metadata</dcat:keyword>
    <dcat:keyword>machine learning</dcat:keyword>
    <dcat:keyword>systems biology</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-11-16</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/5729350"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5729350</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.5729349"/>
    <owl:versionInfo>Version presented at the Computational Intelligence Methods for Bioinformatics and Biostatistics 2021 (CIBB2021)</owl:versionInfo>
    <dct:description>&lt;p&gt;In this paper, we discuss the importance of considering causal relations in the development of machine learning solutions to prevent factors hampering the robustness and generalisation capacity of the models, such as induced biases. This issue often arises when the algorithm decision is affected by confounding factors. In this work, we argue that the integration of causal relationships can identify potential confounders. We call for standardised meta-information practices as a crucial step for proper machine learning solutions development, validation, and data sharing. Such practices include detailing the dataset generation process, aiming for automatic integration of causal relationships.&amp;nbsp;&lt;/p&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5729350"/>
        <dcat:byteSize>360296</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5729350/files/Authors__guidelines_for_CIBB_short_papers (1).pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
94
52
views
downloads
All versions This version
Views 9494
Downloads 5252
Data volume 18.7 MB18.7 MB
Unique views 7676
Unique downloads 4141

Share

Cite as