Thesis Open Access

Improving Generalization of Deep Learning Music Classifiers

Morgan Buisson


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- DCAT (RDF/XML) metadata record for the thesis identified by DOI 10.5281/zenodo.5554754. -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:adms="http://www.w3.org/ns/adms#"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dctype="http://purl.org/dc/dcmitype/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:duv="http://www.w3.org/ns/duv#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:frapo="http://purl.org/cerif/frapo/"
         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:gsp="http://www.opengis.net/ont/geosparql#"
         xmlns:locn="http://www.w3.org/ns/locn#"
         xmlns:org="http://www.w3.org/ns/org#"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:prov="http://www.w3.org/ns/prov#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:schema="http://schema.org/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
         xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- The record itself: typed as a dcat:Dataset whose content type is dcmitype Text. -->
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.5554754">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.5554754</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.5554754"/>
    <!-- Author -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Morgan Buisson</foaf:name>
      </rdf:Description>
    </dct:creator>
    <dct:title>Improving Generalization of Deep Learning Music Classifiers</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- The issue date appears twice in this record: here as a bare year (xsd:gYear)
         and further down as a full calendar date (xsd:date). -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <!-- One dcat:keyword literal per keyword, rather than a single delimiter-joined string. -->
    <dcat:keyword>Generalization</dcat:keyword>
    <dcat:keyword>Music Classification</dcat:keyword>
    <dcat:keyword>Deep Learning</dcat:keyword>
    <!-- Contributors, each with an organisational affiliation -->
    <dct:contributor>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Pablo Alonso</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Universitat Pompeu Fabra</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:contributor>
    <dct:contributor>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Dmitry Bogdanov</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Universitat Pompeu Fabra</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:contributor>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-02-25</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- Alternate identifier: the record URL, also asserted as owl:sameAs the DOI resource. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/5554754"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5554754</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Relations to another DOI (isVersionOf) and to two community collections (isPartOf). -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.5554753"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/mtgupf"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/smc-master"/>
    <!-- The abstract is stored as escaped HTML inside the literal (a <p> element with HTML entities). -->
    <dct:description>&lt;p&gt;Deep learning models have recently led to significant improvements in a wide variety of tasks. Known as being a very powerful tool capable of generalizing better than traditional machine learning approaches, deep learning models still heavily rely on large quantities of annotated data. As the field of music information retrieval is still subject to data sparsity, automatic music classification remains a challenging problem and numerous models fail at generalizing to out-of-distribution music collections. This project investigates possible directions to follow in order to improve the generalization capacity of deep learning music classifiers. More specifically, we suggest a set of guidelines to be followed in order to address the generalization problem of music classifiers trained on very small datasets. We first propose ways to maximize the amount of information extracted from small datasets through outliers detection and efficient audio data augmentation. We then show that considering the amount of perceptual ambiguity of each classification task through label smoothing can help obtaining more generalizable classification bounds. We also highlight the impact label noise can have in a small dataset setting and explore ways to improve the model&amp;rsquo;s robustness. Finally, we argue that leveraging common knowledge among related classification tasks can result in a more generalizable internal representation learned by the model. To illustrate this assumption, we employ a simple multi-task learning architecture to jointly learn pairs of tasks, and list other interesting axes to be further explored in that direction. All the suggested approaches are experimentally assessed on two state-of-the-art CNN architectures for automatic music classification. They all lead to consistent improvements over baseline models and unveil new relevant questions to rethink the task of automatic music classification.&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: as an EU authority-table URI and as a labelled rights statement. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- First distribution node: carries the licence and the DOI access URL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5554754"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Second distribution node: the PDF file itself (byte size, download URL, media type). -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5554754"/>
        <dcat:byteSize>3000768</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5554754/files/2021-Morgan-Buisson.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
81
57
views
downloads
All versions This version
Views 81 81
Downloads 57 57
Data volume 171.0 MB 171.0 MB
Unique views 74 74
Unique downloads 55 55

Share

Cite as