Journal article Open Access

Continuous Noise Masking Based Vocoder for Statistical Parametric Speech Synthesis

Al-Radhi, Mohammed Salah; Csapó, Tamás Gábor; Németh, Géza


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://zenodo.org/record/5729421">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5729421</dct:identifier>
    <foaf:page rdf:resource="https://zenodo.org/record/5729421"/>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-3094-6916">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-3094-6916</dct:identifier>
        <foaf:name>Al-Radhi, Mohammed Salah</foaf:name>
        <foaf:givenName>Mohammed Salah</foaf:givenName>
        <foaf:familyName>Al-Radhi</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Budapest University of Technology and Economics</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-4375-7524">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-4375-7524</dct:identifier>
        <foaf:name>Csapó, Tamás Gábor</foaf:name>
        <foaf:givenName>Tamás Gábor</foaf:givenName>
        <foaf:familyName>Csapó</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Budapest University of Technology and Economics</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-2311-4858">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-2311-4858</dct:identifier>
        <foaf:name>Németh, Géza</foaf:name>
        <foaf:givenName>Géza</foaf:givenName>
        <foaf:familyName>Németh</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Budapest University of Technology and Economics</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>Continuous Noise Masking Based Vocoder for Statistical Parametric Speech Synthesis</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <dcat:keyword>noise masking</dcat:keyword>
    <dcat:keyword>continuous vocoder</dcat:keyword>
    <dcat:keyword>speech synthesis</dcat:keyword>
    <dcat:keyword>phase distortion</dcat:keyword>
    <dcat:keyword>kernel density functions</dcat:keyword>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/825619/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100010661</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-02-10</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/5729421"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5729421</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <owl:sameAs rdf:resource="https://doi.org/10.1587/transinf.2019EDP7167"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/ai4eu"/>
    <owl:versionInfo>1</owl:versionInfo>
    <dct:description>&lt;p&gt;In this article, we propose a method called &amp;ldquo;continuous noise masking (cNM)&amp;rdquo; that allows eliminating residual buzziness in a continuous vocoder, i.e. of which all parameters are continuous and offers a simple and flexible speech analysis and synthesis system. Traditional parametric vocoders generally show a perceptible deterioration in the quality of the synthesized speech due to different processing algorithms. Furthermore, an inaccurate noise resynthesis (e.g. in breathiness or hoarseness) is also considered to be one of the main underlying causes of performance degradation, leading to noisy transients and temporal discontinuity in the synthesized speech. To overcome these issues, a new cNM is developed based on the phase distortion deviation in order to reduce the perceptual effect of the residual noise, allowing a proper reconstruction of noise characteristics, and model better the creaky voice segments that may happen in natural speech. To this end, the cNM is designed to keep only voice components under a condition of the cNM threshold while discarding others. We evaluate the proposed approach and compare with state-of-the-art vocoders using objective and subjective listening tests. Experimental results show that the proposed method can reduce the effect of residual noise and can reach the quality of other sophisticated approaches like STRAIGHT and log domain pulse model (PML).&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.1587/transinf.2019EDP7167"/>
        <dcat:byteSize>1161788</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5729421/files/E103.D_2019EDP7167.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/825619/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">825619</dct:identifier>
    <dct:title>A European AI On Demand Platform and Ecosystem</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100010661</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
32
17
views
downloads
Views 32
Downloads 17
Data volume 19.8 MB
Unique views 30
Unique downloads 16

Share

Cite as