Dataset Open Access

Multilingual bottle-neck feature learning from untranscribed speech for track 1 in zerospeech2017 (system 2 -- with VTLN)

Hongjie Chen Chen; Cheung-Chi Leung; Lei Xie; Bin Ma; Haizhou Li


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<!--
  MARC21 "slim" (MARCXML) export of a single repository record.
  Each <datafield> is identified by its MARC tag; the <subfield> codes carry
  the individual values. Fields appear in the order the exporter emitted them,
  not in ascending tag order, so do not rely on tag ordering when reading.
-->
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nmm##2200000uu#4500</leader>
  <!-- 005: timestamp of the latest change to the record (yyyymmddhhmmss.f). -->
  <controlfield tag="005">20170908082923.0</controlfield>
  <!-- 001: record control number; the same id appears in the 856 file URL and the 909 OAI identifier. -->
  <controlfield tag="001">822737</controlfield>
  <!-- 700 (repeated): additional authors, one field per person; $a = name, $u = affiliation. -->
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Institute for Infocomm Research, A*STAR</subfield>
    <subfield code="a">Cheung-Chi Leung</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Northwestern Polytechnical University</subfield>
    <subfield code="a">Lei Xie</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Institute for Infocomm Research, A*STAR</subfield>
    <subfield code="a">Bin Ma</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">National University of Singapore</subfield>
    <subfield code="a">Haizhou Li</subfield>
  </datafield>
  <!--
    856: electronic location of the deposited file.
    $u = download URL, $z = md5 checksum of the file.
    $s is presumably the file size in bytes (about 8 GB); confirm against the repository.
  -->
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">8036448633</subfield>
    <subfield code="z">md5:aec84d688de278e3ed8df99fc536db68</subfield>
    <subfield code="u">https://zenodo.org/record/822737/files/10_5281_zenodo_822737.tar.gz</subfield>
  </datafield>
  <!-- 542 $l: access status of the record ("open"). -->
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <!-- 260 $c: publication date in ISO 8601 form (YYYY-MM-DD). -->
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2017-07-04</subfield>
  </datafield>
  <!--
    909 (ind1="C", ind2="O"): locally defined field carrying OAI-PMH information.
    $o = OAI identifier of this record; the repeated $p values appear to be the
    OAI sets the record belongs to (TODO confirm against the exporter).
  -->
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="p">user-zerospeech2017</subfield>
    <subfield code="o">oai:zenodo.org:822737</subfield>
  </datafield>
  <!-- 100: first (main) author; $a = name, $u = affiliation. -->
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Northwestern Polytechnical University</subfield>
    <subfield code="a">Hongjie Chen Chen</subfield>
  </datafield>
  <!-- 245 $a: title of the record. -->
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Multilingual bottle-neck feature learning from untranscribed speech for track 1 in zerospeech2017 (system 2 -- with VTLN)</subfield>
  </datafield>
  <!-- 980: locally defined field; this occurrence holds a collection tag (the same value appears as a 909 $p). -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-zerospeech2017</subfield>
  </datafield>
  <!-- 540: licence under which the file is distributed; $a = licence name, $u = URL of the legal text. -->
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">http://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <!-- 650: subject term with its source vocabulary; here the short licence id ($a) as listed by opendefinition.org ($2). -->
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <!--
    520 $a: description / abstract. The value is HTML whose markup has been
    entity-escaped, and it spans several lines; the line breaks and blank lines
    inside the subfield are part of the value, so do not re-indent or
    pretty-print this element.
  -->
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;We investigate the extraction of bottle-neck features (BNFs) for multiple languages without access to manual transcription. Multilingual BNFs are derived from a multi-task learning deep neural network which is trained with unsupervised phoneme-like labels. The unsupervised phoneme-like labels are obtained from language-dependent Dirichlet process Gaussian mixture models separately trained on untranscribed speech of multiple languages.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;In this version, the input MFCC for DPGMM is processed with VTLN.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt; &lt;/p&gt;</subfield>
  </datafield>
  <!-- 773: related identifier; $i = relation type, $a = identifier of the related record, $n = identifier scheme. -->
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="a">10.5281/zenodo.822736</subfield>
  </datafield>
  <!-- 024: persistent identifier of this record; $a = identifier value, $2 = identifier scheme (doi). -->
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.822737</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <!-- 980 (second occurrence): resource type of the record. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
</record>
75 views
29 downloads
| | All versions | This version |
|---|---|---|
| Views | 75 | 75 |
| Downloads | 29 | 29 |
| Data volume | 233.1 GB | 233.1 GB |
| Unique views | 74 | 74 |
| Unique downloads | 24 | 24 |

Share

Cite as