Other (Open Access)

ESPnet2 pretrained model, Chenda Li/wsj0_2mix_enh_train_enh_conv_tasnet_raw_valid.si_snr.ave, fs=8k, lang=en

Chenda Li


JSON Export

{
  "files": [
    {
      "links": {
        "self": "https://zenodo.org/api/files/a58d99e5-03bf-48d0-b00e-9d0c7aa2b1ca/enh_train_enh_conv_tasnet_raw_valid.si_snr.ave.zip"
      }, 
      "checksum": "md5:8f87ee35feb24a1ee2c01a216b69dc8e", 
      "bucket": "a58d99e5-03bf-48d0-b00e-9d0c7aa2b1ca", 
      "key": "enh_train_enh_conv_tasnet_raw_valid.si_snr.ave.zip", 
      "type": "zip", 
      "size": 35240744
    }
  ], 
  "owners": [
    128322
  ], 
  "doi": "10.5281/zenodo.4498562", 
  "stats": {
    "version_unique_downloads": 355.0, 
    "unique_views": 294.0, 
    "views": 316.0, 
    "version_views": 320.0, 
    "unique_downloads": 355.0, 
    "version_unique_views": 296.0, 
    "volume": 16281223728.0, 
    "version_downloads": 462.0, 
    "downloads": 462.0, 
    "version_volume": 16281223728.0
  }, 
  "links": {
    "doi": "https://doi.org/10.5281/zenodo.4498562", 
    "conceptdoi": "https://doi.org/10.5281/zenodo.4498561", 
    "bucket": "https://zenodo.org/api/files/a58d99e5-03bf-48d0-b00e-9d0c7aa2b1ca", 
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.4498561.svg", 
    "html": "https://zenodo.org/record/4498562", 
    "latest_html": "https://zenodo.org/record/4498562", 
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.4498562.svg", 
    "latest": "https://zenodo.org/api/records/4498562"
  }, 
  "conceptdoi": "10.5281/zenodo.4498561", 
  "created": "2021-02-03T17:19:23.254037+00:00", 
  "updated": "2021-06-22T12:31:17.980474+00:00", 
  "conceptrecid": "4498561", 
  "revision": 2, 
  "id": 4498562, 
  "metadata": {
    "access_right_category": "success", 
    "doi": "10.5281/zenodo.4498562", 
    "description": "<p>This model was trained by Chenda Li using wsj0_2mix recipe in <a href=\"https://github.com/espnet/espnet/\">espnet</a>.</p>\n\n<p>&nbsp;</p>\n\n<ul>\n\t<li><strong>Python API</strong>\n\n\t<pre><code class=\"language-python\">See https://github.com/espnet/espnet_model_zoo</code></pre>\n\t</li>\n\t<li><strong>Evaluate in the recipe</strong>\n\t<pre><code class=\"language-bash\">git clone https://github.com/espnet/espnet\ncd espnet\ngit checkout a3334220b0352931677946d178fade3313cf82bb\npip install -e .\ncd egs2/wsj0_2mix/enh1\n./run.sh --skip_data_prep false --skip_train true --download_model Chenda Li/wsj0_2mix_enh_train_enh_conv_tasnet_raw_valid.si_snr.ave</code>\n</pre>\n\t</li>\n\t<li><strong>Results</strong>\n\t<pre><code>\n# RESULTS\n## Environments\n- date: `Thu Feb  4 01:16:18 CST 2021`\n- python version: `3.7.6 (default, Jan  8 2020, 19:59:22)  [GCC 7.3.0]`\n- espnet version: `espnet 0.9.7`\n- pytorch version: `pytorch 1.5.0`\n- Git hash: `a3334220b0352931677946d178fade3313cf82bb`\n  - Commit date: `Fri Jan 29 23:35:47 2021 +0800`\n\n\n## enh_train_enh_conv_tasnet_raw\n\nconfig: ./conf/tuning/train_enh_conv_tasnet.yaml\n\n|dataset|STOI|SAR|SDR|SIR|\n|---|---|---|---|---|\n|enhanced_cv_min_8k|0.949205|17.3785|16.8028|26.9785|\n|enhanced_tt_min_8k|0.95349|16.6221|15.9494|25.9032|</code></pre>\n\t</li>\n\t<li><strong>ASR config</strong>\n\t<pre><code>config: ./conf/tuning/train_enh_conv_tasnet.yaml\nprint_config: false\nlog_level: INFO\ndry_run: false\niterator_type: chunk\noutput_dir: exp/enh_train_enh_conv_tasnet_raw\nngpu: 1\nseed: 0\nnum_workers: 4\nnum_att_plot: 3\ndist_backend: nccl\ndist_init_method: env://\ndist_world_size: null\ndist_rank: null\nlocal_rank: 0\ndist_master_addr: null\ndist_master_port: null\ndist_launcher: null\nmultiprocessing_distributed: false\ncudnn_enabled: true\ncudnn_benchmark: false\ncudnn_deterministic: true\ncollect_stats: false\nwrite_collected_feats: false\nmax_epoch: 100\npatience: 4\nval_scheduler_criterion:\n- valid\n- loss\nearly_stopping_criterion:\n- valid\n- loss\n- min\nbest_model_criterion:\n-   - valid\n    - si_snr\n    - max\n-   - valid\n    - loss\n    - min\nkeep_nbest_models: 1\ngrad_clip: 5.0\ngrad_clip_type: 2.0\ngrad_noise: false\naccum_grad: 1\nno_forward_run: false\nresume: true\ntrain_dtype: float32\nuse_amp: false\nlog_interval: null\nunused_parameters: false\nuse_tensorboard: true\nuse_wandb: false\nwandb_project: null\nwandb_id: null\npretrain_path: null\ninit_param: []\nfreeze_param: []\nnum_iters_per_epoch: null\nbatch_size: 8\nvalid_batch_size: null\nbatch_bins: 1000000\nvalid_batch_bins: null\ntrain_shape_file:\n- exp/enh_stats_8k/train/speech_mix_shape\n- exp/enh_stats_8k/train/speech_ref1_shape\n- exp/enh_stats_8k/train/speech_ref2_shape\nvalid_shape_file:\n- exp/enh_stats_8k/valid/speech_mix_shape\n- exp/enh_stats_8k/valid/speech_ref1_shape\n- exp/enh_stats_8k/valid/speech_ref2_shape\nbatch_type: folded\nvalid_batch_type: null\nfold_length:\n- 80000\n- 80000\n- 80000\nsort_in_batch: descending\nsort_batch: descending\nmultiple_iterator: false\nchunk_length: 32000\nchunk_shift_ratio: 0.5\nnum_cache_chunks: 1024\ntrain_data_path_and_name_and_type:\n-   - dump/raw/tr_min_8k/wav.scp\n    - speech_mix\n    - sound\n-   - dump/raw/tr_min_8k/spk1.scp\n    - speech_ref1\n    - sound\n-   - dump/raw/tr_min_8k/spk2.scp\n    - speech_ref2\n    - sound\nvalid_data_path_and_name_and_type:\n-   - dump/raw/cv_min_8k/wav.scp\n    - speech_mix\n    - sound\n-   - dump/raw/cv_min_8k/spk1.scp\n    - speech_ref1\n 
   - sound\n-   - dump/raw/cv_min_8k/spk2.scp\n    - speech_ref2\n    - sound\nallow_variable_data_keys: false\nmax_cache_size: 0.0\nmax_cache_fd: 32\nvalid_max_cache_size: null\noptim: adam\noptim_conf:\n    lr: 0.001\n    eps: 1.0e-08\n    weight_decay: 0\nscheduler: reducelronplateau\nscheduler_conf:\n    mode: min\n    factor: 0.5\n    patience: 1\ninit: xavier_uniform\nmodel_conf:\n    loss_type: si_snr\nuse_preprocessor: false\nencoder: conv\nencoder_conf:\n    channel: 256\n    kernel_size: 20\n    stride: 10\nseparator: tcn\nseparator_conf:\n    num_spk: 2\n    layer: 8\n    stack: 4\n    bottleneck_dim: 256\n    hidden_dim: 512\n    kernel: 3\n    causal: false\n    norm_type: gLN\n    nonlinear: relu\ndecoder: conv\ndecoder_conf:\n    channel: 256\n    kernel_size: 20\n    stride: 10\nrequired:\n- output_dir\nversion: 0.9.7\ndistributed: false</code></pre>\n\t</li>\n</ul>", 
    "license": {
      "id": "CC-BY-4.0"
    }, 
    "title": "ESPnet2 pretrained model, Chenda Li/wsj0_2mix_enh_train_enh_conv_tasnet_raw_valid.si_snr.ave, fs=8k, lang=en", 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "4498561"
          }, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "4498562"
          }
        }
      ]
    }, 
    "communities": [
      {
        "id": "espnet"
      }
    ], 
    "keywords": [
      "ESPnet", 
      "deep-learning", 
      "python", 
      "pytorch", 
      "speech-separation", 
      "speech-recognition", 
      "speech-synthesis", 
      "speech-translation", 
      "machine-translation"
    ], 
    "publication_date": "2021-02-04", 
    "creators": [
      {
        "name": "Chenda Li"
      }
    ], 
    "access_right": "open", 
    "resource_type": {
      "type": "other", 
      "title": "Other"
    }, 
    "related_identifiers": [
      {
        "scheme": "url", 
        "identifier": "https://github.com/espnet/espnet", 
        "relation": "isSupplementTo"
      }, 
      {
        "scheme": "doi", 
        "identifier": "10.5281/zenodo.4498561", 
        "relation": "isVersionOf"
      }
    ]
  }
}
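
The "Python API" entry in the description only points to the espnet_model_zoo repository. The following is a minimal usage sketch, not an official snippet from this record: it assumes the espnet and espnet_model_zoo packages are installed, that ModelDownloader.download_and_unpack returns keyword arguments accepted by the ESPnet2 enhancement frontend (as it does for the other ESPnet2 tasks), and that SeparateSpeech in espnet2.bin.enh_inference remains the inference entry point; check the linked repository for the current API.

# Hedged sketch: load this record's model via espnet_model_zoo and separate
# an 8 kHz two-speaker mixture. "mixture.wav" is a placeholder file name.
import soundfile as sf
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.enh_inference import SeparateSpeech

d = ModelDownloader()
# Resolves the model name to this Zenodo record and unpacks the .zip archive.
kwargs = d.download_and_unpack(
    "Chenda Li/wsj0_2mix_enh_train_enh_conv_tasnet_raw_valid.si_snr.ave"
)
separate_speech = SeparateSpeech(**kwargs)

mixture, fs = sf.read("mixture.wav")          # 8 kHz two-speaker mixture
separated = separate_speech(mixture[None, :], fs=fs)  # one waveform per speaker
for i, wav in enumerate(separated, start=1):
    sf.write(f"speaker{i}.wav", wav.squeeze(), fs)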
                    All versions   This version
Views                        320            316
Downloads                    462            462
Data volume              16.3 GB        16.3 GB
Unique views                 296            294
Unique downloads             355            355
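
If the archive is fetched directly rather than through espnet_model_zoo, the file link and md5 checksum in the JSON export above can be used to verify the download. A minimal sketch, assuming the legacy https://zenodo.org/api/files/... link is still served (newer Zenodo deployments may redirect it to the /records/<id>/files API):

# Sketch: download the model archive from the record's file link and verify
# it against the md5 checksum listed in the JSON export.
import hashlib
import urllib.request

URL = ("https://zenodo.org/api/files/a58d99e5-03bf-48d0-b00e-9d0c7aa2b1ca/"
       "enh_train_enh_conv_tasnet_raw_valid.si_snr.ave.zip")
EXPECTED_MD5 = "8f87ee35feb24a1ee2c01a216b69dc8e"  # "checksum" field without the "md5:" prefix

path, _ = urllib.request.urlretrieve(URL, "enh_train_enh_conv_tasnet_raw_valid.si_snr.ave.zip")

md5 = hashlib.md5()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        md5.update(chunk)

if md5.hexdigest() != EXPECTED_MD5:
    raise RuntimeError("Checksum mismatch: the download is corrupted or the file has changed")
print(f"{path}: {md5.hexdigest()} (OK)")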
