
ESPnet2 pretrained model, kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave, fs=24000, lang=jp

kan-bayashi


JSON Export

{
  "files": [
    {
      "links": {
        "self": "https://zenodo.org/api/files/9e46dbda-e2a1-4c5b-bf21-1c218b26ff51/tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave.zip"
      }, 
      "checksum": "md5:09474018281d53f72800679649e3dfa5", 
      "bucket": "9e46dbda-e2a1-4c5b-bf21-1c218b26ff51", 
      "key": "tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave.zip", 
      "type": "zip", 
      "size": 282053710
    }
  ], 
  "owners": [
    116548
  ], 
  "doi": "10.5281/zenodo.4391409", 
  "stats": {
    "version_unique_downloads": 101.0, 
    "unique_views": 32.0, 
    "views": 33.0, 
    "version_views": 33.0, 
    "unique_downloads": 101.0, 
    "version_unique_views": 32.0, 
    "volume": 44564486180.0, 
    "version_downloads": 158.0, 
    "downloads": 158.0, 
    "version_volume": 44564486180.0
  }, 
  "links": {
    "doi": "https://doi.org/10.5281/zenodo.4391409", 
    "conceptdoi": "https://doi.org/10.5281/zenodo.4391408", 
    "bucket": "https://zenodo.org/api/files/9e46dbda-e2a1-4c5b-bf21-1c218b26ff51", 
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.4391408.svg", 
    "html": "https://zenodo.org/record/4391409", 
    "latest_html": "https://zenodo.org/record/4391409", 
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.4391409.svg", 
    "latest": "https://zenodo.org/api/records/4391409"
  }, 
  "conceptdoi": "10.5281/zenodo.4391408", 
  "created": "2020-12-24T00:54:44.910006+00:00", 
  "updated": "2021-06-22T12:34:24.719076+00:00", 
  "conceptrecid": "4391408", 
  "revision": 2, 
  "id": 4391409, 
  "metadata": {
    "access_right_category": "success", 
    "doi": "10.5281/zenodo.4391409", 
    "description": "This model was trained by kan-bayashi using jsut/tts1 recipe in <a href=\"https://github.com/espnet/espnet/\">espnet</a>.\n<p>&nbsp;</p>\n<ul>\n<li><strong>Python API</strong><pre><code class=\"language-python\">See https://github.com/espnet/espnet_model_zoo</code></pre></li>\n<li><strong>Evaluate in the recipe</strong><pre><code class=\"language-bash\">git clone https://github.com/espnet/espnet\ncd espnet\ngit checkout 73e3fbe6b0050a401c5945d8210f8fa206c0dcc0\npip install -e .\ncd egs2/jsut/tts1\n# Download the model file here\n./run.sh --skip_data_prep false --skip_train true --download_model kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave</code>\n</pre></li>\n<li><strong>Config</strong><pre><code>config: conf/tuning/train_conformer_fastspeech2.yaml\nprint_config: false\nlog_level: INFO\ndry_run: false\niterator_type: sequence\noutput_dir: exp/tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent\nngpu: 1\nseed: 0\nnum_workers: 1\nnum_att_plot: 3\ndist_backend: nccl\ndist_init_method: env://\ndist_world_size: 4\ndist_rank: 0\nlocal_rank: 0\ndist_master_addr: localhost\ndist_master_port: 43445\ndist_launcher: null\nmultiprocessing_distributed: true\ncudnn_enabled: true\ncudnn_benchmark: false\ncudnn_deterministic: true\ncollect_stats: false\nwrite_collected_feats: false\nmax_epoch: 200\npatience: null\nval_scheduler_criterion:\n- valid\n- loss\nearly_stopping_criterion:\n- valid\n- loss\n- min\nbest_model_criterion:\n-   - valid\n    - loss\n    - min\n-   - train\n    - loss\n    - min\nkeep_nbest_models: 5\ngrad_clip: 1.0\ngrad_clip_type: 2.0\ngrad_noise: false\naccum_grad: 1\nno_forward_run: false\nresume: true\ntrain_dtype: float32\nuse_amp: false\nlog_interval: null\nunused_parameters: false\nuse_tensorboard: true\nuse_wandb: false\nwandb_project: null\nwandb_id: null\npretrain_path: null\ninit_param: []\nfreeze_param: []\nnum_iters_per_epoch: 500\nbatch_size: 20\nvalid_batch_size: null\nbatch_bins: 18000000\nvalid_batch_bins: null\ntrain_shape_file:\n- exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.phn\n- exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape\nvalid_shape_file:\n- exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.phn\n- exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape\nbatch_type: numel\nvalid_batch_type: null\nfold_length:\n- 150\n- 240000\nsort_in_batch: descending\nsort_batch: descending\nmultiple_iterator: false\nchunk_length: 500\nchunk_shift_ratio: 0.5\nnum_cache_chunks: 1024\ntrain_data_path_and_name_and_type:\n-   - dump/raw/tr_no_dev/text\n    - text\n    - text\n-   - exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations\n    - durations\n    - text_int\n-   - dump/raw/tr_no_dev/wav.scp\n    - speech\n    - sound\nvalid_data_path_and_name_and_type:\n-   - dump/raw/dev/text\n    - text\n    - text\n-   - exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations\n    - durations\n    - text_int\n-   - dump/raw/dev/wav.scp\n    - speech\n    - sound\nallow_variable_data_keys: 
false\nmax_cache_size: 0.0\nmax_cache_fd: 32\nvalid_max_cache_size: null\noptim: adam\noptim_conf:\n    lr: 1.0\nscheduler: noamlr\nscheduler_conf:\n    model_size: 384\n    warmup_steps: 4000\ntoken_list:\n- \n- \n- '1'\n- '2'\n- '0'\n- '3'\n- '4'\n- '-1'\n- '5'\n- a\n- o\n- '-2'\n- i\n- '-3'\n- u\n- e\n- k\n- n\n- t\n- '6'\n- r\n- '-4'\n- s\n- N\n- m\n- '7'\n- sh\n- d\n- g\n- w\n- '8'\n- U\n- '-5'\n- I\n- cl\n- h\n- y\n- b\n- '9'\n- j\n- ts\n- ch\n- '-6'\n- z\n- p\n- '-7'\n- f\n- ky\n- ry\n- '-8'\n- gy\n- '-9'\n- hy\n- ny\n- '-10'\n- by\n- my\n- '-11'\n- '-12'\n- '-13'\n- py\n- '-14'\n- '-15'\n- v\n- '10'\n- '-16'\n- '-17'\n- '11'\n- '-21'\n- '-20'\n- '12'\n- '-19'\n- '13'\n- '-18'\n- '14'\n- dy\n- '15'\n- ty\n- '-22'\n- '16'\n- '18'\n- '19'\n- '17'\n- \nodim: null\nmodel_conf: {}\nuse_preprocessor: true\ntoken_type: phn\nbpemodel: null\nnon_linguistic_symbols: null\ncleaner: jaconv\ng2p: pyopenjtalk_accent\nfeats_extract: fbank\nfeats_extract_conf:\n    fs: 24000\n    fmin: 80\n    fmax: 7600\n    n_mels: 80\n    hop_length: 300\n    n_fft: 2048\n    win_length: 1200\nnormalize: global_mvn\nnormalize_conf:\n    stats_file: exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz\ntts: fastspeech2\ntts_conf:\n    adim: 384\n    aheads: 2\n    elayers: 4\n    eunits: 1536\n    dlayers: 4\n    dunits: 1536\n    positionwise_layer_type: conv1d\n    positionwise_conv_kernel_size: 3\n    duration_predictor_layers: 2\n    duration_predictor_chans: 256\n    duration_predictor_kernel_size: 3\n    postnet_layers: 5\n    postnet_filts: 5\n    postnet_chans: 256\n    use_masking: true\n    encoder_normalize_before: true\n    decoder_normalize_before: true\n    reduction_factor: 1\n    encoder_type: conformer\n    decoder_type: conformer\n    conformer_pos_enc_layer_type: rel_pos\n    conformer_self_attn_layer_type: rel_selfattn\n    conformer_activation_type: swish\n    use_macaron_style_in_conformer: true\n    use_cnn_in_conformer: true\n    conformer_enc_kernel_size: 7\n    conformer_dec_kernel_size: 31\n    init_type: xavier_uniform\n    transformer_enc_dropout_rate: 0.2\n    transformer_enc_positional_dropout_rate: 0.2\n    transformer_enc_attn_dropout_rate: 0.2\n    transformer_dec_dropout_rate: 0.2\n    transformer_dec_positional_dropout_rate: 0.2\n    transformer_dec_attn_dropout_rate: 0.2\n    pitch_predictor_layers: 5\n    pitch_predictor_chans: 256\n    pitch_predictor_kernel_size: 5\n    pitch_predictor_dropout: 0.5\n    pitch_embed_kernel_size: 1\n    pitch_embed_dropout: 0.0\n    stop_gradient_from_pitch_predictor: true\n    energy_predictor_layers: 2\n    energy_predictor_chans: 256\n    energy_predictor_kernel_size: 3\n    energy_predictor_dropout: 0.5\n    energy_embed_kernel_size: 1\n    energy_embed_dropout: 0.0\n    stop_gradient_from_energy_predictor: false\npitch_extract: dio\npitch_extract_conf:\n    fs: 24000\n    n_fft: 2048\n    hop_length: 300\n    f0max: 400\n    f0min: 80\n    reduction_factor: 1\npitch_normalize: global_mvn\npitch_normalize_conf:\n    stats_file: exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz\nenergy_extract: energy\nenergy_extract_conf:\n    fs: 24000\n    n_fft: 2048\n    hop_length: 300\n    win_length: 1200\n    reduction_factor: 1\nenergy_normalize: global_mvn\nenergy_normalize_conf:\n    stats_file: 
exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz\nrequired:\n- output_dir\n- token_list\ndistributed: true</code></pre></li>\n</ul>", 
    "license": {
      "id": "CC-BY-4.0"
    }, 
    "title": "ESPnet2 pretrained model, kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave, fs=24000, lang=jp", 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "4391408"
          }, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "4391409"
          }
        }
      ]
    }, 
    "communities": [
      {
        "id": "espnet"
      }
    ], 
    "keywords": [
      "ESPnet", 
      "deep-learning", 
      "python", 
      "pytorch", 
      "speech-recognition", 
      "speech-synthesis", 
      "speech-translation", 
      "machine-translation"
    ], 
    "publication_date": "2020-12-24", 
    "creators": [
      {
        "name": "kan-bayashi"
      }
    ], 
    "access_right": "open", 
    "resource_type": {
      "type": "other", 
      "title": "Other"
    }, 
    "related_identifiers": [
      {
        "scheme": "url", 
        "identifier": "https://github.com/espnet/espnet", 
        "relation": "isSupplementTo"
      }, 
      {
        "scheme": "doi", 
        "identifier": "10.5281/zenodo.4391408", 
        "relation": "isVersionOf"
      }
    ]
  }
}
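
The record description above points to the espnet_model_zoo Python API for inference. The following is a minimal sketch of that route, assuming espnet, espnet_model_zoo, and pyopenjtalk are installed; the exact return format of the synthesis call varies between ESPnet versions, so treat the last lines as illustrative rather than definitive.

# Sketch only: load the model through espnet_model_zoo, as referenced in the
# record description (https://github.com/espnet/espnet_model_zoo).
from espnet2.bin.tts_inference import Text2Speech
from espnet_model_zoo.downloader import ModelDownloader

d = ModelDownloader()
# Downloads the Zenodo archive listed in "files" above and unpacks the
# training config and checkpoint expected by Text2Speech.
text2speech = Text2Speech(
    **d.download_and_unpack(
        "kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_"
        "raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave"
    )
)

# Conformer-FastSpeech2 predicts 80-dim mel features (fs=24000, hop_length=300);
# a separate neural vocoder is needed to turn them into a waveform.
out = text2speech("これはテスト音声です。")
print(out["feat_gen"].shape)  # output key names differ across ESPnet versions

Since this is an acoustic model only, pairing it with a compatible 24 kHz vocoder (for example one trained on JSUT) is required for listenable audio.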
Statistics           All versions   This version
Views                33             33
Downloads            158            158
Data volume          44.6 GB        44.6 GB
Unique views         32             32
Unique downloads     101            101
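
For direct access without the model zoo, the "files" entry in the JSON export gives the archive URL (roughly 282 MB) and an md5 checksum. Below is a hypothetical Python sketch that streams the download and verifies it, using the third-party requests package; the output filename is illustrative.

# Sketch only: fetch the archive from the record's files link and check it
# against the md5 listed above ("md5:09474018281d53f72800679649e3dfa5").
import hashlib

import requests

URL = (
    "https://zenodo.org/api/files/9e46dbda-e2a1-4c5b-bf21-1c218b26ff51/"
    "tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_"
    "pyopenjtalk_accent_train.loss.ave.zip"
)
EXPECTED_MD5 = "09474018281d53f72800679649e3dfa5"

md5 = hashlib.md5()
with requests.get(URL, stream=True) as resp, open("model.zip", "wb") as f:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
        f.write(chunk)
        md5.update(chunk)

if md5.hexdigest() != EXPECTED_MD5:
    raise RuntimeError("md5 mismatch: re-download the archive")
print("model.zip downloaded and verified")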
