Other Open Access
kan-bayashi
{ "files": [ { "links": { "self": "https://zenodo.org/api/files/b9892620-3772-4c93-95b4-c86b76d26b0c/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent_with_pause_train.loss.ave.zip" }, "checksum": "md5:d843f8183d6916507a0ddddc7cdfcc21", "bucket": "b9892620-3772-4c93-95b4-c86b76d26b0c", "key": "tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent_with_pause_train.loss.ave.zip", "type": "zip", "size": 133238771 } ], "owners": [ 116548 ], "doi": "10.5281/zenodo.4433196", "stats": { "version_unique_downloads": 130.0, "unique_views": 51.0, "views": 57.0, "version_views": 57.0, "unique_downloads": 130.0, "version_unique_views": 51.0, "volume": 23849740009.0, "version_downloads": 179.0, "downloads": 179.0, "version_volume": 23849740009.0 }, "links": { "doi": "https://doi.org/10.5281/zenodo.4433196", "conceptdoi": "https://doi.org/10.5281/zenodo.4433195", "bucket": "https://zenodo.org/api/files/b9892620-3772-4c93-95b4-c86b76d26b0c", "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.4433195.svg", "html": "https://zenodo.org/record/4433196", "latest_html": "https://zenodo.org/record/4433196", "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.4433196.svg", "latest": "https://zenodo.org/api/records/4433196" }, "conceptdoi": "10.5281/zenodo.4433195", "created": "2021-01-12T03:16:59.134961+00:00", "updated": "2022-02-07T09:42:38.521910+00:00", "conceptrecid": "4433195", "revision": 3, "id": 4433196, "metadata": { "access_right_category": "success", "doi": "10.5281/zenodo.4433196", "description": "<p>This model was trained by kan-bayashi using jsut/tts1 recipe in <a href=\"https://github.com/espnet/espnet/\">espnet</a>.</p>\n\n<p> </p>\n\n<ul>\n\t<li><strong>Python API</strong>\n\n\t<pre><code class=\"language-python\">See https://github.com/espnet/espnet_model_zoo</code></pre>\n\t</li>\n\t<li><strong>Evaluate in the recipe</strong>\n\t<pre><code class=\"language-bash\">git clone https://github.com/espnet/espnet\ncd espnet\ngit checkout 
18fb6edb7b14911730337baa05f0e40c4dde9002\npip install -e .\ncd egs2/jsut/tts1\n# Download the model file here\n./run.sh --skip_data_prep false --skip_train true --download_model kan-bayashi/jsut_tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent_with_pause_train.loss.ave</code>\n</pre>\n\t</li>\n\t<li><strong>Config</strong>\n\t<pre><code>config: conf/tuning/train_transformer.yaml\nprint_config: false\nlog_level: INFO\ndry_run: false\niterator_type: sequence\noutput_dir: exp/tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent_with_pause\nngpu: 1\nseed: 0\nnum_workers: 1\nnum_att_plot: 3\ndist_backend: nccl\ndist_init_method: env://\ndist_world_size: 4\ndist_rank: 0\nlocal_rank: 0\ndist_master_addr: localhost\ndist_master_port: 58625\ndist_launcher: null\nmultiprocessing_distributed: true\ncudnn_enabled: true\ncudnn_benchmark: false\ncudnn_deterministic: true\ncollect_stats: false\nwrite_collected_feats: false\nmax_epoch: 200\npatience: null\nval_scheduler_criterion:\n- valid\n- loss\nearly_stopping_criterion:\n- valid\n- loss\n- min\nbest_model_criterion:\n- - valid\n - loss\n - min\n- - train\n - loss\n - min\nkeep_nbest_models: 5\ngrad_clip: 1.0\ngrad_clip_type: 2.0\ngrad_noise: false\naccum_grad: 2\nno_forward_run: false\nresume: true\ntrain_dtype: float32\nuse_amp: false\nlog_interval: null\nunused_parameters: false\nuse_tensorboard: true\nuse_wandb: false\nwandb_project: null\nwandb_id: null\npretrain_path: null\ninit_param: []\nfreeze_param: []\nnum_iters_per_epoch: 1000\nbatch_size: 20\nvalid_batch_size: null\nbatch_bins: 9000000\nvalid_batch_bins: null\ntrain_shape_file:\n- exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train/text_shape.phn\n- exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train/speech_shape\nvalid_shape_file:\n- exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent_with_pause/valid/text_shape.phn\n- exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent_with_pause/valid/speech_shape\nbatch_type: numel\nvalid_batch_type: 
null\nfold_length:\n- 150\n- 240000\nsort_in_batch: descending\nsort_batch: descending\nmultiple_iterator: false\nchunk_length: 500\nchunk_shift_ratio: 0.5\nnum_cache_chunks: 1024\ntrain_data_path_and_name_and_type:\n- - dump/raw/tr_no_dev/text\n - text\n - text\n- - dump/raw/tr_no_dev/wav.scp\n - speech\n - sound\nvalid_data_path_and_name_and_type:\n- - dump/raw/dev/text\n - text\n - text\n- - dump/raw/dev/wav.scp\n - speech\n - sound\nallow_variable_data_keys: false\nmax_cache_size: 0.0\nmax_cache_fd: 32\nvalid_max_cache_size: null\noptim: adam\noptim_conf:\n lr: 1.0\nscheduler: noamlr\nscheduler_conf:\n model_size: 512\n warmup_steps: 8000\ntoken_list:\n- \n- \n- '1'\n- '2'\n- '0'\n- '3'\n- '4'\n- '-1'\n- '5'\n- a\n- o\n- '-2'\n- i\n- '-3'\n- u\n- e\n- k\n- n\n- t\n- '6'\n- r\n- '-4'\n- s\n- N\n- m\n- pau\n- '7'\n- sh\n- d\n- g\n- w\n- '8'\n- U\n- '-5'\n- I\n- cl\n- h\n- y\n- b\n- '9'\n- j\n- ts\n- ch\n- '-6'\n- z\n- p\n- '-7'\n- f\n- ky\n- ry\n- '-8'\n- gy\n- '-9'\n- hy\n- ny\n- '-10'\n- by\n- my\n- '-11'\n- '-12'\n- '-13'\n- py\n- '-14'\n- '-15'\n- v\n- '10'\n- '-16'\n- '-17'\n- '11'\n- '-21'\n- '-20'\n- '12'\n- '-19'\n- '13'\n- '-18'\n- '14'\n- dy\n- '15'\n- ty\n- '-22'\n- '16'\n- '18'\n- '19'\n- '17'\n- \nodim: null\nmodel_conf: {}\nuse_preprocessor: true\ntoken_type: phn\nbpemodel: null\nnon_linguistic_symbols: null\ncleaner: jaconv\ng2p: pyopenjtalk_accent_with_pause\nfeats_extract: fbank\nfeats_extract_conf:\n fs: 24000\n fmin: 80\n fmax: 7600\n n_mels: 80\n hop_length: 300\n n_fft: 2048\n win_length: 1200\nnormalize: global_mvn\nnormalize_conf:\n stats_file: exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent_with_pause/train/feats_stats.npz\ntts: transformer\ntts_conf:\n embed_dim: 0\n eprenet_conv_layers: 0\n eprenet_conv_filts: 0\n eprenet_conv_chans: 0\n dprenet_layers: 2\n dprenet_units: 256\n adim: 512\n aheads: 8\n elayers: 6\n eunits: 1024\n dlayers: 6\n dunits: 1024\n positionwise_layer_type: conv1d\n positionwise_conv_kernel_size: 1\n 
postnet_layers: 5\n postnet_filts: 5\n postnet_chans: 256\n use_masking: true\n bce_pos_weight: 5.0\n use_scaled_pos_enc: true\n encoder_normalize_before: true\n decoder_normalize_before: true\n reduction_factor: 1\n init_type: xavier_uniform\n init_enc_alpha: 1.0\n init_dec_alpha: 1.0\n eprenet_dropout_rate: 0.0\n dprenet_dropout_rate: 0.5\n postnet_dropout_rate: 0.5\n transformer_enc_dropout_rate: 0.1\n transformer_enc_positional_dropout_rate: 0.1\n transformer_enc_attn_dropout_rate: 0.1\n transformer_dec_dropout_rate: 0.1\n transformer_dec_positional_dropout_rate: 0.1\n transformer_dec_attn_dropout_rate: 0.1\n transformer_enc_dec_attn_dropout_rate: 0.1\n use_guided_attn_loss: true\n num_heads_applied_guided_attn: 2\n num_layers_applied_guided_attn: 2\n modules_applied_guided_attn:\n - encoder-decoder\n guided_attn_loss_sigma: 0.4\n guided_attn_loss_lambda: 10.0\npitch_extract: null\npitch_extract_conf: {}\npitch_normalize: null\npitch_normalize_conf: {}\nenergy_extract: null\nenergy_extract_conf: {}\nenergy_normalize: null\nenergy_normalize_conf: {}\nrequired:\n- output_dir\n- token_list\ndistributed: true</code></pre>\n\t</li>\n</ul>", "license": { "id": "CC-BY-NC-SA-4.0" }, "title": "ESPnet2 pretrained model, kan-bayashi/jsut_tts_train_transformer_raw_phn_jaconv_pyopenjtalk_accent_with_pause_train.loss.ave, fs=24000, lang=jp", "relations": { "version": [ { "count": 1, "index": 0, "parent": { "pid_type": "recid", "pid_value": "4433195" }, "is_last": true, "last_child": { "pid_type": "recid", "pid_value": "4433196" } } ] }, "communities": [ { "id": "espnet" } ], "keywords": [ "ESPnet", "deep-learning", "python", "pytorch", "speech-recognition", "speech-synthesis", "speech-translation", "machine-translation" ], "publication_date": "2021-01-12", "creators": [ { "name": "kan-bayashi" } ], "access_right": "open", "resource_type": { "type": "other", "title": "Other" }, "related_identifiers": [ { "scheme": "url", "identifier": "https://github.com/espnet/espnet", 
"relation": "isSupplementTo" }, { "scheme": "doi", "identifier": "10.5281/zenodo.4433195", "relation": "isVersionOf" } ] } }
| | All versions | This version |
|---|---|---|
| Views | 57 | 57 |
| Downloads | 179 | 179 |
| Data volume | 23.8 GB | 23.8 GB |
| Unique views | 51 | 51 |
| Unique downloads | 130 | 130 |