Published December 21, 2020 | Version v1
Other Open

ESPnet2 pretrained model, kan-bayashi/jsut_tts_train_tacotron2_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave, fs=24000, lang=jp

Creators

Description

This model was trained by kan-bayashi using the jsut/tts1 recipe in ESPnet.

 

  • Python API
    See https://github.com/espnet/espnet_model_zoo
  • Evaluate in the recipe
    git clone https://github.com/espnet/espnet
    cd espnet
    git checkout acd69577240687cc1c6c9d7ca024462aa87fcf89
    pip install -e .
    cd egs2/jsut/tts1
    # Download the model file here
    ./run.sh --skip_data_prep false --skip_train true --download_model kan-bayashi/jsut_tts_train_tacotron2_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave
    
  • Config
    config: conf/tuning/train_tacotron2.yaml
    print_config: false
    log_level: INFO
    dry_run: false
    iterator_type: sequence
    output_dir: exp/tts_train_tacotron2_raw_phn_jaconv_pyopenjtalk_accent
    ngpu: 1
    seed: 0
    num_workers: 1
    num_att_plot: 3
    dist_backend: nccl
    dist_init_method: env://
    dist_world_size: null
    dist_rank: null
    local_rank: 0
    dist_master_addr: null
    dist_master_port: null
    dist_launcher: null
    multiprocessing_distributed: false
    cudnn_enabled: true
    cudnn_benchmark: false
    cudnn_deterministic: true
    collect_stats: false
    write_collected_feats: false
    max_epoch: 200
    patience: null
    val_scheduler_criterion:
    - valid
    - loss
    early_stopping_criterion:
    - valid
    - loss
    - min
    best_model_criterion:
    -   - valid
        - loss
        - min
    -   - train
        - loss
        - min
    keep_nbest_models: 5
    grad_clip: 1.0
    grad_clip_type: 2.0
    grad_noise: false
    accum_grad: 1
    no_forward_run: false
    resume: true
    train_dtype: float32
    use_amp: false
    log_interval: null
    unused_parameters: false
    use_tensorboard: true
    use_wandb: false
    wandb_project: null
    wandb_id: null
    pretrain_path: null
    init_param: []
    num_iters_per_epoch: 500
    batch_size: 20
    valid_batch_size: null
    batch_bins: 3750000
    valid_batch_bins: null
    train_shape_file:
    - exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent/train/text_shape.phn
    - exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent/train/speech_shape
    valid_shape_file:
    - exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent/valid/text_shape.phn
    - exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent/valid/speech_shape
    batch_type: numel
    valid_batch_type: null
    fold_length:
    - 150
    - 240000
    sort_in_batch: descending
    sort_batch: descending
    multiple_iterator: false
    chunk_length: 500
    chunk_shift_ratio: 0.5
    num_cache_chunks: 1024
    train_data_path_and_name_and_type:
    -   - dump/raw/tr_no_dev/text
        - text
        - text
    -   - dump/raw/tr_no_dev/wav.scp
        - speech
        - sound
    valid_data_path_and_name_and_type:
    -   - dump/raw/dev/text
        - text
        - text
    -   - dump/raw/dev/wav.scp
        - speech
        - sound
    allow_variable_data_keys: false
    max_cache_size: 0.0
    max_cache_fd: 32
    valid_max_cache_size: null
    optim: adam
    optim_conf:
        lr: 0.001
        eps: 1.0e-06
        weight_decay: 0.0
    scheduler: null
    scheduler_conf: {}
    token_list:
    - <blank>
    - <unk>
    - '1'
    - '2'
    - '0'
    - '3'
    - '4'
    - '-1'
    - '5'
    - a
    - o
    - '-2'
    - i
    - '-3'
    - u
    - e
    - k
    - n
    - t
    - '6'
    - r
    - '-4'
    - s
    - N
    - m
    - '7'
    - sh
    - d
    - g
    - w
    - '8'
    - U
    - '-5'
    - I
    - cl
    - h
    - y
    - b
    - '9'
    - j
    - ts
    - ch
    - '-6'
    - z
    - p
    - '-7'
    - f
    - ky
    - ry
    - '-8'
    - gy
    - '-9'
    - hy
    - ny
    - '-10'
    - by
    - my
    - '-11'
    - '-12'
    - '-13'
    - py
    - '-14'
    - '-15'
    - v
    - '10'
    - '-16'
    - '-17'
    - '11'
    - '-21'
    - '-20'
    - '12'
    - '-19'
    - '13'
    - '-18'
    - '14'
    - dy
    - '15'
    - ty
    - '-22'
    - '16'
    - '18'
    - '19'
    - '17'
    - <sos/eos>
    odim: null
    model_conf: {}
    use_preprocessor: true
    token_type: phn
    bpemodel: null
    non_linguistic_symbols: null
    cleaner: jaconv
    g2p: pyopenjtalk_accent
    feats_extract: fbank
    feats_extract_conf:
        fs: 24000
        fmin: 80
        fmax: 7600
        n_mels: 80
        hop_length: 300
        n_fft: 2048
        win_length: 1200
    normalize: global_mvn
    normalize_conf:
        stats_file: exp/tts_stats_raw_phn_jaconv_pyopenjtalk_accent/train/feats_stats.npz
    tts: tacotron2
    tts_conf:
        embed_dim: 512
        elayers: 1
        eunits: 512
        econv_layers: 3
        econv_chans: 512
        econv_filts: 5
        atype: location
        adim: 512
        aconv_chans: 32
        aconv_filts: 15
        cumulate_att_w: true
        dlayers: 2
        dunits: 1024
        prenet_layers: 2
        prenet_units: 256
        postnet_layers: 5
        postnet_chans: 512
        postnet_filts: 5
        output_activation: null
        use_batch_norm: true
        use_concate: true
        use_residual: false
        dropout_rate: 0.5
        zoneout_rate: 0.1
        reduction_factor: 1
        spk_embed_dim: null
        use_masking: true
        bce_pos_weight: 5.0
        use_guided_attn_loss: true
        guided_attn_loss_sigma: 0.4
        guided_attn_loss_lambda: 1.0
    pitch_extract: null
    pitch_extract_conf: {}
    pitch_normalize: null
    pitch_normalize_conf: {}
    energy_extract: null
    energy_extract_conf: {}
    energy_normalize: null
    energy_normalize_conf: {}
    required:
    - output_dir
    - token_list
    distributed: false

Files

tts_train_tacotron2_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave.zip

Files (107.6 MB)

Additional details

Related works

Is supplement to
https://github.com/espnet/espnet (URL)