Planned intervention: On Wednesday, April 3rd, at 05:30 UTC, Zenodo will be unavailable for 2-10 minutes to perform a storage cluster upgrade.
Published January 22, 2021 | Version v1
Other Open

ESPnet2 pretrained model, ftshijt/mls_asr_transformer_valid.acc.best, fs=16k, lang=noinfo

Creators

  • 1. Johns Hopkins University

Description

This model was trained by ftshijt using the mls recipe in espnet.

 

  • Python API
    See https://github.com/espnet/espnet_model_zoo
  • Evaluate in the recipe
    git clone https://github.com/espnet/espnet
    cd espnet
    git checkout fa63570d883e2ae8a57a58abfb6e1c1754100e7c
    pip install -e .
    cd egs2/mls/asr1
    ./run.sh --skip_data_prep false --skip_train true --download_model ftshijt/mls_asr_transformer_valid.acc.best
    
  • Results
    
    # RESULTS
    ## Environments
    - date: `Fri Jan 22 04:56:26 EST 2021`
    - python version: `3.8.3 (default, May 19 2020, 18:47:26)  [GCC 7.3.0]`
    - espnet version: `espnet 0.9.2`
    - pytorch version: `pytorch 1.6.0`
    - Git hash: `c0c3724fe660abd205dbca9c9bbdffed1d2c79db`
      - Commit date: `Tue Jan 12 23:00:11 2021 -0500`
    
    ## asr_transformer
    ### WER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_lm_lm_train_bpe150_valid.loss.ave_asr_model_valid.acc.best/es_test|2385|88499|81.3|15.6|3.1|2.5|21.2|98.6|
    
    ### CER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_lm_lm_train_bpe150_valid.loss.ave_asr_model_valid.acc.best/es_test|2385|474976|94.3|2.9|2.7|1.4|7.1|98.6|
    
    ### TER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_lm_lm_train_bpe150_valid.loss.ave_asr_model_valid.acc.best/es_test|2385|251160|88.6|7.9|3.5|2.1|13.6|98.6|
  • ASR config
    config: conf/tuning/train_asr_transformer.yaml
    print_config: false
    log_level: INFO
    dry_run: false
    iterator_type: sequence
    output_dir: exp/asr_transformer
    ngpu: 1
    seed: 0
    num_workers: 1
    num_att_plot: 3
    dist_backend: nccl
    dist_init_method: env://
    dist_world_size: null
    dist_rank: null
    local_rank: 0
    dist_master_addr: null
    dist_master_port: null
    dist_launcher: null
    multiprocessing_distributed: false
    cudnn_enabled: true
    cudnn_benchmark: false
    cudnn_deterministic: true
    collect_stats: false
    write_collected_feats: false
    max_epoch: 100
    patience: 0
    val_scheduler_criterion:
    - valid
    - loss
    early_stopping_criterion:
    - valid
    - loss
    - min
    best_model_criterion:
    -   - valid
        - acc
        - max
    keep_nbest_models: 10
    grad_clip: 5
    grad_clip_type: 2.0
    grad_noise: false
    accum_grad: 2
    no_forward_run: false
    resume: true
    train_dtype: float32
    use_amp: false
    log_interval: null
    unused_parameters: false
    use_tensorboard: true
    use_wandb: false
    wandb_project: null
    wandb_id: null
    pretrain_path: null
    init_param: []
    num_iters_per_epoch: null
    batch_size: 32
    valid_batch_size: null
    batch_bins: 1000000
    valid_batch_bins: null
    train_shape_file:
    - exp/asr_stats_raw_bpe150/train/speech_shape
    - exp/asr_stats_raw_bpe150/train/text_shape.bpe
    valid_shape_file:
    - exp/asr_stats_raw_bpe150/valid/speech_shape
    - exp/asr_stats_raw_bpe150/valid/text_shape.bpe
    batch_type: folded
    valid_batch_type: null
    fold_length:
    - 80000
    - 150
    sort_in_batch: descending
    sort_batch: descending
    multiple_iterator: false
    chunk_length: 500
    chunk_shift_ratio: 0.5
    num_cache_chunks: 1024
    train_data_path_and_name_and_type:
    -   - dump/raw/es_train/wav.scp
        - speech
        - sound
    -   - dump/raw/es_train/text
        - text
        - text
    valid_data_path_and_name_and_type:
    -   - dump/raw/es_dev/wav.scp
        - speech
        - sound
    -   - dump/raw/es_dev/text
        - text
        - text
    allow_variable_data_keys: false
    max_cache_size: 0.0
    max_cache_fd: 32
    valid_max_cache_size: null
    optim: adam
    optim_conf:
        lr: 1.0
    scheduler: noamlr
    scheduler_conf:
        warmup_steps: 25000
    token_list:
    - 
    - 
    - ▁
    - s
    - n
    - r
    - o
    - a
    - ▁de
    - e
    - l
    - ▁a
    - u
    - ▁y
    - ▁que
    - ra
    - ta
    - do
    - ▁la
    - i
    - ▁en
    - re
    - to
    - ▁el
    - d
    - p
    - da
    - la
    - c
    - b
    - t
    - ro
    - ó
    - en
    - ri
    - g
    - ba
    - ▁se
    - os
    - er
    - te
    - ▁con
    - ci
    - ▁es
    - es
    - ▁no
    - ▁su
    - h
    - ti
    - é
    - mo
    - á
    - ▁ca
    - ▁ha
    - na
    - ▁los
    - lo
    - í
    - ía
    - de
    - me
    - ca
    - ▁al
    - le
    - ce
    - v
    - ma
    - nte
    - ▁di
    - ▁ma
    - ▁por
    - y
    - di
    - m
    - ▁pa
    - sa
    - ▁si
    - ▁pe
    - gu
    - z
    - ▁mi
    - ▁co
    - ▁me
    - ▁o
    - ▁e
    - ▁un
    - tra
    - ▁re
    - li
    - ▁f
    - co
    - ▁á
    - ndo
    - se
    - mi
    - ga
    - ni
    - ▁cu
    - ▁le
    - jo
    - ▁ve
    - mp
    - bi
    - f
    - va
    - ▁mu
    - go
    - ▁so
    - ñ
    - tu
    - si
    - ▁lo
    - ▁pu
    - ▁vi
    - ▁b
    - ▁las
    - ▁c
    - ▁sa
    - za
    - ▁del
    - ▁po
    - ▁in
    - vi
    - ▁te
    - tro
    - cia
    - ▁una
    - qui
    - pi
    - que
    - ja
    - pa
    - ▁para
    - cu
    - pe
    - ▁como
    - ▁esta
    - ve
    - je
    - lle
    - x
    - ú
    - j
    - q
    - ''''
    - k
    - w
    - ü
    - '-'
    - 
    init: chainer
    input_size: null
    ctc_conf:
        dropout_rate: 0.0
        ctc_type: builtin
        reduce: true
        ignore_nan_grad: false
    model_conf:
        ctc_weight: 0.3
        lsm_weight: 0.1
        length_normalized_loss: false
    use_preprocessor: true
    token_type: bpe
    bpemodel: data/token_list/bpe_unigram150/bpe.model
    non_linguistic_symbols: null
    cleaner: null
    g2p: null
    frontend: default
    frontend_conf:
        fs: 16k
    specaug: null
    specaug_conf: {}
    normalize: global_mvn
    normalize_conf:
        stats_file: exp/asr_stats_raw_bpe150/train/feats_stats.npz
    preencoder: null
    preencoder_conf: {}
    encoder: transformer
    encoder_conf:
        input_layer: conv2d
        num_blocks: 12
        linear_units: 2048
        dropout_rate: 0.1
        output_size: 256
        attention_heads: 4
        attention_dropout_rate: 0.0
    decoder: transformer
    decoder_conf:
        input_layer: embed
        num_blocks: 6
        linear_units: 2048
        dropout_rate: 0.1
    required:
    - output_dir
    - token_list
    distributed: false
  • LM config
    config: null
    print_config: false
    log_level: INFO
    dry_run: false
    iterator_type: sequence
    output_dir: exp/lm_train_bpe150
    ngpu: 1
    seed: 0
    num_workers: 1
    num_att_plot: 3
    dist_backend: nccl
    dist_init_method: env://
    dist_world_size: null
    dist_rank: null
    local_rank: 0
    dist_master_addr: null
    dist_master_port: null
    dist_launcher: null
    multiprocessing_distributed: false
    cudnn_enabled: true
    cudnn_benchmark: false
    cudnn_deterministic: true
    collect_stats: false
    write_collected_feats: false
    max_epoch: 40
    patience: null
    val_scheduler_criterion:
    - valid
    - loss
    early_stopping_criterion:
    - valid
    - loss
    - min
    best_model_criterion:
    -   - train
        - loss
        - min
    -   - valid
        - loss
        - min
    -   - train
        - acc
        - max
    -   - valid
        - acc
        - max
    keep_nbest_models:
    - 10
    grad_clip: 5.0
    grad_clip_type: 2.0
    grad_noise: false
    accum_grad: 1
    no_forward_run: false
    resume: true
    train_dtype: float32
    use_amp: false
    log_interval: null
    unused_parameters: false
    use_tensorboard: true
    use_wandb: false
    wandb_project: null
    wandb_id: null
    pretrain_path: null
    init_param: []
    num_iters_per_epoch: null
    batch_size: 20
    valid_batch_size: null
    batch_bins: 1000000
    valid_batch_bins: null
    train_shape_file:
    - exp/lm_stats_bpe150/train/text_shape.bpe
    valid_shape_file:
    - exp/lm_stats_bpe150/valid/text_shape.bpe
    batch_type: folded
    valid_batch_type: null
    fold_length:
    - 150
    sort_in_batch: descending
    sort_batch: descending
    multiple_iterator: false
    chunk_length: 500
    chunk_shift_ratio: 0.5
    num_cache_chunks: 1024
    train_data_path_and_name_and_type:
    -   - dump/raw/lm_train.txt
        - text
        - text
    valid_data_path_and_name_and_type:
    -   - dump/raw/es_dev/text
        - text
        - text
    allow_variable_data_keys: false
    max_cache_size: 0.0
    max_cache_fd: 32
    valid_max_cache_size: null
    optim: adadelta
    optim_conf: {}
    scheduler: null
    scheduler_conf: {}
    token_list:
    - 
    - 
    - ▁
    - s
    - n
    - r
    - o
    - a
    - ▁de
    - e
    - l
    - ▁a
    - u
    - ▁y
    - ▁que
    - ra
    - ta
    - do
    - ▁la
    - i
    - ▁en
    - re
    - to
    - ▁el
    - d
    - p
    - da
    - la
    - c
    - b
    - t
    - ro
    - ó
    - en
    - ri
    - g
    - ba
    - ▁se
    - os
    - er
    - te
    - ▁con
    - ci
    - ▁es
    - es
    - ▁no
    - ▁su
    - h
    - ti
    - é
    - mo
    - á
    - ▁ca
    - ▁ha
    - na
    - ▁los
    - lo
    - í
    - ía
    - de
    - me
    - ca
    - ▁al
    - le
    - ce
    - v
    - ma
    - nte
    - ▁di
    - ▁ma
    - ▁por
    - y
    - di
    - m
    - ▁pa
    - sa
    - ▁si
    - ▁pe
    - gu
    - z
    - ▁mi
    - ▁co
    - ▁me
    - ▁o
    - ▁e
    - ▁un
    - tra
    - ▁re
    - li
    - ▁f
    - co
    - ▁á
    - ndo
    - se
    - mi
    - ga
    - ni
    - ▁cu
    - ▁le
    - jo
    - ▁ve
    - mp
    - bi
    - f
    - va
    - ▁mu
    - go
    - ▁so
    - ñ
    - tu
    - si
    - ▁lo
    - ▁pu
    - ▁vi
    - ▁b
    - ▁las
    - ▁c
    - ▁sa
    - za
    - ▁del
    - ▁po
    - ▁in
    - vi
    - ▁te
    - tro
    - cia
    - ▁una
    - qui
    - pi
    - que
    - ja
    - pa
    - ▁para
    - cu
    - pe
    - ▁como
    - ▁esta
    - ve
    - je
    - lle
    - x
    - ú
    - j
    - q
    - ''''
    - k
    - w
    - ü
    - '-'
    - 
    init: null
    model_conf:
        ignore_id: 0
    use_preprocessor: true
    token_type: bpe
    bpemodel: data/token_list/bpe_unigram150/bpe.model
    non_linguistic_symbols: null
    cleaner: null
    g2p: null
    lm: seq_rnn
    lm_conf: {}
    required:
    - output_dir
    - token_list
    distributed: false

Files

asr_transformer_valid.acc.best.zip

Files (137.9 MB)

Name Size Download all
md5:107ca6a9113c1b1e15efeb7c75c039f7
137.9 MB Preview Download

Additional details

Related works

Is supplement to
https://github.com/espnet/espnet (URL)