• ESPnet环境配置与实践


    ESPnet环境配置与实践

    代码来源:espnet/espnet: End-to-End Speech Processing Toolkit (github.com)

    官网文档:Common usages — ESPnet 202402 documentation

    1. 环境配置

    根据官方文档,本文选用 PyTorch=1.12.1、CUDA=11.6。ESPnet 还支持许多其他版本,请查看 https://github.com/espnet/espnet/blob/master/tools/installers/install_torch.sh 以获取详细的版本列表。

    安装CUDA=11.6

    # Download the CUDA 11.6.2 runfile installer.
    # CUDA installation and configuration guide: https://blog.csdn.net/weixin_46560570/article/details/140754242?spm=1001.2014.3001.5501
    wget https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
    # Install the toolkit component into the custom prefix given by --installpath.
    # The runfile name must match the file downloaded above.
    sh ./cuda_11.6.2_510.47.03_linux.run \
      --silent \
      --toolkit \
      --installpath=/s6home/lnj524/module/cuda/cuda-11.6 \
      --no-opengl-libs \
      --no-drm \
      --no-man-page
    # 添加环境变量
    vim ~/.bashrc
    #将下方内容写入.bashrc 
    # CUDA 11.6 environment for ~/.bashrc: toolkit root, binaries, and libraries.
    export CUDA_HOME=/s6home/lnj524/module/cuda/cuda-11.6
    export PATH="$CUDA_HOME/bin:$PATH"
    # Prepend the library directories. ${VAR:+...} avoids a trailing ':' (an empty
    # entry, i.e. the current directory) when LD_LIBRARY_PATH was previously unset.
    export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$CUDA_HOME/lib:$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    # Toolkit-location hints under several spellings; presumably consumed by
    # CMake and other build scripts -- verify which ones your build actually reads.
    export CUDAToolkit_ROOT_DIR="$CUDA_HOME"
    export CUDAToolkit_ROOT="$CUDA_HOME"

    export CUDA_TOOLKIT_ROOT_DIR="$CUDA_HOME"
    export CUDA_TOOLKIT_ROOT="$CUDA_HOME"
    export CUDA_BIN_PATH="$CUDA_HOME"
    export CUDA_PATH="$CUDA_HOME"
    export CUDA_INC_PATH="$CUDA_HOME/targets/x86_64-linux"
    # CFLAGS is a space-separated list of compiler flags, not a ':'-separated
    # path list; a ':' here would become part of the include directory name.
    export CFLAGS="-I$CUDA_HOME/targets/x86_64-linux/include${CFLAGS:+ $CFLAGS}"
    export CUDAToolkit_TARGET_DIR="$CUDA_HOME/targets/x86_64-linux"
    
    # 更新用户环境
    source ~/.bashrc
    # 验证
    nvcc -V
    

    安装Pytorch

    # 克隆espnet代码
    git clone https://gitee.com/chengsili/espnet.git
    # 创建虚拟环境
    # conda的安装和配置:https://blog.csdn.net/weixin_46560570/article/details/140754242?spm=1001.2014.3001.5501
    conda create -n espnet python=3.9
    # 激活虚拟环境
    conda activate espnet
    # 安装Pytorch
    conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge
    # 安装cudnn
    conda install cudnn
    

    项目配置

    cd /s6home/lnj524/module/espnet/tools
    # Point ESPnet at the CUDA toolkit installed earlier. The script must be
    # sourced (". ./script") so that the variables it exports stay in this shell;
    # running it as "./script" would discard them when the child process exits.
    . ./setup_cuda_env.sh /s6home/lnj524/module/cuda/cuda-11.6
    # Register the Python interpreter of the currently active conda environment.
    ./setup_python.sh "$(command -v python3)"
    # 此步骤后,tools下生成activate_python.sh
    
    #查看activate_python.sh
    cat activate_python.sh
    
    #!/usr/bin/env bash
    # THIS FILE IS GENERATED BY tools/setup_python.sh
    export PYTHONUSERBASE="/home/lnj524/module/espnet/tools/python_user_base"
    export PATH="/home/lnj524/module/espnet/tools/python_user_base/bin":${PATH}
    export PATH=/home/lnj524/miniconda3/envs/espnet/bin:${PATH}
    export NLTK_DATA="${PYTHONUSERBASE}/nltk_data" # NLTK_DATA环境变量是后添上的,前三个变量是自动生成的。
    # 报错记录
    # NLTK_DATA环境变量,如果没有这个,nltk_data文件会产生在用户文件夹下。
    # 若报错XXX.zip错误,自行安装和解压。
    # https://gitee.com/qwererer2/nltk_data/blob/gh-pages/packages/corpora/cmudict.zip
    # https://gitee.com/qwererer2/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip
    

    安装相关依赖

    # 踩坑记录
    # 服务器无法访问github,使用make安装时,网络连接超时。
    # 解决方法:
    cd /home/lnj524/module/espnet/tools/installers
    # 检查文件目录下所有的.sh文件,将文件中的github上的项目url改成gitee上的项目url。
    # 我改好的:https://gitee.com/chengsili/espent_tools_installers.git
    # sph2pipe.sh中下载2.5.tar.gz使用:https://gitee.com/chengsili/sph2pipe/repository/archive/2.5.tar.gz
    # SCTK.sh中下载9688a26.tar.gz使用:https://gitee.com/chengsili/SCTK/repository/archive/9688a26.tar.gz
    # kenlm.sh中boost服务器可能下载不下来,如果你遇到了请看下文运行aishell,报错kenlm找不到。
    
    cd /s6home/lnj524/module/espnet/tools
    make TH_VERSION=1.12.1 CUDA_VERSION=11.6
    
    pip install cmake
    pip install sox
    
    # 到此环境配置完成
    # 检查安装,具体细节请访问:https://espnet.github.io/espnet/tutorial.html#transducer
    python3 check_install.py
    

    2. an4数据集

    按照官网进行测试即可,注意Stage 11: ASR Training,设置GPU数量和使用的GPU编号

    # 使用GPU 0和GPU 1
    export CUDA_VISIBLE_DEVICES=0,1
    # 训练
    ./asr.sh --stage 11 --stop_stage 11 --train_set train_nodev --valid_set train_dev --test_sets "train_dev test" --ngpu 2 --asr_config conf/train_asr_demo_transformer.yaml
    

    3.aishell数据集

    3.1kenlm报错
    # 踩坑记录 - 安装kenlm
    # 使用pip或者conda进行安装kenlm。使用conda list可看到kenlm=0.2.0,但是运行 ./asr.sh --stage 1 --stop_stage 1 .... 
    # 仍然报错找不到kenlm。
    # 解决方法,使用espnet提供的脚本。进入installers文件夹,运行./install_kenlm.sh报错,网络连接超时,可自行下载。
    # https://boostorg.jfrog.io/artifactory/main/release/1.81.0/source/boost_1_81_0.tar.bz2
    # 然后自行解压:tar xvf boost_1_81_0.tar.bz2,随后更改install_kenlm.sh,再次运行。 
    # 具体如下:
    cd /home/lnj524/module/espnet/tools/installers
    
    # 将你下载好的压缩包放在installers目录下
    
    tar xvf boost_1_81_0.tar.bz2
    
    vim install_kenlm.sh
    
    #更改后的install_kenlm.sh
    
    #!/usr/bin/env bash
    # Offline variant of the KenLM installer: builds Boost from an
    # already-extracted source tree (boost_<version>/ in the current directory),
    # then clones and builds KenLM against it and installs the KenLM Python
    # package into the active environment.
    # Usage: ./install_kenlm.sh   (takes no arguments)
    set -euo pipefail

    if [ $# != 0 ]; then
        echo "Usage: $0"
        exit 1;
    fi

    boost_version=1.81.0
    # 1.81.0 -> boost_1_81_0 (source tree) and boost_1_81_0_build (install prefix).
    boost_src="boost_${boost_version//./_}"
    boost_build="${boost_src}_build"

    if [ ! -d "${boost_build}" ]; then
        # The download step was removed from this script, so the extracted
        # sources must already be present; fail with a clear message otherwise.
        if [ ! -d "${boost_src}" ]; then
            echo "Error: ${boost_src}/ not found in $(pwd); extract ${boost_src}.tar.bz2 here first." >&2
            exit 1
        fi
        (
            set -euo pipefail
            cd "${boost_src}"
            ./bootstrap.sh
            ./b2 install --prefix="$(pwd)/../${boost_build}"
        )
    fi

    if [ ! -d kenlm ]; then
        git clone https://gitee.com/chengsili/kenlm.git
    fi

    (
        set -euo pipefail
        cd kenlm

        mkdir -p build
        (
            set -euo pipefail
            cd build
            # Let CMake find the Boost build produced above (two levels up from kenlm/build).
            cmake -DCMAKE_PREFIX_PATH="$(pwd)/../../${boost_build}" ..
            make
        )
        (
            set -euo pipefail
            python3 -m pip install -e .
        )
    )
    
    # 安装完成后,发现kenlm文件在installers目录下,将其移动到tools下即可
    cd /home/lnj524/module/espnet/tools/installers
    mv kenlm/ ../
    
    3.2分阶段进行
    数据准备
    # 按照脚本执行第一步,数据集下载时间较长,自行下载数据集,更改data.sh中的数据集路径。
    vim /home/lnj524/module/espnet/egs2/aishell/asr1/local/data.sh
    AISHELL=/home/lnj524/module/data/opensource_data/aishell
    
    第 1 阶段:数据准备
    # 第 1 阶段:数据准备
    ./asr.sh --stage 1 --stop_stage 1 --train_set train --valid_set dev --test_sets "dev test"
    espnet/egs2/aishell/asr1/data$ ls
    dev  local  test  token_list  train
    espnet/egs2/aishell/asr1/data$ ll train
    spk2utt # 每个说话者的 ID 及其所有的发言列表
    text    # 每个发言的转录文本
    utt2spk # 每个发言 ID 及其对应的说话者 ID
    wav.scp # 列出音频文件的路径
    
    第 3 阶段:格式化
    # 第 3 阶段:格式化 wav.scp: data/ -> dump/raw
    ./asr.sh --stage 3 --stop_stage 3 --train_set train --valid_set dev --test_sets "dev test"
    
    2024-08-09T18:40:02 (asr.sh:283:main) ./asr.sh --stage 3 --stop_stage 3 --train_set train --valid_set dev --test_sets dev test
    2024-08-09T18:40:02 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-09T18:40:02 (asr.sh:564:main) Skipped stages:  9 14 15 
    2024-08-09T18:40:02 (asr.sh:614:main) Stage 3: Format wav.scp: data/ -> dump/raw
    utils/copy_data_dir.sh: copied data from data/train to dump/raw/org/train
    utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/org/train
    2024-08-09T18:40:04 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/train/wav.scp dump/raw/org/train
    2024-08-09T18:40:05 (format_wav_scp.sh:118:main) [info]: without segments
    2024-08-09T18:47:10 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=426s]
    utils/copy_data_dir.sh: copied data from data/dev to dump/raw/org/dev
    utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/org/dev
    2024-08-09T18:47:10 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/dev/wav.scp dump/raw/org/dev
    2024-08-09T18:47:10 (format_wav_scp.sh:118:main) [info]: without segments
    2024-08-09T18:47:56 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=46s]
    utils/copy_data_dir.sh: copied data from data/test to dump/raw/test
    utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/test
    2024-08-09T18:47:56 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/test/wav.scp dump/raw/test
    2024-08-09T18:47:57 (format_wav_scp.sh:118:main) [info]: without segments
    2024-08-09T18:48:10 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=14s]
    2024-08-09T18:48:10 (asr.sh:1809:main) Successfully finished. [elapsed=488s]
    
    第 4 阶段:删除长/短数据
    # 第 4 阶段:删除长/短数据:dump/raw/org -> dump/raw
    ./asr.sh --stage 4 --stop_stage 4 --train_set train --valid_set dev --test_sets "dev test"
    
    2024-08-09T18:50:44 (asr.sh:283:main) ./asr.sh --stage 4 --stop_stage 4 --train_set train --valid_set dev --test_sets dev test
    2024-08-09T18:50:44 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-09T18:50:44 (asr.sh:564:main) Skipped stages:  9 14 15 
    2024-08-09T18:50:44 (asr.sh:799:main) Stage 4: Remove long/short data: dump/raw/org -> dump/raw
    utils/copy_data_dir.sh: copied data from dump/raw/org/train to dump/raw/train
    utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/train
    fix_data_dir.sh: kept all 120098 utterances.
    fix_data_dir.sh: old files are kept in dump/raw/train/.backup
    utils/copy_data_dir.sh: copied data from dump/raw/org/dev to dump/raw/dev
    utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/dev
    fix_data_dir.sh: kept all 14326 utterances.
    fix_data_dir.sh: old files are kept in dump/raw/dev/.backup
    2024-08-09T18:50:51 (asr.sh:1809:main) Successfully finished. [elapsed=7s]
    
    第 5 阶段:生成token_list
    # 第 5 阶段:使用 BPE 从 dump/raw/train/text 生成token_list。
    # 此处需要在asr.sh设置nbpe=4234  
    ./asr.sh --stage 5 --stop_stage 5 --train_set train --valid_set dev --test_sets "dev test"
    
    2024-08-09T18:52:48 (asr.sh:283:main) ./asr.sh --stage 5 --stop_stage 5 --train_set train --valid_set dev --test_sets dev test
    2024-08-09T18:52:48 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-09T18:52:48 (asr.sh:564:main) Skipped stages:  9 14 15 
    2024-08-09T18:52:48 (asr.sh:877:main) Stage 5: Generate token_list from dump/raw/org/train/text using BPE
    sentencepiece_trainer.cc(177) LOG(INFO) Running command: --input=data/token_list/bpe_unigram4234/train.txt --vocab_size=4234 --model_type=unigram --model_prefix=data/token_list/bpe_unigram4234/bpe --character_coverage=1.0 --input_sentence_size=100000000
    sentencepiece_trainer.cc(77) LOG(INFO) Starts training with : 
    trainer_spec {
      input: data/token_list/bpe_unigram4234/train.txt
      input_format: 
      model_prefix: data/token_list/bpe_unigram4234/bpe
      model_type: UNIGRAM
      vocab_size: 4234
      self_test_sample_size: 0
      character_coverage: 1
      input_sentence_size: 100000000
      shuffle_input_sentence: 1
      seed_sentencepiece_size: 1000000
      shrinking_factor: 0.75
      max_sentence_length: 4192
      num_threads: 16
      num_sub_iterations: 2
      max_sentencepiece_length: 16
      split_by_unicode_script: 1
      split_by_number: 1
      split_by_whitespace: 1
      split_digits: 0
      treat_whitespace_as_suffix: 0
      allow_whitespace_only_pieces: 0
      required_chars: 
      byte_fallback: 0
      vocabulary_output_piece_score: 1
      train_extremely_large_corpus: 0
      hard_vocab_limit: 1
      use_all_vocab: 0
      unk_id: 0
      bos_id: 1
      eos_id: 2
      pad_id: -1
      unk_piece: <unk>
      bos_piece: <s>
      eos_piece: </s>
      pad_piece: <pad>
      unk_surface:  ⁇ 
      enable_differential_privacy: 0
      differential_privacy_noise_level: 0
      differential_privacy_clipping_threshold: 0
    }
    normalizer_spec {
      name: nmt_nfkc
      add_dummy_prefix: 1
      remove_extra_whitespaces: 1
      escape_whitespaces: 1
      normalization_rule_tsv: 
    }
    denormalizer_spec {}
    trainer_interface.cc(350) LOG(INFO) SentenceIterator is not specified. Using MultiFileSentenceIterator.
    trainer_interface.cc(181) LOG(INFO) Loading corpus: data/token_list/bpe_unigram4234/train.txt
    trainer_interface.cc(406) LOG(INFO) Loaded all 120098 sentences
    trainer_interface.cc(422) LOG(INFO) Adding meta_piece: <unk>
    trainer_interface.cc(422) LOG(INFO) Adding meta_piece: <s>
    trainer_interface.cc(422) LOG(INFO) Adding meta_piece: </s>
    trainer_interface.cc(427) LOG(INFO) Normalizing sentences...
    trainer_interface.cc(536) LOG(INFO) all chars count=1850211
    trainer_interface.cc(557) LOG(INFO) Alphabet size=4231
    trainer_interface.cc(558) LOG(INFO) Final character coverage=1
    trainer_interface.cc(590) LOG(INFO) Done! preprocessed 120098 sentences.
    unigram_model_trainer.cc(146) LOG(INFO) Making suffix array...
    unigram_model_trainer.cc(150) LOG(INFO) Extracting frequent sub strings...
    unigram_model_trainer.cc(201) LOG(INFO) Initialized 384921 seed sentencepieces
    trainer_interface.cc(596) LOG(INFO) Tokenizing input sentences with whitespace: 120098
    trainer_interface.cc(607) LOG(INFO) Done! 113737
    unigram_model_trainer.cc(491) LOG(INFO) Using 113737 sentences for EM training
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=169508 obj=60.2932 num_tokens=605625 num_tokens/piece=3.57284
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=154327 obj=57.151 num_tokens=608759 num_tokens/piece=3.9446
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=115321 obj=58.0022 num_tokens=643243 num_tokens/piece=5.57785
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=114890 obj=57.5467 num_tokens=644182 num_tokens/piece=5.60695
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=86118 obj=59.1864 num_tokens=685973 num_tokens/piece=7.9655
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=86071 obj=58.6914 num_tokens=686845 num_tokens/piece=7.97998
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=64542 obj=60.6321 num_tokens=730928 num_tokens/piece=11.3248
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=64533 obj=60.1345 num_tokens=731803 num_tokens/piece=11.34
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=48395 obj=62.3089 num_tokens=775418 num_tokens/piece=16.0227
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=48393 obj=61.8552 num_tokens=775965 num_tokens/piece=16.0347
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=36293 obj=64.1495 num_tokens=822372 num_tokens/piece=22.6592
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=36293 obj=63.7144 num_tokens=822793 num_tokens/piece=22.6708
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=27218 obj=66.1917 num_tokens=870513 num_tokens/piece=31.983
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=27218 obj=65.7753 num_tokens=871226 num_tokens/piece=32.0092
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=20412 obj=68.3477 num_tokens=920879 num_tokens/piece=45.1146
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=20412 obj=67.9286 num_tokens=921147 num_tokens/piece=45.1277
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=15309 obj=70.7446 num_tokens=974604 num_tokens/piece=63.6622
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=15309 obj=70.2996 num_tokens=975000 num_tokens/piece=63.688
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=11480 obj=73.3361 num_tokens=1034514 num_tokens/piece=90.1145
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=11479 obj=72.8184 num_tokens=1034923 num_tokens/piece=90.1579
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=8609 obj=76.2591 num_tokens=1105069 num_tokens/piece=128.362
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=8609 obj=75.6089 num_tokens=1105469 num_tokens/piece=128.409
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=6456 obj=80.114 num_tokens=1196553 num_tokens/piece=185.34
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=6456 obj=79.1711 num_tokens=1196816 num_tokens/piece=185.38
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=4842 obj=86.2754 num_tokens=1339006 num_tokens/piece=276.54
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=4842 obj=84.6424 num_tokens=1339362 num_tokens/piece=276.613
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=4657 obj=85.9046 num_tokens=1365198 num_tokens/piece=293.15
    unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=4657 obj=85.6287 num_tokens=1365198 num_tokens/piece=293.15
    trainer_interface.cc(685) LOG(INFO) Saving model: data/token_list/bpe_unigram4234/bpe.model
    trainer_interface.cc(697) LOG(INFO) Saving vocabs: data/token_list/bpe_unigram4234/bpe.vocab
    2024-08-09T18:52:58 (asr.sh:1809:main) Successfully finished. [elapsed=10s]
    
    第 6-9 阶段:与语言建模相关的阶段
    第 10 阶段:ASR 收集统计信息
    # 第 10 阶段:ASR 收集统计信息:train_set=dump/raw/train,valid_set=dump/raw/dev
    # 此阶段需要设置asr.sh中asr_config=conf/train_asr_conformer.yaml
    ./asr.sh --stage 10 --stop_stage 10 --train_set train --valid_set dev --test_sets "dev test" --asr_config conf/train_asr_conformer.yaml
    
    2024-08-09T18:56:13 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-09T18:56:13 (asr.sh:564:main) Skipped stages:  9 14 15 
    2024-08-09T18:56:13 (asr.sh:1189:main) Stage 10: ASR collect stats: train_set=dump/raw/train, valid_set=dump/raw/dev
    2024-08-09T18:56:13 (asr.sh:1240:main) Generate 'exp/asr_stats_raw_bpe4234/run.sh'. You can resume the process from stage 10 using this script
    2024-08-09T18:56:13 (asr.sh:1244:main) ASR collect-stats started... log: 'exp/asr_stats_raw_bpe4234/logdir/stats.*.log'
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/aggregate_stats_dirs.py --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.1 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.2 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.3 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.4 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.5 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.6 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.7 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.8 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.9 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.10 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.11 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.12 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.13 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.14 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.15 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.16 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.17 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.18 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.19 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.20 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.21 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.22 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.23 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.24 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.25 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.26 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.27 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.28 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.29 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.30 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.31 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.32 --output_dir exp/asr_stats_raw_bpe4234
    2024-08-09T19:01:59 (asr.sh:1809:main) Successfully finished. [elapsed=346s]
    
    第 11 阶段:ASR 训练
    # 第 11 阶段:ASR 训练:train_set=dump/raw/train,valid_set=dump/raw/dev
    # 更改train_asr_conformer.yaml中max_epoch: 50,其他默认设置
    export CUDA_VISIBLE_DEVICES=1,3,4,5
    ./asr.sh --stage 11 --stop_stage 11 --train_set train --valid_set dev --test_sets "dev test" --asr_config conf/train_asr_conformer.yaml --ngpu 4
    
    2024-08-10T09:27:53 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-10T09:27:53 (asr.sh:564:main) Skipped stages:  9 14 15 
    2024-08-10T09:27:53 (asr.sh:1308:main) Stage 11: ASR Training: train_set=dump/raw/train, valid_set=dump/raw/dev
    2024-08-10T09:27:53 (asr.sh:1407:main) Generate 'exp/asr_train_asr_conformer_raw_bpe4234/run.sh'. You can resume the process from stage 11 using this script
    2024-08-10T09:27:53 (asr.sh:1411:main) ASR training started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/train.log'
    2024-08-10 09:27:53,791 (launch:94) INFO: /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/launch.py --cmd 'run.pl --name exp/asr_train_asr_conformer_raw_bpe4234/train.log' --log exp/asr_train_asr_conformer_raw_bpe4234/train.log --ngpu 4 --num_nodes 1 --init_file_prefix exp/asr_train_asr_conformer_raw_bpe4234/.dist_init_ --multiprocessing_distributed true -- python3 -m espnet2.bin.asr_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram4234/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram4234/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,sound --valid_shape_file exp/asr_stats_raw_bpe4234/valid/speech_shape --resume true --ignore_init_mismatch false --fold_length 80000 --output_dir exp/asr_train_asr_conformer_raw_bpe4234 --config conf/train_asr_conformer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_bpe4234/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_shape_file exp/asr_stats_raw_bpe4234/train/speech_shape --fold_length 150 --train_data_path_and_name_and_type dump/raw/train/text,text,text --train_shape_file exp/asr_stats_raw_bpe4234/train/text_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/asr_stats_raw_bpe4234/valid/text_shape.bpe
    2024-08-10 09:27:53,806 (launch:237) INFO: single-node with 4gpu on distributed mode
    2024-08-10 09:27:53,809 (launch:348) INFO: log file: exp/asr_train_asr_conformer_raw_bpe4234/train.log
    2024-08-10T15:46:41 (asr.sh:1809:main) Successfully finished. [elapsed=22728s]
    
    

    tensorboard可视化

    由于我使用的是服务器,无法生成链接,同wenet一样,将训练生成的tensorboard文件,拿到本机上运行查看效果。

    tensorboard --logdir tensorboard/train/ --port 12598 --bind_all
    

    在这里插入图片描述

    tensorboard --logdir tensorboard/valid/ --port 12598 --bind_all
    

    在这里插入图片描述

    第 12 阶段:解码
    export CUDA_VISIBLE_DEVICES=1,3,4,5
    ./asr.sh --use_lm false --gpu_inference true --inference_nj 4 --stage 12 --stop_stage 12 --train_set train --valid_set dev --test_sets "dev test" --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml --ngpu 4
    
    2024-08-11T09:02:43 (asr.sh:283:main) ./asr.sh --use_lm false --gpu_inference true --inference_nj 4 --stage 12 --stop_stage 12 --train_set train --valid_set dev --test_sets dev test --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml --ngpu 4
    2024-08-11T09:02:43 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-11T09:02:43 (asr.sh:564:main) Skipped stages:  6 7 8 9 14 15 
    2024-08-11T09:02:43 (asr.sh:1480:main) Stage 12: Decoding: training_dir=exp/asr_train_asr_conformer_raw_bpe4234
    2024-08-11T09:02:43 (asr.sh:1508:main) Generate 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/run.sh'. You can resume the process from stage 12 using this script
    2024-08-11T09:02:43 (asr.sh:1573:main) Decoding started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/logdir/asr_inference.*.log'
    2024-08-11T11:58:37 (asr.sh:1589:main) Calculating RTF & latency... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/logdir/calculate_rtf.log'
    2024-08-11T11:58:40 (asr.sh:1573:main) Decoding started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/logdir/asr_inference.*.log'
    2024-08-11T13:35:20 (asr.sh:1589:main) Calculating RTF & latency... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/logdir/calculate_rtf.log'
    2024-08-11T13:35:21 (asr.sh:1809:main) Successfully finished. [elapsed=16358s]
    
    第 13 阶段:得分
    # Stage 13 (scoring). Pass the same --inference_config path as in stage 12 so
    # both stages refer to the same decode configuration and result directory.
    ./asr.sh --stage 13 --stop_stage 13 --train_set train --valid_set dev --test_sets "dev test" --use_lm false --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml
    
    2024-08-11T14:41:53 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
    2024-08-11T14:41:54 (asr.sh:564:main) Skipped stages:  6 7 8 9 14 15 
    2024-08-11T14:41:54 (asr.sh:1621:main) Stage 13: Scoring
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
    2024-08-11T14:42:36 (asr.sh:1711:main) Write cer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_cer/result.txt
    |   SPKR      |   # Snt        # Wrd    |   Corr         Sub         Del          Ins         Err       S.Err    |
    |   Sum/Avg   |  14326        205341    |   94.8         5.1         0.1          0.1         5.3        38.9    |
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
    2024-08-11T14:43:04 (asr.sh:1711:main) Write wer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_wer/result.txt
    |   SPKR      |    # Snt      # Wrd    |   Corr          Sub         Del         Ins          Err       S.Err    |
    |   Sum/Avg   |   14326       14326    |   61.1         38.9         0.0         0.0         38.9        38.9    |
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model --cleaner none
    2024-08-11T14:43:48 (asr.sh:1711:main) Write ter result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_ter/result.txt
    |   SPKR      |   # Snt        # Wrd    |   Corr         Sub         Del          Ins         Err       S.Err    |
    |   Sum/Avg   |  14326        219662    |   95.2         4.7         0.1          0.1         4.9        38.9    |
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
    2024-08-11T14:44:09 (asr.sh:1711:main) Write cer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_cer/result.txt
    |   SPKR      |   # Snt       # Wrd    |   Corr         Sub        Del         Ins         Err       S.Err    |
    |   Sum/Avg   |   7176       104765    |   94.5         5.4        0.2         0.1         5.7        40.2    |
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
    2024-08-11T14:44:29 (asr.sh:1711:main) Write wer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_wer/result.txt
    |   SPKR      |   # Snt      # Wrd    |   Corr         Sub         Del         Ins         Err       S.Err    |
    |   Sum/Avg   |   7176        7176    |   59.8        40.2         0.0         0.0        40.2        40.2    |
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model
    /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model --cleaner none
    2024-08-11T14:44:46 (asr.sh:1711:main) Write ter result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_ter/result.txt
    |   SPKR      |   # Snt       # Wrd    |   Corr         Sub        Del         Ins         Err       S.Err    |
    |   Sum/Avg   |   7176       111941    |   94.8         5.0        0.1         0.1         5.3        40.2    |
    
    ## exp/asr_train_asr_conformer_raw_bpe4234
    ### WER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_transformer_asr_model_valid.acc.ave/test|7176|7176|59.8|40.2|0.0|0.0|40.2|40.2|
    
    ### CER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_transformer_asr_model_valid.acc.ave/test|7176|104765|94.5|5.4|0.2|0.1|5.7|40.2|
    
    ### TER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |decode_asr_transformer_asr_model_valid.acc.ave/test|7176|111941|94.8|5.0|0.1|0.1|5.3|40.2|
    
    ## exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave
    ### WER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |org/dev|14326|14326|61.1|38.9|0.0|0.0|38.9|38.9|
    
    ### CER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |org/dev|14326|205341|94.8|5.1|0.1|0.1|5.3|38.9|
    
    ### TER
    
    |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
    |---|---|---|---|---|---|---|---|---|
    |org/dev|14326|219662|95.2|4.7|0.1|0.1|4.9|38.9|
    
    2024-08-11T14:44:51 (asr.sh:1809:main) Successfully finished. [elapsed=178s]
    
    

    exp/asr_train_asr_conformer_raw_bpe4234

    WER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 7176 | 59.8 | 40.2 | 0.0 | 0.0 | 40.2 | 40.2 |

    CER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 104765 | 94.5 | 5.4 | 0.2 | 0.1 | 5.7 | 40.2 |

    TER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 111941 | 94.8 | 5.0 | 0.1 | 0.1 | 5.3 | 40.2 |

    exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave

    WER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | org/dev | 14326 | 14326 | 61.1 | 38.9 | 0.0 | 0.0 | 38.9 | 38.9 |

    CER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | org/dev | 14326 | 205341 | 94.8 | 5.1 | 0.1 | 0.1 | 5.3 | 38.9 |

    TER

    | dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
    |---|---|---|---|---|---|---|---|---|
    | org/dev | 14326 | 219662 | 95.2 | 4.7 | 0.1 | 0.1 | 4.9 | 38.9 |
  • 相关阅读:
    shiro会话管理
    文件用手机拍照片打印时,打印出来总是有黑阴影,如何去掉黑色阴影打印清晰的图片
    conda环境下XZ_5.1.2alpha not found解决方案
    计算机网络入门基础篇——应用层
    生产者消费者模型设计
    Thingworx 8.*启动失败
    java毕业设计人人小说系统(附源码、数据库)
    快速集成 HelpLook ChatBot,无人值守AI机器人提升网站留资转化!
    【秋招基础知识】【3】机器学习常见判别模型和生成模型
    【机器学习合集】标准化与池化合集 ->(个人学习记录笔记)
  • 原文地址:https://blog.csdn.net/weixin_46560570/article/details/141107205