Running KAN-TTS locally with Docker
Download the Docker image (the Python 3.8 build):
registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.2
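A minimal sketch of pulling the image and starting a container (assumes the NVIDIA Container Toolkit is installed for --gpus; the /workspace mount point is just an example):
# pull the image
docker pull registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.2
# start an interactive container with GPU access and the current directory mounted
docker run --gpus all -it -v $(pwd):/workspace registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.2 /bin/bash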
Install the base ModelScope package:
pip install modelscope
Install the audio (TTS) dependencies:
pip install "modelscope[audio]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
Install the latest tts-autolabel:
# Run this code block (e.g. in a notebook cell) to install tts-autolabel
import sys
!{sys.executable} -m pip install -U tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
If the network is slow or unreachable, point pip at the Aliyun mirror:
!{sys.executable} -m pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
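A quick sanity check that the installs succeeded (a minimal sketch, run in the same Python environment; assumes modelscope exposes __version__):
import modelscope
print(modelscope.__version__)                 # base package is importable
from modelscope.tools import run_auto_label   # the auto-labeling entry point used below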
Run auto-labeling on your recordings to generate the training data:

from modelscope.tools import run_auto_label

input_wav = "./test_wavs/"               # directory containing your recorded wav files
output_data = "./output_training_data/"  # where the labeled training data will be written

ret, report = run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision="v1.0.7")
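To confirm labeling succeeded you can inspect the return values and the generated files (a minimal sketch; the exact contents of ret and report depend on the tts-autolabel version):
import os
print(ret, report)                            # status and summary from run_auto_label
print(os.listdir("./output_training_data/"))  # labeled data the trainer will consume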
Then fine-tune the pretrained model on the labeled data:

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.audio.audio_utils import TtsTrainType

pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'

dataset_id = "./output_training_data/"
pretrain_work_dir = "./pretrain_work_dir/"

# Training info: specifies which model(s) to train.
# Currently supported: TtsTrainType.TRAIN_TYPE_SAMBERT, TtsTrainType.TRAIN_TYPE_VOC
# SAMBERT training fine-tunes from the model's latest checkpoint step.
# Here only the AM (SAMBERT) model is trained.
train_info = {
    TtsTrainType.TRAIN_TYPE_SAMBERT: {  # configure AM (SAMBERT) training
        'train_steps': 202,              # total number of training steps
        'save_interval_steps': 200,      # save a checkpoint every N steps
        'log_interval': 10               # print a training log every N steps
    }
}

# Training arguments: dataset, temporary working directory and train_info
kwargs = dict(
    model=pretrained_model_id,   # model to fine-tune
    model_revision="v1.0.6",
    work_dir=pretrain_work_dir,  # temporary working directory
    train_dataset=dataset_id,    # dataset path
    train_type=train_info        # training types and their parameters
)

trainer = build_trainer(Trainers.speech_kantts_trainer,
                        default_args=kwargs)

trainer.train()
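After training finishes, the fine-tuned AM checkpoints should be under the working directory (a minimal sketch; tmp_am/ckpt is the same path used for inference below):
import os
print(os.listdir("./pretrain_work_dir/tmp_am/ckpt"))  # fine-tuned SAMBERT checkpoints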
Note that the pretrained model referenced by
pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
must be downloaded. It is best to download it in advance and then set pretrained_model_id to the local path of the cloned repository (see the sketch after the clone command below).
# Clone the pretrained model
git clone https://www.modelscope.cn/damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k.git
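With the repository cloned, point the trainer at the local copy instead of the hub id (a minimal sketch; the directory name is whatever git clone created):
import os
pretrained_model_id = os.path.abspath("./speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k")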
Once the model has been pulled and fine-tuning has finished, run synthesis:
import os
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

model_dir = os.path.abspath("./pretrain_work_dir")

# Point inference at the fine-tuned AM checkpoint and the original base vocoder
custom_infer_abs = {
    'voice_name': 'F7',
    'am_ckpt': os.path.join(model_dir, 'tmp_am', 'ckpt'),
    'am_config': os.path.join(model_dir, 'tmp_am', 'config.yaml'),
    'voc_ckpt': os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
    'voc_config': os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'config.yaml'),
    'audio_config': os.path.join(model_dir, 'data', 'audio_config.yaml'),
    'se_file': os.path.join(model_dir, 'data', 'se', 'se.npy')
}
kwargs = {'custom_ckpt': custom_infer_abs}

model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)

inference = pipeline(task=Tasks.text_to_speech, model=model_id)
output = inference(input="今天的天气真不错")  # "The weather is really nice today"

# Play the result in a notebook
import IPython.display as ipd
ipd.Audio(output["output_wav"], rate=16000)
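Outside a notebook you can write the audio to disk instead (a sketch assuming output["output_wav"] holds the WAV bytes, as in the standard ModelScope TTS pipelines):
# save the synthesized speech to a file
with open("output.wav", "wb") as f:
    f.write(output["output_wav"])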
Reference: