Spaces:
Running
on
Zero
Running
on
Zero
update readme
Browse files
README.md
CHANGED
@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
|
|
33 |
|
34 |
**Model download**
|
35 |
|
36 |
-
We strongly
|
37 |
|
38 |
If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
39 |
|
@@ -81,13 +81,13 @@ from cosyvoice.cli.cosyvoice import CosyVoice
|
|
81 |
from cosyvoice.utils.file_utils import load_wav
|
82 |
import torchaudio
|
83 |
|
84 |
-
cosyvoice = CosyVoice('
|
85 |
# sft usage
|
86 |
print(cosyvoice.list_avaliable_spks())
|
87 |
output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
|
88 |
torchaudio.save('sft.wav', output['tts_speech'], 22050)
|
89 |
|
90 |
-
cosyvoice = CosyVoice('
|
91 |
# zero_shot usage
|
92 |
prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
|
93 |
output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
|
@@ -97,7 +97,7 @@ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
|
|
97 |
output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
|
98 |
torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
|
99 |
|
100 |
-
cosyvoice = CosyVoice('
|
101 |
# instruct usage
|
102 |
output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
|
103 |
torchaudio.save('instruct.wav', output['tts_speech'], 22050)
|
@@ -112,7 +112,7 @@ Please see the demo website for details.
|
|
112 |
|
113 |
``` python
|
114 |
# change --model_dir to iic/CosyVoice-300M-SFT for sft inference, or iic/CosyVoice-300M-Instruct for instruct inference
|
115 |
-
python3 webui.py --port 50000 --model_dir
|
116 |
```
|
117 |
|
118 |
**Advanced Usage**
|
|
|
33 |
|
34 |
**Model download**
|
35 |
|
36 |
+
We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `CosyVoice-ttsfrd` resource.
|
37 |
|
38 |
If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
39 |
|
|
|
81 |
from cosyvoice.utils.file_utils import load_wav
|
82 |
import torchaudio
|
83 |
|
84 |
+
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
|
85 |
# sft usage
|
86 |
print(cosyvoice.list_avaliable_spks())
|
87 |
output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
|
88 |
torchaudio.save('sft.wav', output['tts_speech'], 22050)
|
89 |
|
90 |
+
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M')
|
91 |
# zero_shot usage
|
92 |
prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
|
93 |
output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
|
|
|
97 |
output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
|
98 |
torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
|
99 |
|
100 |
+
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-Instruct')
|
101 |
# instruct usage
|
102 |
output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
|
103 |
torchaudio.save('instruct.wav', output['tts_speech'], 22050)
|
|
|
112 |
|
113 |
``` python
|
114 |
# change --model_dir to iic/CosyVoice-300M-SFT for sft inference, or iic/CosyVoice-300M-Instruct for instruct inference
|
115 |
+
python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
|
116 |
```
|
117 |
|
118 |
**Advanced Usage**
|