CosyVoice committed on
Commit
62c7107
1 Parent(s): 89fc722

update dockerfile

Browse files
cosyvoice/dataset/processor.py CHANGED
@@ -106,7 +106,7 @@ def filter(data,
106
  yield sample
107
 
108
 
109
- def resample(data, resample_rate=22050, mode='train'):
110
  """ Resample data.
111
  Inplace operation.
112
 
@@ -123,7 +123,7 @@ def resample(data, resample_rate=22050, mode='train'):
123
  sample_rate = sample['sample_rate']
124
  waveform = sample['speech']
125
  if sample_rate != resample_rate:
126
- if sample_rate < resample_rate:
127
  continue
128
  sample['sample_rate'] = resample_rate
129
  sample['speech'] = torchaudio.transforms.Resample(
 
106
  yield sample
107
 
108
 
109
+ def resample(data, resample_rate=22050, min_sample_rate=16000, mode='train'):
110
  """ Resample data.
111
  Inplace operation.
112
 
 
123
  sample_rate = sample['sample_rate']
124
  waveform = sample['speech']
125
  if sample_rate != resample_rate:
126
+ if sample_rate < min_sample_rate:
127
  continue
128
  sample['sample_rate'] = resample_rate
129
  sample['speech'] = torchaudio.transforms.Resample(
runtime/python/Dockerfile CHANGED
@@ -1,15 +1,14 @@
1
- FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
2
  ENV DEBIAN_FRONTEND=noninteractive
3
 
4
  WORKDIR /opt/CosyVoice
5
 
6
  RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
7
  RUN apt-get update -y
8
- RUN apt-get -y install python3-dev cmake python3-pip git unzip
 
9
  RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
10
- RUN cd CosyVoice && pip3 install --default-timeout=3600 -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
11
- RUN apt install git-lfs && git lfs install
12
- RUN cd CosyVoice && git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
13
- RUN cd CosyVoice/pretrained_models/CosyVoice-ttsfrd && unzip resource.zip -d . && pip3 install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
14
  RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
15
- CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"]
 
1
+ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
2
  ENV DEBIAN_FRONTEND=noninteractive
3
 
4
  WORKDIR /opt/CosyVoice
5
 
6
  RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
7
  RUN apt-get update -y
8
+ RUN apt-get -y install git unzip git-lfs
9
+ RUN git lfs install
10
  RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
11
+ # here we use python==3.10 because we could not find an image that has both python3.8 and torch2.0.1-cu118 installed
12
+ RUN cd CosyVoice && pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
 
 
13
  RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
14
+ CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"]
webui.py CHANGED
@@ -59,10 +59,10 @@ def postprocess(speech, top_db=60, hop_length=220, win_length=440):
59
  return speech
60
 
61
  inference_mode_list = ['预训练音色', '3s极速复刻', '跨语种复刻', '自然语言控制']
62
- instruct_dict = {'预训练音色': '1. 选择预训练音色\n2.点击生成音频按钮',
63
- '3s极速复刻': '1. 选择prompt音频文件,或录入prompt音频,若同时提供,优先选择prompt音频文件\n2. 输入prompt文本\n3.点击生成音频按钮',
64
- '跨语种复刻': '1. 选择prompt音频文件,或录入prompt音频,若同时提供,优先选择prompt音频文件\n2.点击生成音频按钮',
65
- '自然语言控制': '1. 输入instruct文本\n2.点击生成音频按钮'}
66
  def change_instruction(mode_checkbox_group):
67
  return instruct_dict[mode_checkbox_group]
68
 
 
59
  return speech
60
 
61
  inference_mode_list = ['预训练音色', '3s极速复刻', '跨语种复刻', '自然语言控制']
62
+ instruct_dict = {'预训练音色': '1. 选择预训练音色\n2. 点击生成音频按钮',
63
+ '3s极速复刻': '1. 选择prompt音频文件,或录入prompt音频,注意不超过30s,若同时提供,优先选择prompt音频文件\n2. 输入prompt文本\n3. 点击生成音频按钮',
64
+ '跨语种复刻': '1. 选择prompt音频文件,或录入prompt音频,注意不超过30s,若同时提供,优先选择prompt音频文件\n2. 点击生成音频按钮',
65
+ '自然语言控制': '1. 选择预训练音色\n2. 输入instruct文本\n3. 点击生成音频按钮'}
66
  def change_instruction(mode_checkbox_group):
67
  return instruct_dict[mode_checkbox_group]
68