禾息 committed
Commit fadb220
1 Parent(s): 18599be

export onnx
cosyvoice/bin/export_onnx.py ADDED
@@ -0,0 +1,228 @@
+# Copyright (c) 2024 Antgroup Inc (authors: Zhoubofan, [email protected])
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import logging
+import os
+import sys
+
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+import onnxruntime as ort
+import numpy as np
+
+# try:
+#     import tensorrt as trt
+# except ImportError:
+#     error_msg = [
+#         "step.1 Download and extract the TensorRT .tar.gz archive from https://developer.nvidia.com/tensorrt/download/10x",
+#         "step.2 Install the TensorRT wheel that matches your Python version, e.g. pip install ${TensorRT-Path}/python/tensorrt-10.2.0-cp38-none-linux_x86_64.whl",
+#         "step.3 Add the TensorRT lib directory to the environment: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${TensorRT-Path}/lib/"
+#     ]
+#     print("\n".join(error_msg))
+#     sys.exit(1)
+
+import torch
+from cosyvoice.cli.cosyvoice import CosyVoice
+
+
+def calculate_onnx(onnx_file, x, mask, mu, t, spks, cond):
+    # Load the ONNX model on the CUDA execution provider.
+    sess_options = ort.SessionOptions()
+    providers = ['CUDAExecutionProvider']
+    session = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
+
+    x_np = x.cpu().numpy()
+    mask_np = mask.cpu().numpy()
+    mu_np = mu.cpu().numpy()
+    t_np = np.array(t.cpu())
+    spks_np = spks.cpu().numpy()
+    cond_np = cond.cpu().numpy()
+
+    ort_inputs = {
+        'x': x_np,
+        'mask': mask_np,
+        'mu': mu_np,
+        't': t_np,
+        'spks': spks_np,
+        'cond': cond_np
+    }
+
+    output = session.run(None, ort_inputs)
+
+    return output[0]
+
+
+# def calculate_tensorrt(trt_file, x, mask, mu, t, spks, cond):
+#     trt.init_libnvinfer_plugins(None, "")
+#     logger = trt.Logger(trt.Logger.WARNING)
+#     runtime = trt.Runtime(logger)
+#     with open(trt_file, 'rb') as f:
+#         serialized_engine = f.read()
+#     engine = runtime.deserialize_cuda_engine(serialized_engine)
+#     context = engine.create_execution_context()
+#
+#     ret = torch.zeros_like(x)
+#
+#     # Set input shapes for dynamic dimensions
+#     context.set_input_shape("x", x.shape)
+#     context.set_input_shape("mask", mask.shape)
+#     context.set_input_shape("mu", mu.shape)
+#     context.set_input_shape("t", t.shape)
+#     context.set_input_shape("spks", spks.shape)
+#     context.set_input_shape("cond", cond.shape)
+#
+#     # Create a list of bindings
+#     bindings = [int(x.data_ptr()), int(mask.data_ptr()), int(mu.data_ptr()),
+#                 int(t.data_ptr()), int(spks.data_ptr()), int(cond.data_ptr()), int(ret.data_ptr())]
+#
+#     # Execute the inference
+#     context.execute_v2(bindings=bindings)
+#
+#     torch.cuda.synchronize()
+#
+#     return ret
+
+
+# def test_calculate_value(estimator, onnx_file, trt_file, dummy_input, args):
+#     torch_output = estimator.forward(**dummy_input).cpu().detach().numpy()
+#     onnx_output = calculate_onnx(onnx_file, **dummy_input)
+#     tensorrt_output = calculate_tensorrt(trt_file, **dummy_input).cpu().detach().numpy()
+#     atol = 2e-3  # Absolute tolerance
+#     rtol = 1e-4  # Relative tolerance
+#
+#     print(f"args.export_half: {args.export_half}, args.model_dir: {args.model_dir}")
+#
+#     print("torch_output diff with onnx_output:")
+#     print(f"compare with atol: {atol}, rtol: {rtol}", np.allclose(torch_output, onnx_output, atol, rtol))
+#     print("max diff value:", np.max(np.fabs(torch_output - onnx_output)))
+#
+#     print("torch_output diff with tensorrt_output:")
+#     print(f"compare with atol: {atol}, rtol: {rtol}", np.allclose(torch_output, tensorrt_output, atol, rtol))
+#     print("max diff value:", np.max(np.fabs(torch_output - tensorrt_output)))
+#
+#     print("onnx_output diff with tensorrt_output:")
+#     print(f"compare with atol: {atol}, rtol: {rtol}", np.allclose(onnx_output, tensorrt_output, atol, rtol))
+#     print("max diff value:", np.max(np.fabs(onnx_output - tensorrt_output)))
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Export your model for deployment')
+    parser.add_argument('--model_dir', type=str, default='pretrained_models/CosyVoice-300M',
+                        help='Local path to the model directory')
+    parser.add_argument('--export_half', type=str, choices=['True', 'False'], default='False',
+                        help='Export with half precision (FP16)')
+    # parser.add_argument('--trt_max_len', type=int, default=8192, help='Export max len')
+    parser.add_argument('--exec_export', type=str, choices=['True', 'False'], default='True',
+                        help='Whether to actually run the export')
+
+    args = parser.parse_args()
+    args.export_half = args.export_half == 'True'
+    args.exec_export = args.exec_export == 'True'
+    print(args)
+    return args
+
+
+def main():
+    args = get_args()
+
+    # load_onnx=False keeps the torch estimator in place; the default would
+    # replace it with an ort session and require the very file we are exporting.
+    cosyvoice = CosyVoice(args.model_dir, load_jit=False, load_trt=False, load_onnx=False)
+    estimator = cosyvoice.model.flow.decoder.estimator
+
+    dtype = torch.float32 if not args.export_half else torch.float16
+    device = torch.device("cuda")
+    batch_size = 1
+    seq_len = 256
+    out_channels = cosyvoice.model.flow.decoder.estimator.out_channels
+    x = torch.rand((batch_size, out_channels, seq_len), dtype=dtype, device=device)
+    mask = torch.ones((batch_size, 1, seq_len), dtype=dtype, device=device)
+    mu = torch.rand((batch_size, out_channels, seq_len), dtype=dtype, device=device)
+    t = torch.rand((batch_size, ), dtype=dtype, device=device)
+    spks = torch.rand((batch_size, out_channels), dtype=dtype, device=device)
+    cond = torch.rand((batch_size, out_channels, seq_len), dtype=dtype, device=device)
+
+    onnx_file_name = 'estimator_fp32.onnx' if not args.export_half else 'estimator_fp16.onnx'
+    onnx_file_path = os.path.join(args.model_dir, onnx_file_name)
+    dummy_input = (x, mask, mu, t, spks, cond)
+
+    estimator = estimator.to(dtype)
+
+    if args.exec_export:
+        torch.onnx.export(
+            estimator,
+            dummy_input,
+            onnx_file_path,
+            export_params=True,
+            opset_version=18,
+            do_constant_folding=True,
+            input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
+            output_names=['estimator_out'],
+            dynamic_axes={
+                'x': {2: 'seq_len'},
+                'mask': {2: 'seq_len'},
+                'mu': {2: 'seq_len'},
+                'cond': {2: 'seq_len'},
+                'estimator_out': {2: 'seq_len'},
+            }
+        )
+
+    # tensorrt_path = os.environ.get('tensorrt_root_dir')
+    # if not tensorrt_path:
+    #     raise EnvironmentError("Please set the 'tensorrt_root_dir' environment variable.")
+    #
+    # if not os.path.isdir(tensorrt_path):
+    #     raise FileNotFoundError(f"The directory {tensorrt_path} does not exist.")
+    #
+    # trt_lib_path = os.path.join(tensorrt_path, "lib")
+    # if trt_lib_path not in os.environ.get('LD_LIBRARY_PATH', ''):
+    #     print(f"Adding TensorRT lib path {trt_lib_path} to LD_LIBRARY_PATH.")
+    #     os.environ['LD_LIBRARY_PATH'] = f"{os.environ.get('LD_LIBRARY_PATH', '')}:{trt_lib_path}"
+    #
+    # trt_file_name = 'estimator_fp32.plan' if not args.export_half else 'estimator_fp16.plan'
+    # trt_file_path = os.path.join(args.model_dir, trt_file_name)
+    #
+    # trtexec_bin = os.path.join(tensorrt_path, 'bin/trtexec')
+    # trt_max_len = args.trt_max_len
+    # trtexec_cmd = f"{trtexec_bin} --onnx={onnx_file_path} --saveEngine={trt_file_path} " \
+    #               f"--minShapes=x:1x{out_channels}x1,mask:1x1x1,mu:1x{out_channels}x1,t:1,spks:1x{out_channels},cond:1x{out_channels}x1 " \
+    #               f"--maxShapes=x:1x{out_channels}x{trt_max_len},mask:1x1x{trt_max_len},mu:1x{out_channels}x{trt_max_len},t:1,spks:1x{out_channels},cond:1x{out_channels}x{trt_max_len} " + \
+    #               ("--fp16" if args.export_half else "")
+    #
+    # print("execute ", trtexec_cmd)
+    #
+    # if args.exec_export:
+    #     os.system(trtexec_cmd)
+    #
+    # dummy_input = {'x': x, 'mask': mask, 'mu': mu, 't': t, 'spks': spks, 'cond': cond}
+    # test_calculate_value(estimator, onnx_file_path, trt_file_path, dummy_input, args)
+
+
+if __name__ == "__main__":
+    main()
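
A quick parity check after running the exporter, sketched below under the assumption that cosyvoice is importable as a package and the FP32 graph was just written; it reuses the script's own calculate_onnx helper and the same dummy shapes. This is a sketch, not part of the commit:

    import numpy as np
    import torch
    from cosyvoice.cli.cosyvoice import CosyVoice
    from cosyvoice.bin.export_onnx import calculate_onnx

    model_dir = 'pretrained_models/CosyVoice-300M'
    # load_onnx=False keeps the torch estimator available for comparison.
    cosyvoice = CosyVoice(model_dir, load_jit=False, load_trt=False, load_onnx=False)
    estimator = cosyvoice.model.flow.decoder.estimator.eval()
    c = estimator.out_channels

    x = torch.rand((1, c, 256), device='cuda')
    mask = torch.ones((1, 1, 256), device='cuda')
    mu = torch.rand((1, c, 256), device='cuda')
    t = torch.rand((1,), device='cuda')
    spks = torch.rand((1, c), device='cuda')
    cond = torch.rand((1, c, 256), device='cuda')

    torch_out = estimator.forward(x, mask, mu, t, spks, cond).detach().cpu().numpy()
    onnx_out = calculate_onnx(f'{model_dir}/estimator_fp32.onnx', x, mask, mu, t, spks, cond)
    print('max abs diff:', np.max(np.fabs(torch_out - onnx_out)))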
cosyvoice/bin/export_trt.py DELETED
@@ -1,126 +0,0 @@
-# Copyright (c) 2024 Antgroup Inc (authors: Zhoubofan, [email protected])
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import logging
-import os
-import sys
-
-logging.getLogger('matplotlib').setLevel(logging.WARNING)
-
-try:
-    import tensorrt
-except ImportError:
-    error_msg = [
-        "step.1 Download and extract the TensorRT .tar.gz archive from https://developer.nvidia.com/tensorrt/download/10x",
-        "step.2 Install the TensorRT wheel that matches your Python version, e.g. pip install ${TensorRT-Path}/python/tensorrt-10.2.0-cp38-none-linux_x86_64.whl",
-        "step.3 Add the TensorRT lib directory to the environment: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${TensorRT-Path}/lib/"
-    ]
-    print("\n".join(error_msg))
-    sys.exit(1)
-
-import torch
-from cosyvoice.cli.cosyvoice import CosyVoice
-
-def get_args():
-    parser = argparse.ArgumentParser(description='Export your model for deployment')
-    parser.add_argument('--model_dir',
-                        type=str,
-                        default='pretrained_models/CosyVoice-300M-SFT',
-                        help='Local path to the model directory')
-
-    parser.add_argument('--export_half',
-                        action='store_true',
-                        help='Export with half precision (FP16)')
-
-    args = parser.parse_args()
-    print(args)
-    return args
-
-def main():
-    args = get_args()
-
-    cosyvoice = CosyVoice(args.model_dir, load_jit=False, load_trt=False)
-    estimator = cosyvoice.model.flow.decoder.estimator
-
-    dtype = torch.float32 if not args.export_half else torch.float16
-    device = torch.device("cuda")
-    batch_size = 1
-    seq_len = 256
-    hidden_size = cosyvoice.model.flow.output_size
-    x = torch.rand((batch_size, hidden_size, seq_len), dtype=dtype, device=device)
-    mask = torch.ones((batch_size, 1, seq_len), dtype=dtype, device=device)
-    mu = torch.rand((batch_size, hidden_size, seq_len), dtype=dtype, device=device)
-    t = torch.rand((batch_size, ), dtype=dtype, device=device)
-    spks = torch.rand((batch_size, hidden_size), dtype=dtype, device=device)
-    cond = torch.rand((batch_size, hidden_size, seq_len), dtype=dtype, device=device)
-
-    onnx_file_name = 'estimator_fp32.onnx' if not args.export_half else 'estimator_fp16.onnx'
-    onnx_file_path = os.path.join(args.model_dir, onnx_file_name)
-    dummy_input = (x, mask, mu, t, spks, cond)
-
-    estimator = estimator.to(dtype)
-
-    torch.onnx.export(
-        estimator,
-        dummy_input,
-        onnx_file_path,
-        export_params=True,
-        opset_version=18,
-        do_constant_folding=True,
-        input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
-        output_names=['estimator_out'],
-        dynamic_axes={
-            'x': {2: 'seq_len'},
-            'mask': {2: 'seq_len'},
-            'mu': {2: 'seq_len'},
-            'cond': {2: 'seq_len'},
-            'estimator_out': {2: 'seq_len'},
-        }
-    )
-
-    tensorrt_path = os.environ.get('tensorrt_root_dir')
-    if not tensorrt_path:
-        raise EnvironmentError("Please set the 'tensorrt_root_dir' environment variable.")
-
-    if not os.path.isdir(tensorrt_path):
-        raise FileNotFoundError(f"The directory {tensorrt_path} does not exist.")
-
-    trt_lib_path = os.path.join(tensorrt_path, "lib")
-    if trt_lib_path not in os.environ.get('LD_LIBRARY_PATH', ''):
-        print(f"Adding TensorRT lib path {trt_lib_path} to LD_LIBRARY_PATH.")
-        os.environ['LD_LIBRARY_PATH'] = f"{os.environ.get('LD_LIBRARY_PATH', '')}:{trt_lib_path}"
-
-    trt_file_name = 'estimator_fp32.plan' if not args.export_half else 'estimator_fp16.plan'
-    trt_file_path = os.path.join(args.model_dir, trt_file_name)
-
-    trtexec_bin = os.path.join(tensorrt_path, 'bin/trtexec')
-    trtexec_cmd = f"{trtexec_bin} --onnx={onnx_file_path} --saveEngine={trt_file_path} " \
-                  "--minShapes=x:1x80x1,mask:1x1x1,mu:1x80x1,t:1,spks:1x80,cond:1x80x1 " \
-                  "--maxShapes=x:1x80x4096,mask:1x1x4096,mu:1x80x4096,t:1,spks:1x80,cond:1x80x4096 --verbose " + \
-                  ("--fp16" if args.export_half else "")
-
-    print("execute ", trtexec_cmd)
-
-    os.system(trtexec_cmd)
-
-    # print("x.shape", x.shape)
-    # print("mask.shape", mask.shape)
-    # print("mu.shape", mu.shape)
-    # print("t.shape", t.shape)
-    # print("spks.shape", spks.shape)
-    # print("cond.shape", cond.shape)
-
-if __name__ == "__main__":
-    main()
cosyvoice/cli/cosyvoice.py CHANGED
@@ -21,7 +21,7 @@ from cosyvoice.utils.file_utils import logging
 
 class CosyVoice:
 
-    def __init__(self, model_dir, load_jit=True, load_trt=True, use_fp16=False):
+    def __init__(self, model_dir, load_jit=True, load_trt=False, load_onnx=True, use_fp16=False):
         instruct = True if '-Instruct' in model_dir else False
         self.model_dir = model_dir
         if not os.path.exists(model_dir):
@@ -39,13 +39,16 @@ class CosyVoice:
         self.model.load('{}/llm.pt'.format(model_dir),
                         '{}/flow.pt'.format(model_dir),
                         '{}/hift.pt'.format(model_dir))
-
+
         if load_jit:
             self.model.load_jit('{}/llm.text_encoder.fp16.zip'.format(model_dir),
                                 '{}/llm.llm.fp16.zip'.format(model_dir))
 
-        if load_trt:
-            self.model.load_trt(model_dir, use_fp16)
+        # if load_trt:
+        #     self.model.load_trt(model_dir, use_fp16)
+
+        if load_onnx:
+            self.model.load_onnx(model_dir, use_fp16)
 
         del configs
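
With this change the constructor prefers the exported ONNX estimator over TensorRT. A minimal usage sketch (the path is the stock pretrained directory; use_fp16=True assumes an estimator_fp16.onnx has already been exported):

    from cosyvoice.cli.cosyvoice import CosyVoice

    # load_onnx=True (the default) swaps the flow estimator for an
    # onnxruntime session; pass load_onnx=False to keep the torch module.
    cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M',
                          load_jit=False, load_trt=False,
                          load_onnx=True, use_fp16=False)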
 
cosyvoice/cli/model.py CHANGED
@@ -19,6 +19,13 @@ import time
 from contextlib import nullcontext
 import uuid
 from cosyvoice.utils.common import fade_in_out
+import numpy as np
+import onnxruntime as ort
+
+# try:
+#     import tensorrt as trt
+# except ImportError:
+#     ...
 
 class CosyVoiceModel:
 
@@ -66,21 +73,40 @@ class CosyVoiceModel:
         llm_llm = torch.jit.load(llm_llm_model)
         self.llm.llm = llm_llm
 
-    def load_trt(self, model_dir, use_fp16):
-        import tensorrt as trt
-        trt_file_name = 'estimator_fp16.plan' if use_fp16 else 'estimator_fp32.plan'
-        trt_file_path = os.path.join(model_dir, trt_file_name)
-        if not os.path.isfile(trt_file_path):
-            raise f"{trt_file_path} does not exist. Please use bin/export_trt.py to generate .plan file"
-
-        trt.init_libnvinfer_plugins(None, "")
-        logger = trt.Logger(trt.Logger.WARNING)
-        runtime = trt.Runtime(logger)
-        with open(trt_file_path, 'rb') as f:
-            serialized_engine = f.read()
-        engine = runtime.deserialize_cuda_engine(serialized_engine)
-        self.flow.decoder.estimator_context = engine.create_execution_context()
+    # def load_trt(self, model_dir, use_fp16):
+    #     trt_file_name = 'estimator_fp16.plan' if use_fp16 else 'estimator_fp32.plan'
+    #     trt_file_path = os.path.join(model_dir, trt_file_name)
+    #     if not os.path.isfile(trt_file_path):
+    #         raise FileNotFoundError(f"{trt_file_path} does not exist. Please use bin/export_trt.py to generate the .plan file")
+    #
+    #     trt.init_libnvinfer_plugins(None, "")
+    #     logger = trt.Logger(trt.Logger.WARNING)
+    #     runtime = trt.Runtime(logger)
+    #     with open(trt_file_path, 'rb') as f:
+    #         serialized_engine = f.read()
+    #     engine = runtime.deserialize_cuda_engine(serialized_engine)
+    #     self.flow.decoder.estimator_context = engine.create_execution_context()
+    #     self.flow.decoder.estimator = None
+
+    def load_onnx(self, model_dir, use_fp16):
+        onnx_file_name = 'estimator_fp16.onnx' if use_fp16 else 'estimator_fp32.onnx'
+        onnx_file_path = os.path.join(model_dir, onnx_file_name)
+        if not os.path.isfile(onnx_file_path):
+            # Raising a bare string is a TypeError; raise a real exception.
+            raise FileNotFoundError(f"{onnx_file_path} does not exist. Please use bin/export_onnx.py to generate the .onnx file")
+
+        # Load the ONNX model on the CUDA execution provider; the torch
+        # estimator is dropped so the flow decoder routes through the session.
+        sess_options = ort.SessionOptions()
+        providers = ['CUDAExecutionProvider']
+        self.flow.decoder.session = ort.InferenceSession(onnx_file_path, sess_options=sess_options, providers=providers)
         self.flow.decoder.estimator = None
 
     def llm_job(self, text, prompt_text, llm_prompt_speech_token, llm_embedding, uuid):
         with self.llm_context:
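
One caveat with load_onnx as written: it hard-codes CUDAExecutionProvider, so a CPU-only onnxruntime build fails at session creation. A sketch of a more defensive variant (not part of this commit; make_estimator_session is a hypothetical helper) that falls back to the CPU provider:

    import onnxruntime as ort

    def make_estimator_session(onnx_file_path):
        # Use CUDA when the installed onnxruntime build exposes it, else CPU.
        available = ort.get_available_providers()
        providers = (['CUDAExecutionProvider'] if 'CUDAExecutionProvider' in available
                     else ['CPUExecutionProvider'])
        return ort.InferenceSession(onnx_file_path,
                                    sess_options=ort.SessionOptions(),
                                    providers=providers)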
cosyvoice/flow/flow_matching.py CHANGED
@@ -14,6 +14,8 @@
 import torch
 import torch.nn.functional as F
 from matcha.models.components.flow_matching import BASECFM
+import onnxruntime as ort
+import numpy as np
 
 class ConditionalCFM(BASECFM):
     def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None):
@@ -29,6 +31,8 @@ class ConditionalCFM(BASECFM):
         in_channels = in_channels + (spk_emb_dim if n_spks > 0 else 0)
         # Just change the architecture of the estimator here
         self.estimator = estimator
+        self.estimator_context = None  # for tensorrt
+        self.session = None  # for onnx
 
     @torch.inference_mode()
     def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None):
@@ -101,28 +105,47 @@ class ConditionalCFM(BASECFM):
 
         if self.estimator is not None:
             return self.estimator.forward(x, mask, mu, t, spks, cond)
+        # elif self.estimator_context is not None:
+        #     assert self.training is False, 'tensorrt cannot be used in training'
+        #     bs = x.shape[0]
+        #     hs = x.shape[1]
+        #     seq_len = x.shape[2]
+        #     # assert bs == 1 and hs == 80
+        #     ret = torch.empty_like(x)
+        #     self.estimator_context.set_input_shape("x", x.shape)
+        #     self.estimator_context.set_input_shape("mask", mask.shape)
+        #     self.estimator_context.set_input_shape("mu", mu.shape)
+        #     self.estimator_context.set_input_shape("t", t.shape)
+        #     self.estimator_context.set_input_shape("spks", spks.shape)
+        #     self.estimator_context.set_input_shape("cond", cond.shape)
+        #
+        #     # Create a list of bindings
+        #     bindings = [int(x.data_ptr()), int(mask.data_ptr()), int(mu.data_ptr()),
+        #                 int(t.data_ptr()), int(spks.data_ptr()), int(cond.data_ptr()), int(ret.data_ptr())]
+        #
+        #     # Execute the inference
+        #     self.estimator_context.execute_v2(bindings=bindings)
+        #     return ret
         else:
-            assert self.training is False, 'tensorrt cannot be used in training'
-            bs = x.shape[0]
-            hs = x.shape[1]
-            seq_len = x.shape[2]
-            # assert bs == 1 and hs == 80
-            ret = torch.empty_like(x)
-            self.estimator_context.set_input_shape("x", x.shape)
-            self.estimator_context.set_input_shape("mask", mask.shape)
-            self.estimator_context.set_input_shape("mu", mu.shape)
-            self.estimator_context.set_input_shape("t", t.shape)
-            self.estimator_context.set_input_shape("spks", spks.shape)
-            self.estimator_context.set_input_shape("cond", cond.shape)
-            bindings = [x.data_ptr(), mask.data_ptr(), mu.data_ptr(), t.data_ptr(), spks.data_ptr(), cond.data_ptr(), ret.data_ptr()]
-            names = ['x', 'mask', 'mu', 't', 'spks', 'cond', 'estimator_out']
-
-            for i in range(len(bindings)):
-                self.estimator_context.set_tensor_address(names[i], bindings[i])
-
-            handle = torch.cuda.current_stream().cuda_stream
-            self.estimator_context.execute_async_v3(stream_handle=handle)
-            return ret
+            # Fall back to the onnxruntime session installed by load_onnx().
+            x_np = x.cpu().numpy()
+            mask_np = mask.cpu().numpy()
+            mu_np = mu.cpu().numpy()
+            t_np = t.cpu().numpy()
+            spks_np = spks.cpu().numpy()
+            cond_np = cond.cpu().numpy()
+
+            ort_inputs = {
+                'x': x_np,
+                'mask': mask_np,
+                'mu': mu_np,
+                't': t_np,
+                'spks': spks_np,
+                'cond': cond_np
+            }
+
+            output = self.session.run(None, ort_inputs)[0]
+
+            return torch.tensor(output, dtype=x.dtype, device=x.device)
 
     def compute_loss(self, x1, mask, mu, spks=None, cond=None):
         """Computes diffusion loss