File size: 383 Bytes
46e0b62
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Paths: the full-precision decoder model and where the INT8 copy will land.
# NOTE(review): `onnx` is imported but not used here — possibly needed by a
# larger version of this script; left in place.
fp32_path = "decoder_model.onnx"
int8_path = "decoder_quantized.onnx"

# Post-training dynamic quantization: weights are stored as signed 8-bit
# integers, activations are quantized on the fly at inference time.
quantize_dynamic(fp32_path, int8_path, weight_type=QuantType.QInt8)

print(f"Quantized model saved to {int8_path}")