# Dynamic INT8 quantization of an ONNX decoder model.
import onnx  # noqa: F401  # kept: verifies the onnx package is importable before quantizing
from onnxruntime.quantization import quantize_dynamic, QuantType


def quantize_model(
    model_path: str = "decoder_model.onnx",
    quantized_model_path: str = "decoder_quantized.onnx",
    weight_type: QuantType = QuantType.QInt8,
) -> str:
    """Dynamically quantize an ONNX model's weights and save the result.

    Args:
        model_path: Path to the source ONNX model (the decoder model by default).
        quantized_model_path: Destination path for the quantized model.
        weight_type: Integer type used for the quantized weights
            (defaults to signed 8-bit, matching the original script).

    Returns:
        The path the quantized model was written to.
    """
    # Dynamic quantization converts weights offline; activations are
    # quantized at inference time, so no calibration data is needed.
    quantize_dynamic(model_path, quantized_model_path, weight_type=weight_type)
    return quantized_model_path


if __name__ == "__main__":
    # Guarded entry point: importing this module no longer triggers quantization.
    saved_path = quantize_model()
    print(f"Quantized model saved to {saved_path}")