rgallardo's picture
upload quantized models
1c77584
raw
history blame contribute delete
534 Bytes
"""Dynamically quantize exported ONNX seq2seq model parts to 8-bit weights.

Reads the encoder/decoder ONNX files from the current directory and writes
a "<name>-quantized.onnx" sibling for each one.
"""
import os

from onnxruntime.quantization import quantize_dynamic, QuantType

# The three graph parts of an exported seq2seq ONNX model.
models = ["encoder_model.onnx", "decoder_model.onnx", "decoder_with_past_model.onnx"]

for model in models:
    print(f"Quantizing model: {model}")
    # Derive "<stem>-quantized.onnx" explicitly instead of the fragile
    # model[:-5] slice, which silently mangles names not ending in ".onnx".
    stem, ext = os.path.splitext(model)
    output_model_name = f"{stem}-quantized{ext}"
    quantize_dynamic(
        model_input=model,
        model_output=output_model_name,
        per_channel=True,   # per-channel scales for weight tensors
        reduce_range=True,  # 7-bit weight range; presumably to avoid saturation on older CPUs — confirm
        weight_type=QuantType.QUInt8,
        # NOTE(review): optimize_model was deprecated/removed in newer
        # onnxruntime releases — drop this kwarg if upgrading.
        optimize_model=False
    )
    print(f"Quantized model: {output_model_name}")