File size: 534 Bytes
1c77584
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from pathlib import Path

from onnxruntime.quantization import quantize_dynamic, QuantType

# Exported ONNX graphs of a seq2seq model to quantize: encoder, decoder,
# and the decoder variant that reuses past key/value states.
models = ["encoder_model.onnx", "decoder_model.onnx", "decoder_with_past_model.onnx"]

for model in models:
    print(f"Quantizing model: {model}")
    # Build "<name>-quantized.onnx" from the real stem instead of slicing off
    # a hard-coded 5-character ".onnx" suffix, which silently mangles any
    # filename with a different extension.
    output_model_name = f"{Path(model).stem}-quantized.onnx"
    # NOTE: `optimize_model=False` was the default and the parameter has been
    # removed from quantize_dynamic in recent onnxruntime releases (it raised
    # a TypeError there), so it is intentionally no longer passed.
    quantize_dynamic(
        model_input=model,
        model_output=output_model_name,
        per_channel=True,   # per-channel weight quantization for better accuracy
        reduce_range=True,  # 7-bit weight range; avoids saturation on some x86 VNNI paths
        weight_type=QuantType.QUInt8,  # unsigned 8-bit weights
    )
    print(f"Quantized model: {output_model_name}")