File size: 551 Bytes
a0b2bc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
# Load the fine-tuned Italian NER checkpoint as an ONNX Runtime model.
# export=True requests conversion of the checkpoint to ONNX on load.
onnx_model = ORTModelForTokenClassification.from_pretrained(
    "nickprock/bert-italian-finetuned-ner",
    export=True,
)

# Build a quantizer around the exported model.
quantizer = ORTQuantizer.from_pretrained(onnx_model)

# AVX512-VNNI preset: dynamic quantization (is_static=False) without
# per-channel quantization (per_channel=False).
dqconfig = AutoQuantizationConfig.avx512_vnni(
    is_static=False,
    per_channel=False,
)

# Run quantization with the config above, writing into save_dir.
# NOTE(review): the return value is presumably the output location, as the
# variable name suggests — confirm against the Optimum API.
model_quantized_path = quantizer.quantize(
    save_dir="bert-italian-ner-onnx-quantized-avx512",
    quantization_config=dqconfig,
)