"""Export a token-classification checkpoint to ONNX and quantize it.

The script loads ``nickprock/bert-italian-finetuned-ner`` through Optimum's
ONNX Runtime integration, builds a non-static (dynamic) quantization
configuration for the AVX512-VNNI preset, and writes the quantized model to
``bert-italian-ner-onnx-quantized-avx512``.
"""

from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# ``export=True`` asks Optimum to convert the checkpoint to ONNX on load.
onnx_model = ORTModelForTokenClassification.from_pretrained(
    "nickprock/bert-italian-finetuned-ner",
    export=True,
)

quantizer = ORTQuantizer.from_pretrained(onnx_model)

# ``is_static=False`` selects dynamic quantization (no calibration dataset is
# supplied anywhere in this script); ``per_channel=False`` disables
# per-channel quantization.
dqconfig = AutoQuantizationConfig.avx512_vnni(
    is_static=False,
    per_channel=False,
)

# Run the quantization and save the result under ``save_dir``.
# NOTE(review): the return value is presumably the output location, as the
# variable name suggests -- confirm against the installed Optimum version.
model_quantized_path = quantizer.quantize(
    save_dir="bert-italian-ner-onnx-quantized-avx512",
    quantization_config=dqconfig,
)