z-uo committed on
Commit
a0b2bc1
1 Parent(s): a8f9c54

Upload 2 files

Browse files
Files changed (2) hide show
  1. bert_din_quant.py +17 -0
  2. bert_din_quant_test.py +13 -0
bert_din_quant.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load an Italian BERT NER checkpoint through optimum and quantize it.

The quantized artifacts are written to ``OUTPUT_DIR``.
"""
from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Checkpoint to load and the directory that receives the quantized output.
MODEL_ID = "nickprock/bert-italian-finetuned-ner"
OUTPUT_DIR = "bert-italian-ner-onnx-quantized-avx512"

# export=True is passed so the checkpoint is converted while loading
# (see the optimum ORTModel documentation for the exact export behavior).
onnx_model = ORTModelForTokenClassification.from_pretrained(MODEL_ID, export=True)
quantizer = ORTQuantizer.from_pretrained(onnx_model)

# Non-static (is_static=False), per-tensor (per_channel=False) configuration
# produced by the avx512_vnni preset.
dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

# quantize() returns a value that is kept in model_quantized_path.
model_quantized_path = quantizer.quantize(
    quantization_config=dqconfig,
    save_dir=OUTPUT_DIR,
)
bert_din_quant_test.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Run one NER inference with the quantized model and report how long it took.

Expects the directory ``MODEL_DIR`` to already contain the quantized model
and tokenizer files.
"""
import time

from optimum.onnxruntime import ORTModelForTokenClassification
from transformers import AutoTokenizer, pipeline

# Single source of truth for the model location; tokenizer and model are both
# loaded from this directory.
MODEL_DIR = "./bert-italian-ner-onnx-quantized-avx512"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = ORTModelForTokenClassification.from_pretrained(MODEL_DIR)
nerpipeline = pipeline("ner", model=model, tokenizer=tokenizer)

text = "La sede storica della Olivetti è ad Ivrea"

# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring a short interval; time.time() can jump if the system
# clock is adjusted.
start_time = time.perf_counter()
output = nerpipeline(text)
# Capture the elapsed time before printing so string formatting is not timed.
elapsed = time.perf_counter() - start_time

print(f"--- {elapsed} seconds ---")
print(output)