upload quantized models
decoder_model.onnx → decoder_model-quantized.onnx
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:95fbd45e18731b47d2515ea81ca3015404da45fc7553ae6c478f46b539f3f03e
+size 163789113
decoder_with_past_model.onnx → decoder_with_past_model-quantized.onnx
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:09e86f788e49455c94fa23cc0e23aa6b08c0238831d9ad430b707acede9cf872
+size 149512777
encoder_model.onnx → encoder_model-quantized.onnx
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:14368f04a5469a476d6ba0383f5d14da8b902fca8d2d64c95aed597f6e74c926
+size 168149736
quantize.py
ADDED
@@ -0,0 +1,16 @@
+from onnxruntime.quantization import quantize_dynamic, QuantType
+
+models = ["encoder_model.onnx", "decoder_model.onnx", "decoder_with_past_model.onnx"]
+
+for model in models:
+    print(f"Quantizing model: {model}")
+    output_model_name = f"{model[:-5]}-quantized.onnx"
+    quantize_dynamic(
+        model_input=model,
+        model_output=output_model_name,
+        per_channel=True,
+        reduce_range=True,
+        weight_type=QuantType.QUInt8,
+        optimize_model=False
+    )
+    print(f"Quantized model: {output_model_name}")
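The script above applies onnxruntime's dynamic quantization to the three exported graphs, producing the *-quantized.onnx files whose LFS pointers are updated in this commit. A quick way to sanity-check the uploaded files is to load one of the quantized graphs directly with onnxruntime and inspect its inputs and outputs; the snippet below is a minimal sketch of such a check, not part of this commit, and the execution-provider choice is an assumption.

import onnxruntime as ort

# Sanity-check sketch (assumed usage, not committed here): load the
# dynamically quantized encoder on CPU and list its input/output names.
session = ort.InferenceSession("encoder_model-quantized.onnx", providers=["CPUExecutionProvider"])
print([inp.name for inp in session.get_inputs()])
print([out.name for out in session.get_outputs()])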