upload quantized models

Files changed (4) hide show

decoder_model.onnx → decoder_model-quantized.onnx RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c01adff67277c7f41acb2d01e3ca4f4f56793603dc741f21762ff6ca660fada
-size 650495648

 version https://git-lfs.github.com/spec/v1
+oid sha256:95fbd45e18731b47d2515ea81ca3015404da45fc7553ae6c478f46b539f3f03e
+size 163789113

decoder_with_past_model.onnx → decoder_with_past_model-quantized.onnx RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17ad40c57a97483b379e9665dba3d834da10df03eb741f73b23693a6c245bc73
-size 593862705

 version https://git-lfs.github.com/spec/v1
+oid sha256:09e86f788e49455c94fa23cc0e23aa6b08c0238831d9ad430b707acede9cf872
+size 149512777

encoder_model.onnx → encoder_model-quantized.onnx RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50dab74b25b5cd4ce4172a6fdeb968d63f7000d818c2105692eae02874695ada
-size 439511740

 version https://git-lfs.github.com/spec/v1
+oid sha256:14368f04a5469a476d6ba0383f5d14da8b902fca8d2d64c95aed597f6e74c926
+size 168149736

quantize.py ADDED Viewed

+"""Dynamically quantize the exported ONNX graphs to unsigned 8-bit weights.
+
+For every file listed in ``models`` a sibling file named
+``<stem>-quantized<ext>`` is written next to it, e.g.
+``encoder_model.onnx`` -> ``encoder_model-quantized.onnx``.
+Paths are used as given, so relative names resolve against the current
+working directory.
+"""
+import inspect
+import os.path
+
+from onnxruntime.quantization import quantize_dynamic, QuantType
+
+models = ["encoder_model.onnx", "decoder_model.onnx", "decoder_with_past_model.onnx"]
+
+# Settings shared by every model in the list.
+quantize_options = {
+    "per_channel": True,
+    "reduce_range": True,
+    "weight_type": QuantType.QUInt8,
+}
+# Only pass `optimize_model` when the installed quantize_dynamic accepts it,
+# so the script does not die with a TypeError on a release without it.
+# NOTE(review): newer onnxruntime releases appear to have removed this
+# keyword -- confirm against the version pinned for this repo.
+if "optimize_model" in inspect.signature(quantize_dynamic).parameters:
+    quantize_options["optimize_model"] = False
+
+for model in models:
+    print(f"Quantizing model: {model}")
+    # Split on the real extension rather than slicing off a fixed five
+    # characters, so the name is still correct if a path does not end in
+    # ".onnx".
+    stem, extension = os.path.splitext(model)
+    output_model_name = f"{stem}-quantized{extension}"
+    quantize_dynamic(
+        model_input=model,
+        model_output=output_model_name,
+        **quantize_options,
+    )
+    print(f"Quantized model: {output_model_name}")