rgallardo committed on
Commit
1c77584
•
1 Parent(s): bd0e6ae

upload quantized models

Browse files
decoder_model.onnx → decoder_model-quantized.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c01adff67277c7f41acb2d01e3ca4f4f56793603dc741f21762ff6ca660fada
3
- size 650495648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95fbd45e18731b47d2515ea81ca3015404da45fc7553ae6c478f46b539f3f03e
3
+ size 163789113
decoder_with_past_model.onnx → decoder_with_past_model-quantized.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ad40c57a97483b379e9665dba3d834da10df03eb741f73b23693a6c245bc73
3
- size 593862705
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e86f788e49455c94fa23cc0e23aa6b08c0238831d9ad430b707acede9cf872
3
+ size 149512777
encoder_model.onnx → encoder_model-quantized.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50dab74b25b5cd4ce4172a6fdeb968d63f7000d818c2105692eae02874695ada
3
- size 439511740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14368f04a5469a476d6ba0383f5d14da8b902fca8d2d64c95aed597f6e74c926
3
+ size 168149736
quantize.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Quantize the exported ONNX encoder/decoder models with dynamic quantization.

For every file name in ``models`` the script calls
``onnxruntime.quantization.quantize_dynamic`` and writes the result to a
sibling file named ``<stem>-quantized.onnx``. The model names are relative
paths, so the script is expected to be run from the directory that holds them.
"""
import os

from onnxruntime.quantization import quantize_dynamic, QuantType

# Input models to quantize; each one yields "<stem>-quantized.onnx".
models = ["encoder_model.onnx", "decoder_model.onnx", "decoder_with_past_model.onnx"]

for model in models:
    print(f"Quantizing model: {model}")
    # Drop the extension with splitext instead of a fixed-width slice so a
    # name that does not end in exactly ".onnx" is not silently truncated.
    output_model_name = f"{os.path.splitext(model)[0]}-quantized.onnx"
    quantize_dynamic(
        model_input=model,
        model_output=output_model_name,
        per_channel=True,
        reduce_range=True,
        weight_type=QuantType.QUInt8,
        # NOTE(review): newer onnxruntime releases may no longer accept
        # `optimize_model` here - confirm against the installed version.
        optimize_model=False,
    )
    print(f"Quantized model: {output_model_name}")