File size: 1,659 Bytes
cec228d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import sys
import onnx
from onnxslim import slim
from onnxconverter_common.float16 import convert_float_to_float16
from onnxconverter_common.optimizer import optimize_onnx_model
import logging

# Root-logger setup for the whole script: timestamped INFO-level messages,
# e.g. "2024-01-01 12:00:00,000 - INFO - Starting conversion ...".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)


def convert_to_fp16(input_onnx_model):
    """Convert one FP32 ONNX model file to an FP16 copy on disk.

    The result is written next to the input as ``<base>.fp16.onnx`` (a
    trailing ``.fp32`` marker in the input name is dropped first, so
    ``model.fp32.onnx`` and ``model.onnx`` both map to ``model.fp16.onnx``).
    The function is idempotent: inputs that are themselves FP16 outputs,
    and inputs whose FP16 counterpart already exists, are skipped.

    Args:
        input_onnx_model: Path to the ``.onnx`` file to convert.
    """
    base_name, _ = os.path.splitext(input_onnx_model)

    # Bug fix: main() feeds back every *.onnx file in the directory,
    # including previously produced *.fp16.onnx outputs. Without this guard
    # a rerun would re-convert them to "<base>.fp16.fp16.onnx".
    if base_name.endswith(".fp16"):
        logging.info(
            f"{input_onnx_model} is already an FP16 model. Skipping conversion."
        )
        return

    if base_name.endswith(".fp32"):
        base_name = base_name[:-5]

    output_onnx_model = base_name + ".fp16.onnx"

    if os.path.exists(output_onnx_model):
        logging.info(
            f"FP16 version {output_onnx_model} already exists. Skipping conversion."
        )
        return

    logging.info(f"Starting conversion for {input_onnx_model}")

    logging.info(f"Simplifying model {input_onnx_model}")
    model = slim(input_onnx_model)

    # The fp16 converter walks the graph recursively; deep models can blow
    # past Python's default recursion limit (1000).
    sys.setrecursionlimit(10000)

    logging.info(
        f"Performing shape inference and quant pre-process for {input_onnx_model}"
    )
    model_optimized = optimize_onnx_model(model)

    logging.info(f"Converting {input_onnx_model} to FP16")
    # keep_io_types=True leaves graph inputs/outputs as float32 so callers
    # do not have to change their feed/fetch dtypes.
    model_fp16 = convert_float_to_float16(model_optimized, keep_io_types=True)

    logging.info(f"Saving FP16 model to {output_onnx_model}")
    onnx.save(model_fp16, output_onnx_model)
    logging.info(f"FP16 model saved to {output_onnx_model}")


def main():
    """Run the FP16 conversion over every ``.onnx`` file in the CWD."""
    candidates = [name for name in os.listdir(".") if name.endswith(".onnx")]

    if not candidates:
        logging.warning("No .onnx files found in the current directory.")
        return

    for candidate in candidates:
        convert_to_fp16(candidate)


if __name__ == "__main__":
    main()