File size: 1,626 Bytes
0053ecb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# coding: utf-8

import datetime
from rknn.api import RKNN
from sys import exit


# Path of the ONNX model to convert.
ONNX_MODEL = "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.onnx"

# Output path: the input name with its trailing ".onnx" swapped for ".rknn".
# Only the extension is touched (an ".onnx" elsewhere in the name is kept),
# and when the suffix is missing ".rknn" is appended instead, so the output
# path can never be identical to the input path.
_ONNX_SUFFIX = ".onnx"
_model_stem = (
    ONNX_MODEL[: -len(_ONNX_SUFFIX)]
    if ONNX_MODEL.endswith(_ONNX_SUFFIX)
    else ONNX_MODEL
)
RKNN_MODEL = _model_stem + ".rknn"

# Forwarded to rknn.build(dataset=...) later in this script.
DATASET = ""
# Forwarded to rknn.build(do_quantization=...) later in this script.
QUANTIZE = False
# Only referenced by the commented-out init_runtime(perf_debug=...) call.
detailed_performance_log = True

# Conversion timestamp embedded in the model's custom_string. Local wall-clock
# time with an explicit UTC offset, so it stays unambiguous when the exported
# model is moved to another machine.
timedate_iso = datetime.datetime.now().astimezone().isoformat()

def _abort_on_failure(ret, step):
    """Release the RKNN object and exit if a conversion step failed.

    Args:
        ret: Return code of an RKNN API call; 0 is treated as success.
        step: Human-readable name of the step, used in the error message.

    Raises:
        SystemExit: If ``ret`` is non-zero. The message is written to stderr
            and the process exits with status 1.
    """
    if ret != 0:
        rknn.release()
        exit(f"{step} failed (ret={ret})")


rknn = RKNN(verbose=True)
rknn.config(
    # mean_values=[x * 255 for x in [0.485, 0.456, 0.406]],
    # std_values=[x * 255 for x in [0.229, 0.224, 0.225]],
    # NOTE(review): the quantized_* settings presumably only matter when
    # QUANTIZE is True -- confirm against the toolkit documentation.
    quantized_dtype="w8a8",
    quantized_algorithm="normal",
    quantized_method="channel",
    quantized_hybrid_level=0,
    target_platform="rk3588",
    quant_img_RGB2BGR=False,
    float_dtype="float16",
    optimization_level=3,
    # Stamp the exported model with the conversion time.
    custom_string=f"converted at {timedate_iso}",
    remove_weight=False,
    compress_weight=False,
    inputs_yuv_fmt=None,
    single_core_mode=False,
    dynamic_input=None,
    model_pruning=False,
    op_target=None,
    quantize_weight=False,
    remove_reshape=False,
    sparse_infer=False,
    enable_flash_attention=False,
    # Hidden parameters
    # disable_rules=[],
    # sram_prefer=False,
    # nbuf_prefer=False,
    # check_data=[],
)

# Each step reports failure through its return code; stop at the first
# failure instead of carrying on with a half-initialised model.
ret = rknn.load_onnx(model=ONNX_MODEL)
_abort_on_failure(ret, "load_onnx")

ret = rknn.build(do_quantization=QUANTIZE, dataset=DATASET, rknn_batch_size=None)
_abort_on_failure(ret, "build")

ret = rknn.export_rknn(RKNN_MODEL)
_abort_on_failure(ret, "export_rknn")

# Optional on-device evaluation, kept disabled. It must run before the final
# release() below. NOTE: `device_id` in the accuracy_analysis line is not
# defined anywhere in this script and has to be set before enabling it.
# ret = rknn.init_runtime(target='rk3588',device_id='cbb956772bf5dac9',core_mask=RKNN.NPU_CORE_0,perf_debug=detailed_performance_log)
# rknn.eval_perf()
# ret = rknn.accuracy_analysis(inputs=['../embeddings.npy','../state.npy','../scale_ratio.npy'], target='rk3588', device_id=device_id)

# Free the resources held by the RKNN object.
rknn.release()