/*****************************************************************************
 *
 *    Copyright (c) 2016-2026 by Sophgo Technologies Inc. All rights reserved.
 *
 *    The material in this file is confidential and contains trade secrets
 *    of Sophgo Technologies Inc. This is proprietary information owned by
 *    Sophgo Technologies Inc. No part of this work may be disclosed,
 *    reproduced, copied, transmitted, or used in any way for any purpose,
 *    without the express written permission of Sophgo Technologies Inc.
 *
 *****************************************************************************/

/*****************************************************************************
 * The BMRuntime interface is mainly for inference.
 * It can also be used for device computation from BMLang programming.
 * Note: please use the interfaces in bmlib_runtime.h for device memory operations.
 ****************************************************************************/

#ifndef BMRUNTIME_INTERFACE_H_
#define BMRUNTIME_INTERFACE_H_

#include "bmdef.h"

#ifdef _WIN32
#define DECL_EXPORT __declspec(dllexport)
#define DECL_IMPORT __declspec(dllimport)
#else
#define DECL_EXPORT
#define DECL_IMPORT
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/* --------------------------------------------------------------------------*/
/* interfaces for basic data types */

/* get data type byte size */
DECL_EXPORT size_t bmrt_data_type_size(bm_data_type_t dtype);

/*
convert a dims array to bm_shape_t;
shape and dims must not be NULL, and num_dims must not be larger than BM_MAX_DIMS_NUM */
DECL_EXPORT void bmrt_shape(bm_shape_t* shape, const int* dims, int num_dims);

/*
return the number of shape elements; shape must not be NULL, and num_dims must not be
larger than BM_MAX_DIMS_NUM */
DECL_EXPORT uint64_t bmrt_shape_count(const bm_shape_t* shape);

/* compare whether two shapes are the same */
DECL_EXPORT bool bmrt_shape_is_same(const bm_shape_t* left, const bm_shape_t* right);

/*
fill a tensor with data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem, so the user should call
bmrt_free_device(p_bmrt, tensor->device_mem) to free it. */
DECL_EXPORT bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape);
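
/*
 * Usage sketch for bmrt_tensor (a minimal sketch; assumes p_bmrt was already
 * created via bmrt_create, and BM_FLOAT32 / the 1x3x224x224 shape are
 * placeholder values):
 *
 *   int dims[4] = {1, 3, 224, 224};
 *   bm_shape_t shape;
 *   bmrt_shape(&shape, dims, 4);           // build the shape from a dims array
 *
 *   bm_tensor_t tensor;
 *   if (bmrt_tensor(&tensor, p_bmrt, BM_FLOAT32, shape)) {
 *     // ... copy data in with bm_memcpy_s2d and use the tensor ...
 *     bmrt_free_device(p_bmrt, tensor.device_mem);  // free what bmrt_tensor allocated
 *   }
 */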

/*
fill a tensor with data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem on the devid-th device. */
DECL_EXPORT bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape);

/* fill a tensor with existing device memory; the tensor byte size must not be larger than the device memory size */
DECL_EXPORT void bmrt_tensor_with_device(bm_tensor_t* tensor, bm_device_mem_t device_mem,
                             bm_data_type_t dtype, bm_shape_t shape);

/* get tensor byte size; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_bytesize(const bm_tensor_t* tensor);

/* get the memory size allocated to the tensor in device memory; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_device_size(const bm_tensor_t* tensor);

/* print net info for debug */
DECL_EXPORT void bmrt_print_network_info(const bm_net_info_t* net_info);

/* --------------------------------------------------------------------------*/
/**
 * @name    bmrt_create
 * @brief   To create the bmruntime with bm_handle.
 * @ingroup bmruntime
 *
 * This API creates the bmruntime and returns a void* pointer to it.
 * The device id is determined when the bm_handle is obtained.
 *
 * @param [in] bm_handle     bm handle. It must be initialized by using bmlib.
 *
 * @retval void* the pointer to the bmruntime
 */
DECL_EXPORT void* bmrt_create(bm_handle_t bm_handle);
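
/*
 * Usage sketch for the runtime lifecycle (a minimal sketch; bm_dev_request and
 * bm_dev_free come from bmlib, and device 0 is an assumption):
 *
 *   bm_handle_t handle;
 *   if (bm_dev_request(&handle, 0) != BM_SUCCESS) return -1;  // open device 0
 *   void *p_bmrt = bmrt_create(handle);                       // create the runtime
 *   // ... load a bmodel and run inference ...
 *   bmrt_destroy(p_bmrt);                                     // destroy the runtime first
 *   bm_dev_free(handle);                                      // then release the device
 */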

/* --------------------------------------------------------------------------*/
/**
 * @name    bmrt_create_ex
 * @brief   To create the bmruntime with one or more bm_handles.
 * @ingroup bmruntime
 *
 * This API creates the bmruntime and returns a void* pointer to it.
 *
 * @param [in] bm_handles   bm handles. They must be initialized by using bmlib.
 * @param [in] num_handles  number of bm_handles.
 *
 * @retval void* the pointer to the bmruntime
 */
DECL_EXPORT void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles);

/**
 * @name    bmrt_destroy
 * @brief   To destroy the bmruntime pointer
 * @ingroup bmruntime
 *
 * This API destroys the bmruntime.
 *
 * @param [in]     p_bmrt        Bmruntime that had been created
 */
DECL_EXPORT void bmrt_destroy(void* p_bmrt);

/**
 * @name    bmrt_get_bm_handle
 * @brief   To get the bm_handle used by the bmruntime.
 * @ingroup bmruntime
 *
 * This API gets the bm_handle for use with BMDNN, BMCV, or BMLIB.
 *
 * @param [in]     p_bmrt        Bmruntime that had been created
 */
DECL_EXPORT void * bmrt_get_bm_handle(void* p_bmrt);

/**
 * @name    bmrt_load_bmodel
 * @brief   To load a bmodel created by the BM compiler
 * @ingroup bmruntime
 *
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference on its neural networks.
 *
 * @param   [in]   p_bmrt        Bmruntime that had been created
 * @param   [in]   bmodel_path   Bmodel file path.
 *
 * @retval true    Load success.
 * @retval false   Load failed.
 */
DECL_EXPORT bool bmrt_load_bmodel(void* p_bmrt, const char *bmodel_path);
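
/*
 * Usage sketch (a minimal sketch; the file path is a placeholder):
 *
 *   if (!bmrt_load_bmodel(p_bmrt, "compilation/net.bmodel")) {
 *     fprintf(stderr, "failed to load bmodel\n");
 *   }
 */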

/**
 * @name    bmrt_load_bmodel_data
 * @brief   To load a bmodel created by the BM compiler from a buffer
 * @ingroup bmruntime
 *
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference on its neural networks.
 * Unlike bmrt_load_bmodel, the bmodel here is data in host memory.
 *
 * @param   [in]   p_bmrt        Bmruntime that had been created
 * @param   [in]   bmodel_data   Pointer to the bmodel data buffer
 * @param   [in]   size          Bmodel data size
 *
 * @retval true    Load success.
 * @retval false   Load failed.
 */
DECL_EXPORT bool bmrt_load_bmodel_data(void* p_bmrt, const void * bmodel_data, size_t size);

/**
 * @name    bmrt_show_neuron_network
 * @brief   To print the names of all neural networks
 * @ingroup bmruntime
 *
 * @param [in]     p_bmrt         Bmruntime that had been created
 */
DECL_EXPORT void bmrt_show_neuron_network(void* p_bmrt);

/**
 * @name    bmrt_get_network_number
 * @brief   To get the number of neural networks in the bmruntime
 * @ingroup bmruntime
 *
 * @param [in]     p_bmrt         Bmruntime that had been created
 *
 * @retval  int value     The number of neural networks.
 */
DECL_EXPORT int bmrt_get_network_number(void* p_bmrt);

/**
 * @name    bmrt_get_network_names
 * @brief   To get the names of all neural networks in the bmruntime
 * @ingroup bmruntime
 *
 * @param [in]     p_bmrt         Bmruntime that had been created
 * @param [out]    network_names  The names of all neural networks. It should be declared as
 *                                (const char** networks_ = NULL) and passed as &networks_.
 *                                After this API returns, the user needs to free(networks_)
 *                                when it is no longer needed.
DECL_EXPORT void bmrt_get_network_names(void* p_bmrt, const char*** network_names);
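
/*
 * Usage sketch (a minimal sketch; assumes a bmodel has been loaded):
 *
 *   int num = bmrt_get_network_number(p_bmrt);
 *   const char **names = NULL;
 *   bmrt_get_network_names(p_bmrt, &names);
 *   for (int i = 0; i < num; ++i)
 *     printf("net[%d]: %s\n", i, names[i]);
 *   free(names);  // per the note above, only the array itself needs free()
 */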

/**
 * @name    bmrt_get_network_info
 * @brief   To get network info by net name
 * @ingroup bmruntime
 *
 * @param [in]     p_bmrt         Bmruntime that had been created
 * @param [in]     net_name       Network name
 *
 * @retval  bm_net_info_t*        Pointer to the net info; the user need not free it. Returns NULL
 *                                if the net name is not found.
 */
DECL_EXPORT const bm_net_info_t* bmrt_get_network_info(void* p_bmrt, const char* net_name);

/**
 * @name    bmrt_launch_tensor
 * @brief   To launch the inference of a neural network with the given input tensors
 * @ingroup bmruntime
 *
 * This API supports neural networks that are static-compiled or dynamic-compiled.
 * After calling this API, inference on the TPU is launched and the CPU program is not
 * blocked; bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is multi-thread safe.
 *
 * @param [in]    p_bmrt         Bmruntime that had been created
 * @param [in]    net_name       The name of the neural network
 * @param [in]    input_tensors  Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                               The user should initialize each input tensor.
 * @param [in]    input_num      Input number
 * @param [out]   output_tensors Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                               This interface will allocate device memory to store the output data. The user
 *                               should free each device memory by bm_free_device when the result data is no
 *                               longer needed.
 * @param [in]    output_num     Output number
 *
 * @retval true    Launch success.
 * @retval false   Launch failed.
 */
DECL_EXPORT bool bmrt_launch_tensor(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
                        bm_tensor_t output_tensors[], int output_num);
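
/*
 * End-to-end usage sketch (a minimal sketch; "net0" is a placeholder net name,
 * in_data / out_data are host buffers of the right size, handle is the
 * bm_handle from the lifecycle sketch, and bm_memcpy_s2d, bm_memcpy_d2s,
 * bm_thread_sync, and bm_free_device come from bmlib):
 *
 *   const bm_net_info_t *info = bmrt_get_network_info(p_bmrt, "net0");
 *   bm_tensor_t in, out;
 *   bmrt_tensor(&in, p_bmrt, info->input_dtypes[0], info->stages[0].input_shapes[0]);
 *   bm_memcpy_s2d(handle, in.device_mem, in_data);       // host -> device
 *   if (bmrt_launch_tensor(p_bmrt, "net0", &in, 1, &out, 1)) {
 *     bm_thread_sync(handle);                            // wait until inference finishes
 *     bm_memcpy_d2s(handle, out_data, out.device_mem);   // device -> host
 *     bm_free_device(handle, out.device_mem);            // output was allocated by the runtime
 *   }
 *   bmrt_free_device(p_bmrt, in.device_mem);             // input was allocated by bmrt_tensor
 */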

/**
 * @name    bmrt_launch_tensor_ex
 * @brief   To launch the inference of a neural network with the given input tensors
 * @ingroup bmruntime
 *
 * This API supports neural networks that are static-compiled or dynamic-compiled.
 * After calling this API, inference on the TPU is launched and the CPU program is not
 * blocked; bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is multi-thread safe.
 *
 * @param [in]    p_bmrt            Bmruntime that had been created
 * @param [in]    net_name          The name of the neural network
 * @param [in]    input_tensors     Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                  The user should initialize each input tensor.
 * @param [in]    input_num         Input number
 * @param [out]   output_tensors    Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                  The user can set the device_mem or stmode of the output tensors. If user_mem
 *                                  is true, this interface uses the device memory of output_tensors to store the
 *                                  output data and does not allocate device memory; otherwise it allocates device
 *                                  memory to store the output. If user_stmode is true, the stmode in each output
 *                                  tensor is used; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num        Output number
 * @param [in]    user_mem          whether the device_mem of the output tensors is set
 * @param [in]    user_stmode       whether the stmode of the output tensors is set
 *
 * @retval true    Launch success.
 * @retval false   Launch failed.
 */
DECL_EXPORT bool bmrt_launch_tensor_ex(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
                           bm_tensor_t output_tensors[], int output_num, bool user_mem, bool user_stmode);
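
/*
 * Usage sketch for user-managed output memory (a minimal sketch; info, handle,
 * in, and "net0" are assumed to be set up as in the earlier sketches, and the
 * net is assumed to have a single input and output):
 *
 *   bm_tensor_t out;
 *   bmrt_tensor(&out, p_bmrt, info->output_dtypes[0], info->stages[0].output_shapes[0]);
 *   // user_mem = true: the runtime writes into out.device_mem instead of allocating
 *   bmrt_launch_tensor_ex(p_bmrt, "net0", &in, 1, &out, 1, true, false);
 *   bm_thread_sync(handle);
 */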

/**
 * @name    bmrt_launch_data
 * @brief   To launch the inference of a neural network with input data in system memory
 * @ingroup bmruntime
 *
 * This API supports neural networks that are static-compiled or dynamic-compiled.
 * After calling this API, inference on the TPU is launched, and the CPU
 * program is blocked until it finishes.
 * This API supports multiple inputs and is multi-thread safe.
 *
 * @param [in]    p_bmrt         Bmruntime that had been created
 * @param [in]    net_name       The name of the neural network
 * @param [in]    input_datas    Array of input data, defined like void * input_datas[input_num]. The user
 *                               should initialize each data pointer as input.
 * @param [in]    input_shapes   Array of input shapes, defined like bm_shape_t input_shapes[input_num].
 *                               The user should set each input shape.
 * @param [in]    input_num      Input number
 * @param [out]   output_datas   Array of output data, defined like void * output_datas[output_num].
 *                               If the user does not allocate the output buffers, set user_mem to false and
 *                               this API will allocate them; the user should free each output buffer when its
 *                               data is no longer needed. Alternatively, the user can allocate system memory
 *                               for each output and set user_mem = true.
 * @param [out]   output_shapes  Array of output shapes, defined like bm_shape_t output_shapes[output_num].
 *                               It will store each output shape.
 * @param [in]    output_num     Output number
 * @param [in]    user_mem       whether output_datas[i] have allocated memory
 *
 * @retval true    Launch success.
 * @retval false   Launch failed.
 */
DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[],
                      const bm_shape_t input_shapes[], int input_num, void * output_datas[],
                      bm_shape_t output_shapes[], int output_num, bool user_mem);
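
/*
 * Usage sketch for the blocking variant (a minimal sketch; assumes one input
 * and one output, in_data is a host buffer, and freeing a runtime-allocated
 * output with free() is an assumption):
 *
 *   void *inputs[1]  = {in_data};
 *   bm_shape_t in_shapes[1] = {info->stages[0].input_shapes[0]};
 *   void *outputs[1] = {NULL};
 *   bm_shape_t out_shapes[1];
 *   if (bmrt_launch_data(p_bmrt, "net0", inputs, in_shapes, 1,
 *                        outputs, out_shapes, 1, false)) {
 *     // the call returns only after inference; outputs[0] was allocated by the API
 *     free(outputs[0]);
 *   }
 */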

/**
 * @name    bmrt_trace
 * @brief   To check runtime environment, and collect info for DEBUG
 * @ingroup bmruntime
 *
 * This API collects runtime info for DEBUG. Especially when a launch suddenly produces wrong
 * results, calling bmrt_trace will show whether device memories are corrupted, along with
 * other checking info.
 *
 * @param [in]    p_bmrt         Bmruntime that had been created
 */
DECL_EXPORT void bmrt_trace(void* p_bmrt);

/**
 * @name    bmrt_launch_tensor_multi_cores
 * @brief   To launch the inference of a neural network with the given input tensors, with support for multi-core inference
 * @ingroup bmruntime
 *
 * This API supports neural networks that are static-compiled or dynamic-compiled.
 * After calling this API, inference on the TPU is launched and the CPU program is not
 * blocked; bm_thread_sync_from_core should be called to make sure inference is finished.
 * This API supports multiple inputs and is multi-thread safe.
 *
 * @param [in]    p_bmrt            Bmruntime that had been created
 * @param [in]    net_name          The name of the neural network
 * @param [in]    input_tensors     Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                  The user should initialize each input tensor.
 * @param [in]    input_num         Input number
 * @param [out]   output_tensors    Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                  The user can set the device_mem or stmode of the output tensors. If user_mem
 *                                  is true, this interface uses the device memory of output_tensors to store the
 *                                  output data and does not allocate device memory; otherwise it allocates device
 *                                  memory to store the output. If user_stmode is true, the stmode in each output
 *                                  tensor is used; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num        Output number
 * @param [in]    user_mem          whether the device_mem of the output tensors is set
 * @param [in]    user_stmode       whether the stmode of the output tensors is set
 * @param [in]    core_list         list of core ids to be used for inference
 * @param [in]    core_num          number of cores in core_list
 *
 * @retval true    Launch success.
 * @retval false   Launch failed.
 */
DECL_EXPORT bool bmrt_launch_tensor_multi_cores(
    void *p_bmrt,
    const char *net_name,
    const bm_tensor_t input_tensors[],
    int input_num,
    bm_tensor_t output_tensors[],
    int output_num,
    bool user_mem,
    bool user_stmode,
    const int *core_list,
    int core_num);
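
/*
 * Usage sketch (a minimal sketch; core ids 0 and 1 are placeholders and depend
 * on the chip; in/out are prepared as in the earlier sketches):
 *
 *   const int cores[2] = {0, 1};
 *   bmrt_launch_tensor_multi_cores(p_bmrt, "net0", &in, 1, &out, 1,
 *                                  false, false, cores, 2);
 *   // then synchronize with bm_thread_sync_from_core from bmlib for the used cores
 */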

/**
 *  @name    bmrt_memcpy_s2d_parallel
 *  @brief   To copy data from system memory to multi-device memory in parallel
 *  @ingroup bmruntime
 *
 *  This API can only be used when the p_bmrt was created with bmrt_create_ex on multiple devices.
 *  After calling this API, datas[:tensor_num[0]] will be copied to the first device,
 *  datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] will be copied to the second device, and so on.
 *  Copies to different devices are performed in parallel; copies to the same device are sequential.
 *
 *  @param [in]     p_bmrt      Bmruntime that had been created with multiple bm_handles
 *  @param [in]     tensors     Array of tensors that will be copied to the devices
 *  @param [in]     datas       Array of data pointers allocated in system memory
 *  @param [in]     tensor_num  Array of tensor counts to be copied to each device
 *  @param [in]     device_num  Device number
 */
DECL_EXPORT bool bmrt_memcpy_s2d_parallel(
    void *p_bmrt,
    bm_tensor_t tensors[],
    void *datas[],
    int tensor_num[],
    int device_num);
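
/*
 * Usage sketch (a minimal sketch; assumes 2 devices, with two tensors on
 * device 0 and one on device 1, all created beforehand; bmrt_memcpy_d2s_parallel
 * mirrors this layout in the opposite direction):
 *
 *   bm_tensor_t tensors[3] = {t0_dev0, t1_dev0, t0_dev1};
 *   void *datas[3]         = {d0, d1, d2};
 *   int tensor_num[2]      = {2, 1};  // first 2 entries -> device 0, next 1 -> device 1
 *   bmrt_memcpy_s2d_parallel(p_bmrt, tensors, datas, tensor_num, 2);
 */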

/**
 *  @name    bmrt_memcpy_d2s_parallel
 *  @brief   To copy data from multi-device memory to system memory in parallel
 *  @ingroup bmruntime
 *
 *  This API can only be used when the p_bmrt was created with bmrt_create_ex on multiple devices.
 *  After calling this API, tensors on the first device will be copied to datas[:tensor_num[0]],
 *  tensors on the second device will be copied to datas[tensor_num[0]:tensor_num[0]+tensor_num[1]], and so on.
 *  Copies from different devices are performed in parallel; copies from the same device are sequential.
 *
 *  @param [in]     p_bmrt      Bmruntime that had been created with multiple bm_handles
 *  @param [out]    datas       Array of data pointers allocated in system memory
 *  @param [in]     tensors     Array of tensors that will be copied from the devices
 *  @param [in]     tensor_num  Array of tensor counts to be copied from each device
 *  @param [in]     device_num  Device number
 */
DECL_EXPORT bool bmrt_memcpy_d2s_parallel(
    void *p_bmrt,
    void *datas[],
    bm_tensor_t tensors[],
    int tensor_num[],
    int device_num);

#if defined (__cplusplus)
}
#endif

#endif  /* BMRUNTIME_INTERFACE_H_ */