/*****************************************************************************
 *
 * Copyright (c) 2016-2026 by Sophgo Technologies Inc. All rights reserved.
 *
 * The material in this file is confidential and contains trade secrets
 * of Sophgo Technologies Inc. This is proprietary information owned by
 * Sophgo Technologies Inc. No part of this work may be disclosed,
 * reproduced, copied, transmitted, or used in any way for any purpose,
 * without the express written permission of Sophgo Technologies Inc.
 *
 *****************************************************************************/
/*****************************************************************************
 * The BMRuntime interface is mainly for inference.
 * It can also be used for device computation from BMLang programs.
 * Note: please use the interfaces from bmlib_runtime.h for device memory
 * operations.
 ****************************************************************************/
extern "C" { | |
/* --------------------------------------------------------------------------*/ | |
/* interface for basic data type */ | |
/* get data type byte size */ | |
DECL_EXPORT size_t bmrt_data_type_size(bm_data_type_t dtype); | |
/* | |
dims array to bm_shape_t, | |
shape and dims should not be NULL, num_dims should not be larger than BM_MAX_DIMS_NUM */ | |
DECL_EXPORT void bmrt_shape(bm_shape_t* shape, const int* dims, int num_dims); | |
/* | |
number of shape elements, shape should not be NULL and num_dims should not large than | |
BM_MAX_DIMS_NUM */ | |
DECL_EXPORT uint64_t bmrt_shape_count(const bm_shape_t* shape); | |
/* compare whether two shape is same */ | |
DECL_EXPORT bool bmrt_shape_is_same(const bm_shape_t* left, const bm_shape_t* right); | |
/* | |
fill a tensor with data type and shape, and st_mode = 0 as default. | |
tensor and p_bmrt should not be NULL, shape count should not be 0. | |
it will alloc device mem to tensor->device_mem, so user should bmrt_free_device(p_bmrt, | |
tensor->device_mem) to free it.*/ | |
DECL_EXPORT bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape); | |
/* | |
fill a tensor with data type and shape, and st_mode = 0 as default. | |
tensor and p_bmrt should not be NULL, shape count should not be 0. | |
it will alloc device mem to tensor->device_mem on devid-th device.*/ | |
DECL_EXPORT bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape); | |
/* fill a tensor with device mem existed, tensor byte size should not large than device mem size */ | |
DECL_EXPORT void bmrt_tensor_with_device(bm_tensor_t* tensor, bm_device_mem_t device_mem, | |
bm_data_type_t dtype, bm_shape_t shape); | |
/* get tensor bytes size, tensor should not be NULL */ | |
DECL_EXPORT size_t bmrt_tensor_bytesize(const bm_tensor_t* tensor); | |
/* get tensor mem size allocated in device mem, tensor should not be NULL */ | |
DECL_EXPORT size_t bmrt_tensor_device_size(const bm_tensor_t* tensor); | |
/* print net info for debug */ | |
DECL_EXPORT void bmrt_print_network_info(const bm_net_info_t* net_info); | |
/* --------------------------------------------------------------------------*/ | |
/** | |
* @name bmrt_create | |
* @brief To create the bmruntime with bm_handle. | |
* @ingroup bmruntime | |
* | |
* This API creates the bmruntime. It returns a void* pointer which is the pointer | |
* of bmruntime. Device id is set when get bm_handle; | |
* | |
* @param [in] bm_handle bm handle. It must be initialized by using bmlib. | |
* | |
* @retval void* the pointer of bmruntime | |
*/ | |
DECL_EXPORT void* bmrt_create(bm_handle_t bm_handle); | |
/* --------------------------------------------------------------------------*/ | |
/** | |
* @name bmrt_create_ex | |
* @brief To create the bmruntime with one or more bm_handle. | |
* @ingroup bmruntime | |
* | |
* This API creates the bmruntime. It returns a void* pointer which is the pointer | |
* of bmruntime. | |
* | |
* @param [in] bm_handles bm handles. They must be initialized by using bmlib. | |
* @param [in] num_handles number of bm_handles. | |
* | |
* @retval void* the pointer of bmruntime | |
*/ | |
DECL_EXPORT void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles); | |
/** | |
* @name bmrt_destroy | |
* @brief To destroy the bmruntime pointer | |
* @ingroup bmruntime | |
* | |
* This API destroy the bmruntime. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
*/ | |
DECL_EXPORT void bmrt_destroy(void* p_bmrt); | |
/** | |
* @name bmrt_get_bm_handle | |
* @brief To get the BM runtime context. | |
* @ingroup bmruntime | |
* | |
* This API get the BM runtime context for using BMDNN, BMCV or BMLIB | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
*/ | |
DECL_EXPORT void * bmrt_get_bm_handle(void* p_bmrt); | |
/** | |
* @name bmrt_load_bmodel | |
* @brief To load the bmodel which is created by BM compiler | |
* @ingroup bmruntime | |
* | |
* This API is to load bmodel created by BM compiler. | |
* After loading bmodel, we can run the inference of neuron network. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] bmodel_path Bmodel file directory. | |
* | |
* @retval true Load context sucess. | |
* @retval false Load context failed. | |
*/ | |
DECL_EXPORT bool bmrt_load_bmodel(void* p_bmrt, const char *bmodel_path); | |
/** | |
* @name bmrt_load_bmodel_data | |
* @brief To load the bmodel which is created by BM compiler from buffer | |
* @ingroup bmruntime | |
* | |
* This API is to load bmodel created by BM compiler. | |
* After loading bmodel, we can run the inference of neuron network. | |
* Different with bmrt_load_bmodel, bmodel is the data in host memory. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] bmodel_data Bmodel data pointer to buffer | |
* @param [in] size Bmodel data size | |
* | |
* @retval true Load context sucess. | |
* @retval false Load context failed. | |
*/ | |
DECL_EXPORT bool bmrt_load_bmodel_data(void* p_bmrt, const void * bmodel_data, size_t size); | |
/** | |
* @name bmrt_show_neuron_network | |
* @brief To print the name of all neuron network | |
* @ingroup bmruntime | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
*/ | |
DECL_EXPORT void bmrt_show_neuron_network(void* p_bmrt); | |
/** | |
* @name bmrt_get_network_number | |
* @brief To get the number of neuron network in the bmruntime | |
* @ingroup bmruntime | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* | |
* @retval int value The number of neuron networks. | |
*/ | |
DECL_EXPORT int bmrt_get_network_number(void* p_bmrt); | |
/** | |
* @name bmrt_get_network_names | |
* @brief To get the names of all neuron network in the bmruntime | |
* @ingroup bmruntime | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [out] network_names The names of all neuron networks. It should be declare as (const char** networks_ = NULL), | |
* and use as the param &networks_. After this API, user need to free(networks_) if user | |
* do not need it. | |
*/ | |
DECL_EXPORT void bmrt_get_network_names(void* p_bmrt, const char*** network_names); | |
/** | |
* @name bmrt_get_network_info | |
* @brief To get network info by net name | |
* @ingroup bmruntime | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] net_name Network name | |
* | |
* @retval bm_net_info_t* Pointer to net info, needn't free by user; if net name not found, will return NULL. | |
*/ | |
DECL_EXPORT const bm_net_info_t* bmrt_get_network_info(void* p_bmrt, const char* net_name); | |
/** | |
* @name bmrt_launch_tensor | |
* @brief To launch the inference of the neuron network with setting input tensors | |
* @ingroup bmruntime | |
* | |
* This API supports the neuron nework that is static-compiled or dynamic-compiled | |
* After calling this API, inference on TPU is launched. And the CPU program will not | |
* be blocked. bm_thread_sync should be called to make sure inference finished. | |
* This API support multiple inputs, and multi thread safety | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] net_name The name of the neuron network | |
* @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num]. | |
* User should initialize each input tensor. | |
* @param [in] input_num Input number | |
* @param [out] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. | |
* This interface will alloc devcie mem to store output data. User should free each | |
* device mem by bm_free_device after the result data not used. | |
* @param [in] output_num Output number | |
* | |
* @retval true Launch success. | |
* @retval false Launch failed. | |
*/ | |
DECL_EXPORT bool bmrt_launch_tensor(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num, | |
bm_tensor_t output_tensors[], int output_num); | |
/** | |
* @name bmrt_launch_tensor_ex | |
* @brief To launch the inference of the neuron network with setting input tensors | |
* @ingroup bmruntime | |
* | |
* This API supports the neuron nework that is static-compiled or dynamic-compiled | |
* After calling this API, inference on TPU is launched. And the CPU program will not | |
* be blocked. bm_thread_sync should be called to make sure inference finished. | |
* This API support multiple inputs, and multi thread safety | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] net_name The name of the neuron network | |
* @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], | |
* User should initialize each input tensor. | |
* @param [in] input_num Input number | |
* @param [out] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. | |
* User can set device_mem or stmode of output tensors. If user_mem is true, this interface | |
* will use device mem of output_tensors to store output data, and not alloc device mem; | |
* Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in | |
* each output tensor; Or stmode will be BM_STORE_1N as default. | |
* @param [in] output_num Output number | |
* @param [in] user_mem whether device_mem of output tensors are set | |
* @param [in] user_stmode whether stmode of output tensors are set | |
* | |
* @retval true Launch success. | |
* @retval false Launch failed. | |
*/ | |
DECL_EXPORT bool bmrt_launch_tensor_ex(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num, | |
bm_tensor_t output_tensors[], int output_num, bool user_mem, bool user_stmode); | |
/** | |
* @name bmrt_launch_data | |
* @brief To launch the inference of the neuron network with setting input datas in system memory | |
* @ingroup bmruntime | |
* | |
* This API supports the neuron nework that is static-compiled or dynamic-compiled | |
* After calling this API, inference on TPU is launched. And the CPU | |
* program will be blocked. | |
* This API support multiple inputs, and multi thread safety | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] net_name The name of the neuron network | |
* @param [in] input_datas Array of input data, defined like void * input_datas[input_num]. User should | |
* initialize each data pointer as input. | |
* @param [in] input_shapes Array of input shape, defined like bm_shape_t input_shapes[input_num]. | |
* User should set each input shape | |
* @param [in] input_num Input number | |
* @param [out] output_datas Array of output data, defined like void * output_datas[output_num]. | |
* If user don't alloc each output data, set user_mem to false, and this api will alloc | |
* output mem, user should free each output mem when output data not used. Also | |
* user can alloc system memory for each output data by self and set user_mem = true. | |
* @param [out] output_shapes Array of output shape, defined like bm_shape_t output_shapes[output_num]. | |
* It will store each output shape. | |
* @param [in] output_num Output number | |
* @param [in] user_mem whether output_datas[i] have allocated memory | |
* | |
* @retval true Launch success. | |
* @retval false Launch failed. | |
*/ | |
DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[], | |
const bm_shape_t input_shapes[], int input_num, void * output_datas[], | |
bm_shape_t output_shapes[], int output_num, bool user_mem); | |
/** | |
* @name bmrt_trace | |
* @brief To check runtime environment, and collect info for DEBUG | |
* @ingroup bmruntime | |
* | |
* This API is to collect runtime info for DEBUG. Expecially when launch result sudden mistake, call bmrt_trace | |
* will show whether device mems are broken, and other check info. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
*/ | |
DECL_EXPORT void bmrt_trace(void* p_bmrt); | |
/** | |
* @name bmrt_launch_tensor_multi_cores | |
* @brief To launch the inference of the neuron network with setting input tensors, and support multi core inference. | |
* @ingroup bmruntime | |
* | |
* This API supports the neuron nework that is static-compiled or dynamic-compiled | |
* After calling this API, inference on TPU is launched. And the CPU program will not | |
* be blocked. bm_thread_sync_from_core should be called to make sure inference is finished. | |
* This API support multiple inputs, and multi thread safety | |
* | |
* @param [in] p_bmrt Bmruntime that had been created | |
* @param [in] net_name The name of the neuron network | |
* @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], | |
* User should initialize each input tensor. | |
* @param [in] input_num Input number | |
* @param [out] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. | |
* User can set device_mem or stmode of output tensors. If user_mem is true, this interface | |
* will use device mem of output_tensors to store output data, and not alloc device mem; | |
* Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in | |
* each output tensor; Or stmode will be BM_STORE_1N as default. | |
* @param [in] output_num Output number | |
* @param [in] user_mem whether device_mem of output tensors are set | |
* @param [in] user_stmode whether stmode of output tensors are set | |
* @param [in] core_list core id list those will be used to inference | |
* @param [in] core_num number of the core list | |
* | |
* @retval true Launch success. | |
* @retval false Launch failed. | |
*/ | |
DECL_EXPORT bool bmrt_launch_tensor_multi_cores( | |
void *p_bmrt, | |
const char *net_name, | |
const bm_tensor_t input_tensors[], | |
int input_num, | |
bm_tensor_t output_tensors[], | |
int output_num, | |
bool user_mem, | |
bool user_stmode, | |
const int *core_list, | |
int core_num); | |
/** | |
* @name bmrt_memcpy_s2d_parallel | |
* @brief To copy data from system memory to muti-devices memory in parallel | |
* @ingroup bmruntime | |
* | |
* This API only could be used when the p_bmrt is created with bmrt_create_ex on multi devices. | |
* After calling this API, datas[:tensor_num[0]] will be copied to the first device, and | |
* datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] will be copied to the second device and so on. | |
* The process of copying data to different devices is done in parallel and to the same device is in sequence. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles | |
* @param [in] tensors Array of tensors that will be copied to devices | |
* @param [in] datas Array of satas allocated in system memory | |
* @param [in] tensor_num Array of tensor_num that will be copied to each device | |
* @param [in] device_num Device number | |
*/ | |
DECL_EXPORT bool bmrt_memcpy_s2d_parallel( | |
void *p_bmrt, | |
bm_tensor_t tensors[], | |
void *datas[], | |
int tensor_num[], | |
int device_num); | |
/** | |
* @name bmrt_memcpy_d2s_parallel | |
* @brief To copy data from muti-devices memory to system memory in parallel | |
* @ingroup bmruntime | |
* | |
* This API only could be used when the p_bmrt is created with bmrt_create_ex on multi devices. | |
* After calling this API, tensors on the first device will be copied to datas[:tensor_num[0]] , and | |
* tensors on the second device will be copied to datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] and so on. | |
* The process of copying data from different devices is done in parallel and from the same device is in sequence. | |
* | |
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles | |
* @param [in] datas Array of satas allocated in system memory | |
* @param [in] tensors Array of tensors that will be copied from devices | |
* @param [in] tensor_num Array of tensor_num that will be copied from each device | |
* @param [in] device_num Device number | |
*/ | |
DECL_EXPORT bool bmrt_memcpy_d2s_parallel( | |
void *p_bmrt, | |
void *datas[], | |
bm_tensor_t tensors[], | |
int tensor_num[], | |
int device_num); | |
} | |