/*****************************************************************************
*
* Copyright (c) 2016-2026 by Sophgo Technologies Inc. All rights reserved.
*
* The material in this file is confidential and contains trade secrets
* of Sophgo Technologies Inc. This is proprietary information owned by
* Sophgo Technologies Inc. No part of this work may be disclosed,
* reproduced, copied, transmitted, or used in any way for any purpose,
* without the express written permission of Sophgo Technologies Inc.
*
*****************************************************************************/
/*****************************************************************************
 * The BMRuntime interface is mainly for inference.
 * It can also be used for device computation from BMLang programming.
 * Note: please use the interfaces in bmlib_runtime.h for device memory operations.
****************************************************************************/
#ifndef BMRUNTIME_INTERFACE_H_
#define BMRUNTIME_INTERFACE_H_
#include "bmdef.h"
#ifdef _WIN32
#define DECL_EXPORT __declspec(dllexport)
#define DECL_IMPORT __declspec(dllimport)
#else
#define DECL_EXPORT
#define DECL_IMPORT
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/* --------------------------------------------------------------------------*/
/* interface for basic data type */
/* get data type byte size */
DECL_EXPORT size_t bmrt_data_type_size(bm_data_type_t dtype);
/*
convert a dims array to bm_shape_t;
shape and dims must not be NULL, and num_dims must not be larger than BM_MAX_DIMS_NUM */
DECL_EXPORT void bmrt_shape(bm_shape_t* shape, const int* dims, int num_dims);
/*
number of shape elements; shape must not be NULL, and num_dims must not be larger than
BM_MAX_DIMS_NUM */
DECL_EXPORT uint64_t bmrt_shape_count(const bm_shape_t* shape);
/* compare whether two shapes are the same */
DECL_EXPORT bool bmrt_shape_is_same(const bm_shape_t* left, const bm_shape_t* right);
/*
fill a tensor with the given data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem, so the user must call
bmrt_free_device(p_bmrt, tensor->device_mem) to free it. */
DECL_EXPORT bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape);
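/*
 Usage sketch (illustrative, not normative): build a shape with bmrt_shape,
 allocate the tensor, then free it as described above. BM_FLOAT32 and the
 dims values are placeholders.

   int dims[4] = {1, 3, 224, 224};
   bm_shape_t shape;
   bmrt_shape(&shape, dims, 4);                   // dims array -> bm_shape_t
   bm_tensor_t tensor;
   if (bmrt_tensor(&tensor, p_bmrt, BM_FLOAT32, shape)) {
     // ... copy data in, run inference ...
     bmrt_free_device(p_bmrt, tensor.device_mem); // free as documented above
   }
*/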
/*
fill a tensor with the given data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem on the devid-th device. */
DECL_EXPORT bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape);
/* fill a tensor with existing device memory; the tensor byte size must not be larger than the device memory size */
DECL_EXPORT void bmrt_tensor_with_device(bm_tensor_t* tensor, bm_device_mem_t device_mem,
bm_data_type_t dtype, bm_shape_t shape);
/* get tensor byte size; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_bytesize(const bm_tensor_t* tensor);
/* get the memory size allocated for the tensor in device memory; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_device_size(const bm_tensor_t* tensor);
/* print net info for debug */
DECL_EXPORT void bmrt_print_network_info(const bm_net_info_t* net_info);
/* --------------------------------------------------------------------------*/
/**
* @name bmrt_create
* @brief To create the bmruntime with bm_handle.
* @ingroup bmruntime
*
 * This API creates the bmruntime. It returns a void* pointer, which is the pointer
 * to the bmruntime. The device id is set when the bm_handle is obtained.
 *
 * @param [in]   bm_handle    bm handle. It must be initialized by using bmlib.
 *
 * @retval void*  Pointer to the bmruntime
*/
DECL_EXPORT void* bmrt_create(bm_handle_t bm_handle);
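/*
 Usage sketch: the bm_handle comes from bmlib (bm_dev_request / bm_dev_free),
 as required above. Device index 0 is a placeholder.

   bm_handle_t bm_handle;
   bm_dev_request(&bm_handle, 0);            // open device 0 via bmlib
   void *p_bmrt = bmrt_create(bm_handle);
   // ... load a bmodel and run inference ...
   bmrt_destroy(p_bmrt);
   bm_dev_free(bm_handle);                   // release the device handle
*/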
/* --------------------------------------------------------------------------*/
/**
* @name bmrt_create_ex
* @brief To create the bmruntime with one or more bm_handle.
* @ingroup bmruntime
*
 * This API creates the bmruntime. It returns a void* pointer, which is the pointer
 * to the bmruntime.
 *
 * @param [in]   bm_handles    Array of bm handles. They must be initialized by using bmlib.
 * @param [in]   num_handles   Number of bm_handles.
 *
 * @retval void*  Pointer to the bmruntime
*/
DECL_EXPORT void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles);
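/*
 Usage sketch for a two-device runtime (device ids 0 and 1 are placeholders):

   bm_handle_t handles[2];
   bm_dev_request(&handles[0], 0);
   bm_dev_request(&handles[1], 1);
   void *p_bmrt = bmrt_create_ex(handles, 2);
   // ... bmrt_memcpy_s2d_parallel / launch / bmrt_memcpy_d2s_parallel ...
   bmrt_destroy(p_bmrt);
   bm_dev_free(handles[0]);
   bm_dev_free(handles[1]);
*/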
/**
* @name bmrt_destroy
* @brief To destroy the bmruntime pointer
* @ingroup bmruntime
*
 * This API destroys the bmruntime.
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_destroy(void* p_bmrt);
/**
* @name bmrt_get_bm_handle
* @brief To get the BM runtime context.
* @ingroup bmruntime
*
 * This API gets the bm_handle of the BM runtime context, for use with BMDNN, BMCV or BMLIB.
 *
 * @param [in]   p_bmrt   Bmruntime that had been created
 *
 * @retval void*  Pointer to the bm_handle
 */
DECL_EXPORT void * bmrt_get_bm_handle(void* p_bmrt);
/**
* @name bmrt_load_bmodel
* @brief To load the bmodel which is created by BM compiler
* @ingroup bmruntime
*
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference of the neural networks it contains.
 *
 * @param [in]   p_bmrt        Bmruntime that had been created
 * @param [in]   bmodel_path   Path to the bmodel file.
 *
 * @retval true    Load context success.
 * @retval false   Load context failed.
*/
DECL_EXPORT bool bmrt_load_bmodel(void* p_bmrt, const char *bmodel_path);
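/*
 Usage sketch (the file name is a placeholder):

   if (!bmrt_load_bmodel(p_bmrt, "model.bmodel")) {
     // handle load failure
   }
*/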
/**
* @name bmrt_load_bmodel_data
* @brief To load the bmodel which is created by BM compiler from buffer
* @ingroup bmruntime
*
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference of the neural networks it contains.
 * Unlike bmrt_load_bmodel, the bmodel here is data in host memory.
 *
 * @param [in]   p_bmrt        Bmruntime that had been created
 * @param [in]   bmodel_data   Pointer to the bmodel data buffer
 * @param [in]   size          Bmodel data size
 *
 * @retval true    Load context success.
 * @retval false   Load context failed.
*/
DECL_EXPORT bool bmrt_load_bmodel_data(void* p_bmrt, const void * bmodel_data, size_t size);
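/*
 Usage sketch: read the bmodel into a host buffer with plain C stdio, then
 load from that buffer (error handling trimmed; the file name is a placeholder):

   FILE *fp = fopen("model.bmodel", "rb");
   fseek(fp, 0, SEEK_END);
   size_t size = (size_t)ftell(fp);
   fseek(fp, 0, SEEK_SET);
   void *buf = malloc(size);
   fread(buf, 1, size, fp);
   fclose(fp);
   bool ok = bmrt_load_bmodel_data(p_bmrt, buf, size);
   free(buf);   // assumption: the runtime copies the data, so the buffer
                // can be released once loading returns
*/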
/**
* @name bmrt_show_neuron_network
 * @brief To print the names of all neural networks
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_show_neuron_network(void* p_bmrt);
/**
* @name bmrt_get_network_number
 * @brief To get the number of neural networks in the bmruntime
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
*
 * @retval int    The number of neural networks.
*/
DECL_EXPORT int bmrt_get_network_number(void* p_bmrt);
/**
* @name bmrt_get_network_names
 * @brief To get the names of all neural networks in the bmruntime
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [out]  network_names   The names of all neural networks. It should be declared as
 *                               (const char** networks_ = NULL) and passed as &networks_. After this API
 *                               returns, the user needs to free(networks_) once it is no longer needed.
*/
DECL_EXPORT void bmrt_get_network_names(void* p_bmrt, const char*** network_names);
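/*
 Usage sketch, following the declaration pattern described above:

   const char **net_names = NULL;
   bmrt_get_network_names(p_bmrt, &net_names);
   int num = bmrt_get_network_number(p_bmrt);
   for (int i = 0; i < num; ++i) {
     printf("network[%d]: %s\n", i, net_names[i]);
   }
   free(net_names);   // free the name array, as required above
*/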
/**
* @name bmrt_get_network_info
* @brief To get network info by net name
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
* @param [in] net_name Network name
*
 * @retval bm_net_info_t*  Pointer to net info; no need for the user to free it. Returns NULL if the net name is not found.
*/
DECL_EXPORT const bm_net_info_t* bmrt_get_network_info(void* p_bmrt, const char* net_name);
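/*
 Usage sketch ("my_net" is a placeholder; input_num and input_names are
 assumptions about the bm_net_info_t layout in bmdef.h):

   const bm_net_info_t *info = bmrt_get_network_info(p_bmrt, "my_net");
   if (info != NULL) {
     for (int i = 0; i < info->input_num; ++i) {
       printf("input[%d]: %s\n", i, info->input_names[i]);
     }
   }
   // do not free info: the pointer is owned by the runtime
*/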
/**
* @name bmrt_launch_tensor
 * @brief To launch the inference of the neural network with the given input tensors
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                This interface allocates device mem to store the output data. The user
 *                                should free each device mem with bm_free_device once the result data is
 *                                no longer needed.
* @param [in] output_num Output number
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
bm_tensor_t output_tensors[], int output_num);
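/*
 End-to-end sketch for a single-input, single-output network. bm_memcpy_s2d,
 bm_memcpy_d2s and bm_thread_sync come from bmlib; input_dtypes and
 stages[0].input_shapes are assumptions about bm_net_info_t; host_in/host_out
 are placeholder host buffers sized by bmrt_tensor_bytesize.

   const bm_net_info_t *info = bmrt_get_network_info(p_bmrt, net_name);
   bm_tensor_t in, out;
   bmrt_tensor(&in, p_bmrt, info->input_dtypes[0], info->stages[0].input_shapes[0]);
   bm_memcpy_s2d(bm_handle, in.device_mem, host_in);       // host -> device
   if (bmrt_launch_tensor(p_bmrt, net_name, &in, 1, &out, 1)) {
     bm_thread_sync(bm_handle);                            // wait for inference
     bm_memcpy_d2s(bm_handle, host_out, out.device_mem);   // device -> host
     bm_free_device(bm_handle, out.device_mem);  // output mem was allocated by the API
   }
   bmrt_free_device(p_bmrt, in.device_mem);      // input mem came from bmrt_tensor
*/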
/**
* @name bmrt_launch_tensor_ex
 * @brief To launch the inference of the neural network with the given input tensors
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                The user can set device_mem or stmode of the output tensors. If user_mem is
 *                                true, this interface uses the device mem of output_tensors to store the
 *                                output data instead of allocating device mem; otherwise it allocates device
 *                                mem to store the output. If user_stmode is true, it uses the stmode in each
 *                                output tensor; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether device_mem of the output tensors is set
 * @param [in]    user_stmode     whether stmode of the output tensors is set
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor_ex(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
bm_tensor_t output_tensors[], int output_num, bool user_mem, bool user_stmode);
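/*
 Sketch of the user_mem = true path: the caller pre-allocates the output device
 memory (here via bmrt_tensor) and the API reuses it instead of allocating.
 output_dtypes and stages[0].output_shapes are assumptions about bm_net_info_t.

   bm_tensor_t out;
   bmrt_tensor(&out, p_bmrt, info->output_dtypes[0], info->stages[0].output_shapes[0]);
   bmrt_launch_tensor_ex(p_bmrt, net_name, &in, 1, &out, 1,
                         true,    // user_mem: reuse out.device_mem
                         false);  // user_stmode: keep the BM_STORE_1N default
   bm_thread_sync(bm_handle);
*/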
/**
* @name bmrt_launch_data
 * @brief To launch the inference of the neural network with input data in system memory
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched, and the CPU
 * program is blocked until it finishes.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_datas     Array of input data, defined like void * input_datas[input_num]. The user
 *                                should initialize each data pointer as input.
 * @param [in]    input_shapes    Array of input shapes, defined like bm_shape_t input_shapes[input_num].
 *                                The user should set each input shape.
 * @param [in]    input_num       Input number
 * @param [out]   output_datas    Array of output data, defined like void * output_datas[output_num].
 *                                If the user does not allocate the output data, set user_mem to false and
 *                                this API allocates the output memory; the user should free each output
 *                                memory once the output data is no longer used. Alternatively, the user can
 *                                allocate system memory for each output and set user_mem = true.
 * @param [out]   output_shapes   Array of output shapes, defined like bm_shape_t output_shapes[output_num].
 *                                Each output shape is stored here.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether output_datas[i] has allocated memory
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[],
const bm_shape_t input_shapes[], int input_num, void * output_datas[],
bm_shape_t output_shapes[], int output_num, bool user_mem);
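/*
 Sketch of the blocking, host-memory path with user_mem = false, so the API
 allocates the output host memory. host_in and dims are placeholders.

   void *in_data[1]  = { host_in };
   void *out_data[1] = { NULL };
   bm_shape_t in_shape[1], out_shape[1];
   bmrt_shape(&in_shape[0], dims, 4);
   if (bmrt_launch_data(p_bmrt, net_name, in_data, in_shape, 1,
                        out_data, out_shape, 1, false)) {
     // out_data[0] / out_shape[0] now hold the result; release out_data[0]
     // once it is no longer used, as documented above (assumption: free()
     // matches this API's allocation)
   }
*/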
/**
* @name bmrt_trace
* @brief To check runtime environment, and collect info for DEBUG
* @ingroup bmruntime
*
 * This API collects runtime info for DEBUG. Especially when a launch result is suddenly wrong,
 * calling bmrt_trace will show whether device mems are broken, along with other check info.
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_trace(void* p_bmrt);
/**
* @name bmrt_launch_tensor_multi_cores
 * @brief To launch the inference of the neural network with the given input tensors, with support for multi-core inference.
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync_from_core should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                The user can set device_mem or stmode of the output tensors. If user_mem is
 *                                true, this interface uses the device mem of output_tensors to store the
 *                                output data instead of allocating device mem; otherwise it allocates device
 *                                mem to store the output. If user_stmode is true, it uses the stmode in each
 *                                output tensor; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether device_mem of the output tensors is set
 * @param [in]    user_stmode     whether stmode of the output tensors is set
 * @param [in]    core_list       list of core ids to be used for inference
 * @param [in]    core_num        number of cores in core_list
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor_multi_cores(
void *p_bmrt,
const char *net_name,
const bm_tensor_t input_tensors[],
int input_num,
bm_tensor_t output_tensors[],
int output_num,
bool user_mem,
bool user_stmode,
const int *core_list,
int core_num);
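/*
 Sketch: launch the same net on two cores (core ids are placeholders and
 device-specific). Per the note above, use bm_thread_sync_from_core to wait.

   int core_list[2] = {0, 1};
   bmrt_launch_tensor_multi_cores(p_bmrt, net_name, &in, 1, &out, 1,
                                  false, false, core_list, 2);
   // sync each core in core_list with bm_thread_sync_from_core before
   // reading the outputs
*/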
/**
* @name bmrt_memcpy_s2d_parallel
 * @brief To copy data from system memory to multi-device memory in parallel
* @ingroup bmruntime
*
 * This API can only be used when p_bmrt was created with bmrt_create_ex on multiple devices.
 * After calling this API, datas[:tensor_num[0]] is copied to the first device,
 * datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] is copied to the second device, and so on.
 * Copies to different devices run in parallel, while copies to the same device run in sequence.
*
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles
 * @param [in]    tensors      Array of tensors that will be copied to the devices
 * @param [in]    datas        Array of data pointers allocated in system memory
 * @param [in]    tensor_num   Array giving the number of tensors copied to each device
 * @param [in]    device_num   Device number
 *
 * @retval true    Copy success.
 * @retval false   Copy failed.
*/
DECL_EXPORT bool bmrt_memcpy_s2d_parallel(
void *p_bmrt,
bm_tensor_t tensors[],
void *datas[],
int tensor_num[],
int device_num);
/**
* @name bmrt_memcpy_d2s_parallel
 * @brief To copy data from multi-device memory to system memory in parallel
* @ingroup bmruntime
*
 * This API can only be used when p_bmrt was created with bmrt_create_ex on multiple devices.
 * After calling this API, tensors on the first device are copied to datas[:tensor_num[0]],
 * tensors on the second device are copied to datas[tensor_num[0]:tensor_num[0]+tensor_num[1]], and so on.
 * Copies from different devices run in parallel, while copies from the same device run in sequence.
*
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles
 * @param [in]    datas        Array of data pointers allocated in system memory
 * @param [in]    tensors      Array of tensors that will be copied from the devices
 * @param [in]    tensor_num   Array giving the number of tensors copied from each device
 * @param [in]    device_num   Device number
 *
 * @retval true    Copy success.
 * @retval false   Copy failed.
*/
DECL_EXPORT bool bmrt_memcpy_d2s_parallel(
void *p_bmrt,
void *datas[],
bm_tensor_t tensors[],
int tensor_num[],
int device_num);
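/*
 Sketch pairing the two parallel copies around a multi-device launch: two
 devices, one tensor each. tensors/out_tensors are assumed to be created
 beforehand (e.g. with bmrt_tensor_ex per device); host buffers are placeholders.

   int tensor_num[2] = {1, 1};                  // one tensor per device
   void *host_in[2]  = { in_buf0, in_buf1 };
   bmrt_memcpy_s2d_parallel(p_bmrt, tensors, host_in, tensor_num, 2);
   // ... launch inference on the multi-device runtime ...
   void *host_out[2] = { out_buf0, out_buf1 };
   bmrt_memcpy_d2s_parallel(p_bmrt, host_out, out_tensors, tensor_num, 2);
*/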
#if defined (__cplusplus)
}
#endif
#endif  /* BMRUNTIME_INTERFACE_H_ */