/*****************************************************************************
*
* Copyright (c) 2016-2026 by Sophgo Technologies Inc. All rights reserved.
*
* The material in this file is confidential and contains trade secrets
* of Sophgo Technologies Inc. This is proprietary information owned by
* Sophgo Technologies Inc. No part of this work may be disclosed,
* reproduced, copied, transmitted, or used in any way for any purpose,
* without the express written permission of Sophgo Technologies Inc.
*
*****************************************************************************/
/*****************************************************************************
 * The BMRuntime interface is mainly for inference.
 * It can also be used for device computation from BMLang programming.
 * Note: please use the interfaces in bmlib_runtime.h for device memory operations.
****************************************************************************/
#ifndef BMRUNTIME_INTERFACE_H_
#define BMRUNTIME_INTERFACE_H_
#include "bmdef.h"
#ifdef _WIN32
#define DECL_EXPORT __declspec(dllexport)
#define DECL_IMPORT __declspec(dllimport)
#else
#define DECL_EXPORT
#define DECL_IMPORT
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/* --------------------------------------------------------------------------*/
/* interface for basic data type */
/* get data type byte size */
DECL_EXPORT size_t bmrt_data_type_size(bm_data_type_t dtype);
/*
convert a dims array to bm_shape_t;
shape and dims must not be NULL, and num_dims must not be larger than BM_MAX_DIMS_NUM */
DECL_EXPORT void bmrt_shape(bm_shape_t* shape, const int* dims, int num_dims);
/*
number of shape elements; shape must not be NULL, and num_dims must not be larger than
BM_MAX_DIMS_NUM */
DECL_EXPORT uint64_t bmrt_shape_count(const bm_shape_t* shape);
/* compare whether two shapes are the same */
DECL_EXPORT bool bmrt_shape_is_same(const bm_shape_t* left, const bm_shape_t* right);
/*
fill a tensor with the given data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem, so the user must call
bmrt_free_device(p_bmrt, tensor->device_mem) to free it. */
DECL_EXPORT bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape);
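/*
 Usage sketch (illustrative, not normative): build a shape with bmrt_shape,
 allocate the tensor, then free it as described above. BM_FLOAT32 and the
 dims values are placeholders.

   int dims[4] = {1, 3, 224, 224};
   bm_shape_t shape;
   bmrt_shape(&shape, dims, 4);                   // dims array -> bm_shape_t
   bm_tensor_t tensor;
   if (bmrt_tensor(&tensor, p_bmrt, BM_FLOAT32, shape)) {
     // ... copy data in, run inference ...
     bmrt_free_device(p_bmrt, tensor.device_mem); // free as documented above
   }
*/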
/*
fill a tensor with the given data type and shape; st_mode defaults to 0.
tensor and p_bmrt must not be NULL, and the shape count must not be 0.
it allocates device memory to tensor->device_mem on the devid-th device. */
DECL_EXPORT bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape);
/* fill a tensor with existing device memory; the tensor byte size must not be larger than the device memory size */
DECL_EXPORT void bmrt_tensor_with_device(bm_tensor_t* tensor, bm_device_mem_t device_mem,
bm_data_type_t dtype, bm_shape_t shape);
/* get tensor byte size; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_bytesize(const bm_tensor_t* tensor);
/* get the memory size allocated for the tensor in device memory; tensor must not be NULL */
DECL_EXPORT size_t bmrt_tensor_device_size(const bm_tensor_t* tensor);
/* print net info for debug */
DECL_EXPORT void bmrt_print_network_info(const bm_net_info_t* net_info);
/* --------------------------------------------------------------------------*/
/**
* @name bmrt_create
* @brief To create the bmruntime with bm_handle.
* @ingroup bmruntime
*
 * This API creates the bmruntime. It returns a void* pointer, which is the pointer
 * to the bmruntime. The device id is set when the bm_handle is obtained.
 *
 * @param [in]   bm_handle    bm handle. It must be initialized by using bmlib.
 *
 * @retval void*  Pointer to the bmruntime
*/
DECL_EXPORT void* bmrt_create(bm_handle_t bm_handle);
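/*
 Usage sketch: the bm_handle comes from bmlib (bm_dev_request / bm_dev_free),
 as required above. Device index 0 is a placeholder.

   bm_handle_t bm_handle;
   bm_dev_request(&bm_handle, 0);            // open device 0 via bmlib
   void *p_bmrt = bmrt_create(bm_handle);
   // ... load a bmodel and run inference ...
   bmrt_destroy(p_bmrt);
   bm_dev_free(bm_handle);                   // release the device handle
*/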
/* --------------------------------------------------------------------------*/
/**
* @name bmrt_create_ex
* @brief To create the bmruntime with one or more bm_handle.
* @ingroup bmruntime
*
 * This API creates the bmruntime. It returns a void* pointer, which is the pointer
 * to the bmruntime.
 *
 * @param [in]   bm_handles    Array of bm handles. They must be initialized by using bmlib.
 * @param [in]   num_handles   Number of bm_handles.
 *
 * @retval void*  Pointer to the bmruntime
*/
DECL_EXPORT void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles);
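/*
 Usage sketch for a two-device runtime (device ids 0 and 1 are placeholders):

   bm_handle_t handles[2];
   bm_dev_request(&handles[0], 0);
   bm_dev_request(&handles[1], 1);
   void *p_bmrt = bmrt_create_ex(handles, 2);
   // ... bmrt_memcpy_s2d_parallel / launch / bmrt_memcpy_d2s_parallel ...
   bmrt_destroy(p_bmrt);
   bm_dev_free(handles[0]);
   bm_dev_free(handles[1]);
*/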
/**
* @name bmrt_destroy
* @brief To destroy the bmruntime pointer
* @ingroup bmruntime
*
 * This API destroys the bmruntime.
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_destroy(void* p_bmrt);
/**
* @name bmrt_get_bm_handle
* @brief To get the BM runtime context.
* @ingroup bmruntime
*
 * This API gets the bm_handle of the BM runtime context, for use with BMDNN, BMCV or BMLIB.
 *
 * @param [in]   p_bmrt   Bmruntime that had been created
 *
 * @retval void*  Pointer to the bm_handle
 */
DECL_EXPORT void * bmrt_get_bm_handle(void* p_bmrt);
/**
* @name bmrt_load_bmodel
* @brief To load the bmodel which is created by BM compiler
* @ingroup bmruntime
*
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference of the neural networks it contains.
 *
 * @param [in]   p_bmrt        Bmruntime that had been created
 * @param [in]   bmodel_path   Path to the bmodel file.
 *
 * @retval true    Load context success.
 * @retval false   Load context failed.
*/
DECL_EXPORT bool bmrt_load_bmodel(void* p_bmrt, const char *bmodel_path);
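/*
 Usage sketch (the file name is a placeholder):

   if (!bmrt_load_bmodel(p_bmrt, "model.bmodel")) {
     // handle load failure
   }
*/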
/**
* @name bmrt_load_bmodel_data
* @brief To load the bmodel which is created by BM compiler from buffer
* @ingroup bmruntime
*
 * This API loads a bmodel created by the BM compiler.
 * After loading the bmodel, we can run inference of the neural networks it contains.
 * Unlike bmrt_load_bmodel, the bmodel here is data in host memory.
 *
 * @param [in]   p_bmrt        Bmruntime that had been created
 * @param [in]   bmodel_data   Pointer to the bmodel data buffer
 * @param [in]   size          Bmodel data size
 *
 * @retval true    Load context success.
 * @retval false   Load context failed.
*/
DECL_EXPORT bool bmrt_load_bmodel_data(void* p_bmrt, const void * bmodel_data, size_t size);
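/*
 Usage sketch: read the bmodel into a host buffer with plain C stdio, then
 load from that buffer (error handling trimmed; the file name is a placeholder):

   FILE *fp = fopen("model.bmodel", "rb");
   fseek(fp, 0, SEEK_END);
   size_t size = (size_t)ftell(fp);
   fseek(fp, 0, SEEK_SET);
   void *buf = malloc(size);
   fread(buf, 1, size, fp);
   fclose(fp);
   bool ok = bmrt_load_bmodel_data(p_bmrt, buf, size);
   free(buf);   // assumption: the runtime copies the data, so the buffer
                // can be released once loading returns
*/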
/**
* @name bmrt_show_neuron_network
 * @brief To print the names of all neural networks
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_show_neuron_network(void* p_bmrt);
/**
* @name bmrt_get_network_number
 * @brief To get the number of neural networks in the bmruntime
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
*
 * @retval int    The number of neural networks.
*/
DECL_EXPORT int bmrt_get_network_number(void* p_bmrt);
/**
* @name bmrt_get_network_names
 * @brief To get the names of all neural networks in the bmruntime
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [out]  network_names   The names of all neural networks. It should be declared as
 *                               (const char** networks_ = NULL) and passed as &networks_. After this API
 *                               returns, the user needs to free(networks_) once it is no longer needed.
*/
DECL_EXPORT void bmrt_get_network_names(void* p_bmrt, const char*** network_names);
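/*
 Usage sketch, following the declaration pattern described above:

   const char **net_names = NULL;
   bmrt_get_network_names(p_bmrt, &net_names);
   int num = bmrt_get_network_number(p_bmrt);
   for (int i = 0; i < num; ++i) {
     printf("network[%d]: %s\n", i, net_names[i]);
   }
   free(net_names);   // free the name array, as required above
*/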
/**
* @name bmrt_get_network_info
* @brief To get network info by net name
* @ingroup bmruntime
*
* @param [in] p_bmrt Bmruntime that had been created
* @param [in] net_name Network name
*
 * @retval bm_net_info_t*  Pointer to net info; no need for the user to free it. Returns NULL if the net name is not found.
*/
DECL_EXPORT const bm_net_info_t* bmrt_get_network_info(void* p_bmrt, const char* net_name);
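/*
 Usage sketch ("my_net" is a placeholder; input_num and input_names are
 assumptions about the bm_net_info_t layout in bmdef.h):

   const bm_net_info_t *info = bmrt_get_network_info(p_bmrt, "my_net");
   if (info != NULL) {
     for (int i = 0; i < info->input_num; ++i) {
       printf("input[%d]: %s\n", i, info->input_names[i]);
     }
   }
   // do not free info: the pointer is owned by the runtime
*/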
/**
* @name bmrt_launch_tensor
 * @brief To launch the inference of the neural network with the given input tensors
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                This interface allocates device mem to store the output data. The user
 *                                should free each device mem with bm_free_device once the result data is
 *                                no longer needed.
* @param [in] output_num Output number
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
bm_tensor_t output_tensors[], int output_num);
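/*
 End-to-end sketch for a single-input, single-output network. bm_memcpy_s2d,
 bm_memcpy_d2s and bm_thread_sync come from bmlib; input_dtypes and
 stages[0].input_shapes are assumptions about bm_net_info_t; host_in/host_out
 are placeholder host buffers sized by bmrt_tensor_bytesize.

   const bm_net_info_t *info = bmrt_get_network_info(p_bmrt, net_name);
   bm_tensor_t in, out;
   bmrt_tensor(&in, p_bmrt, info->input_dtypes[0], info->stages[0].input_shapes[0]);
   bm_memcpy_s2d(bm_handle, in.device_mem, host_in);       // host -> device
   if (bmrt_launch_tensor(p_bmrt, net_name, &in, 1, &out, 1)) {
     bm_thread_sync(bm_handle);                            // wait for inference
     bm_memcpy_d2s(bm_handle, host_out, out.device_mem);   // device -> host
     bm_free_device(bm_handle, out.device_mem);  // output mem was allocated by the API
   }
   bmrt_free_device(p_bmrt, in.device_mem);      // input mem came from bmrt_tensor
*/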
/**
* @name bmrt_launch_tensor_ex
 * @brief To launch the inference of the neural network with the given input tensors
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                The user can set device_mem or stmode of the output tensors. If user_mem is
 *                                true, this interface uses the device mem of output_tensors to store the
 *                                output data instead of allocating device mem; otherwise it allocates device
 *                                mem to store the output. If user_stmode is true, it uses the stmode in each
 *                                output tensor; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether device_mem of the output tensors is set
 * @param [in]    user_stmode     whether stmode of the output tensors is set
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor_ex(void* p_bmrt, const char * net_name, const bm_tensor_t input_tensors[], int input_num,
bm_tensor_t output_tensors[], int output_num, bool user_mem, bool user_stmode);
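/*
 Sketch of the user_mem = true path: the caller pre-allocates the output device
 memory (here via bmrt_tensor) and the API reuses it instead of allocating.
 output_dtypes and stages[0].output_shapes are assumptions about bm_net_info_t.

   bm_tensor_t out;
   bmrt_tensor(&out, p_bmrt, info->output_dtypes[0], info->stages[0].output_shapes[0]);
   bmrt_launch_tensor_ex(p_bmrt, net_name, &in, 1, &out, 1,
                         true,    // user_mem: reuse out.device_mem
                         false);  // user_stmode: keep the BM_STORE_1N default
   bm_thread_sync(bm_handle);
*/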
/**
* @name bmrt_launch_data
 * @brief To launch the inference of the neural network with input data in system memory
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched, and the CPU
 * program is blocked until it finishes.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_datas     Array of input data, defined like void * input_datas[input_num]. The user
 *                                should initialize each data pointer as input.
 * @param [in]    input_shapes    Array of input shapes, defined like bm_shape_t input_shapes[input_num].
 *                                The user should set each input shape.
 * @param [in]    input_num       Input number
 * @param [out]   output_datas    Array of output data, defined like void * output_datas[output_num].
 *                                If the user does not allocate the output data, set user_mem to false and
 *                                this API allocates the output memory; the user should free each output
 *                                memory once the output data is no longer used. Alternatively, the user can
 *                                allocate system memory for each output and set user_mem = true.
 * @param [out]   output_shapes   Array of output shapes, defined like bm_shape_t output_shapes[output_num].
 *                                Each output shape is stored here.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether output_datas[i] has allocated memory
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[],
const bm_shape_t input_shapes[], int input_num, void * output_datas[],
bm_shape_t output_shapes[], int output_num, bool user_mem);
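/*
 Sketch of the blocking, host-memory path with user_mem = false, so the API
 allocates the output host memory. host_in and dims are placeholders.

   void *in_data[1]  = { host_in };
   void *out_data[1] = { NULL };
   bm_shape_t in_shape[1], out_shape[1];
   bmrt_shape(&in_shape[0], dims, 4);
   if (bmrt_launch_data(p_bmrt, net_name, in_data, in_shape, 1,
                        out_data, out_shape, 1, false)) {
     // out_data[0] / out_shape[0] now hold the result; release out_data[0]
     // once it is no longer used, as documented above (assumption: free()
     // matches this API's allocation)
   }
*/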
/**
* @name bmrt_trace
* @brief To check runtime environment, and collect info for DEBUG
* @ingroup bmruntime
*
 * This API collects runtime info for DEBUG. Especially when a launch result is suddenly wrong,
 * calling bmrt_trace will show whether device mems are broken, along with other check info.
*
* @param [in] p_bmrt Bmruntime that had been created
*/
DECL_EXPORT void bmrt_trace(void* p_bmrt);
/**
* @name bmrt_launch_tensor_multi_cores
 * @brief To launch the inference of the neural network with the given input tensors, with support for multi-core inference.
* @ingroup bmruntime
*
 * This API supports neural networks that are statically or dynamically compiled.
 * After calling this API, inference on the TPU is launched without blocking the CPU
 * program. bm_thread_sync_from_core should be called to make sure inference has finished.
 * This API supports multiple inputs and is thread-safe.
*
* @param [in] p_bmrt Bmruntime that had been created
 * @param [in]    net_name        The name of the neural network
 * @param [in]    input_tensors   Array of input tensors, defined like bm_tensor_t input_tensors[input_num].
 *                                The user should initialize each input tensor.
 * @param [in]    input_num       Input number
 * @param [out]   output_tensors  Array of output tensors, defined like bm_tensor_t output_tensors[output_num].
 *                                The user can set device_mem or stmode of the output tensors. If user_mem is
 *                                true, this interface uses the device mem of output_tensors to store the
 *                                output data instead of allocating device mem; otherwise it allocates device
 *                                mem to store the output. If user_stmode is true, it uses the stmode in each
 *                                output tensor; otherwise stmode defaults to BM_STORE_1N.
 * @param [in]    output_num      Output number
 * @param [in]    user_mem        whether device_mem of the output tensors is set
 * @param [in]    user_stmode     whether stmode of the output tensors is set
 * @param [in]    core_list       list of core ids to be used for inference
 * @param [in]    core_num        number of cores in core_list
*
* @retval true Launch success.
* @retval false Launch failed.
*/
DECL_EXPORT bool bmrt_launch_tensor_multi_cores(
void *p_bmrt,
const char *net_name,
const bm_tensor_t input_tensors[],
int input_num,
bm_tensor_t output_tensors[],
int output_num,
bool user_mem,
bool user_stmode,
const int *core_list,
int core_num);
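/*
 Sketch: launch the same net on two cores (core ids are placeholders and
 device-specific). Per the note above, use bm_thread_sync_from_core to wait.

   int core_list[2] = {0, 1};
   bmrt_launch_tensor_multi_cores(p_bmrt, net_name, &in, 1, &out, 1,
                                  false, false, core_list, 2);
   // sync each core in core_list with bm_thread_sync_from_core before
   // reading the outputs
*/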
/**
* @name bmrt_memcpy_s2d_parallel
 * @brief To copy data from system memory to multi-device memory in parallel
* @ingroup bmruntime
*
 * This API can only be used when p_bmrt was created with bmrt_create_ex on multiple devices.
 * After calling this API, datas[:tensor_num[0]] is copied to the first device,
 * datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] is copied to the second device, and so on.
 * Copies to different devices run in parallel, while copies to the same device run in sequence.
*
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles
 * @param [in]    tensors      Array of tensors that will be copied to the devices
 * @param [in]    datas        Array of data pointers allocated in system memory
 * @param [in]    tensor_num   Array giving the number of tensors copied to each device
 * @param [in]    device_num   Device number
 *
 * @retval true    Copy success.
 * @retval false   Copy failed.
*/
DECL_EXPORT bool bmrt_memcpy_s2d_parallel(
void *p_bmrt,
bm_tensor_t tensors[],
void *datas[],
int tensor_num[],
int device_num);
/**
* @name bmrt_memcpy_d2s_parallel
 * @brief To copy data from multi-device memory to system memory in parallel
* @ingroup bmruntime
*
 * This API can only be used when p_bmrt was created with bmrt_create_ex on multiple devices.
 * After calling this API, tensors on the first device are copied to datas[:tensor_num[0]],
 * tensors on the second device are copied to datas[tensor_num[0]:tensor_num[0]+tensor_num[1]], and so on.
 * Copies from different devices run in parallel, while copies from the same device run in sequence.
*
* @param [in] p_bmrt Bmruntime that had been created with multi bm_handles
 * @param [in]    datas        Array of data pointers allocated in system memory
 * @param [in]    tensors      Array of tensors that will be copied from the devices
 * @param [in]    tensor_num   Array giving the number of tensors copied from each device
 * @param [in]    device_num   Device number
 *
 * @retval true    Copy success.
 * @retval false   Copy failed.
*/
DECL_EXPORT bool bmrt_memcpy_d2s_parallel(
void *p_bmrt,
void *datas[],
bm_tensor_t tensors[],
int tensor_num[],
int device_num);
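/*
 Sketch pairing the two parallel copies around a multi-device launch: two
 devices, one tensor each. tensors/out_tensors are assumed to be created
 beforehand (e.g. with bmrt_tensor_ex per device); host buffers are placeholders.

   int tensor_num[2] = {1, 1};                  // one tensor per device
   void *host_in[2]  = { in_buf0, in_buf1 };
   bmrt_memcpy_s2d_parallel(p_bmrt, tensors, host_in, tensor_num, 2);
   // ... launch inference on the multi-device runtime ...
   void *host_out[2] = { out_buf0, out_buf1 };
   bmrt_memcpy_d2s_parallel(p_bmrt, host_out, out_tensors, tensor_num, 2);
*/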
#if defined (__cplusplus)
}
#endif
#endif  /* BMRUNTIME_INTERFACE_H_ */