ss928_framework/libapi/svp_npu/libapi_npu_process.c

511 lines
14 KiB
C
Raw Normal View History

2024-12-16 13:31:45 +08:00
/*
Copyright (c), 2001-2022, Shenshu Tech. Co., Ltd.
*/
#include "libapi_npu_process.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <semaphore.h>
#include <pthread.h>
#include <limits.h>
#include "ot_common_svp.h"
#include "libapi_common_svp.h"
#include "libapi_npu_model.h"
/* Device id passed to aclrtSetDevice()/aclrtResetDevice() throughout this file. */
static td_u32 g_npu_dev_id = 0;
/*
 * Read the image file "testyuv.sp420" (resolved with realpath() against the
 * current working directory) into dev_buf.
 *
 * dev_buf  : destination buffer; at most buf_size bytes are written to it.
 * buf_size : capacity of dev_buf in bytes. A longer file is truncated to
 *            buf_size bytes.
 *
 * Returns TD_SUCCESS when the data was read. Returns TD_FAILURE when the file
 * cannot be resolved, opened, measured or read, or when it is empty.
 */
static td_s32 libapi_svp_npu_fill_input_data(td_void *dev_buf, size_t buf_size)
{
    td_s32 result = TD_FAILURE;
    td_s32 ret;
    td_char path[PATH_MAX] = { 0 };
    long file_len;
    size_t read_len;
    size_t items_read;

    if (realpath("testyuv.sp420", path) == TD_NULL) {
        macro_svp_trace_err("Invalid file!.\n");
        return TD_FAILURE;
    }
    FILE *fp = fopen(path, "rb");
    macro_svp_check_exps_return(fp == TD_NULL, TD_FAILURE, ENUM_SVP_ERR_LEVEL_ERROR,
        "open image file failed!\n");

    ret = fseek(fp, 0L, SEEK_END);
    macro_svp_check_exps_goto(ret != 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Fseek failed!\n");

    /*
     * ftell() reports failure as -1L. Keep the result signed so that the
     * error is detected instead of turning into a huge unsigned size.
     */
    file_len = ftell(fp);
    macro_svp_check_exps_goto(file_len <= 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Ftell failed!\n");

    ret = fseek(fp, 0L, SEEK_SET);
    macro_svp_check_exps_goto(ret != 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Fseek failed!\n");

    /* Never read more than the destination buffer can hold. */
    read_len = ((size_t)file_len > buf_size) ? buf_size : (size_t)file_len;
    items_read = fread(dev_buf, read_len, 1, fp);
    macro_svp_check_exps_goto(items_read != 1, end, ENUM_SVP_ERR_LEVEL_ERROR, "Read file failed!\n");

    result = TD_SUCCESS;
end:
    /* fp is always a valid stream here: the open failure returned earlier. */
    fclose(fp);
    return result;
}
/*
 * Release the ACL runtime: reset device g_npu_dev_id, then finalize ACL.
 * A failing step is only logged; both steps are always attempted and the
 * "end to ..." trace lines are emitted regardless of the outcome.
 */
static td_void libapi_svp_npu_destroy_resource(td_void)
{
    aclError reset_status = aclrtResetDevice(g_npu_dev_id);
    if (reset_status != ACL_ERROR_NONE) {
        macro_svp_trace_err("reset device fail\n");
    }
    macro_svp_trace_info("end to reset device is %d\n", g_npu_dev_id);

    aclError finalize_status = aclFinalize();
    if (finalize_status != ACL_ERROR_NONE) {
        macro_svp_trace_err("finalize acl fail\n");
    }
    macro_svp_trace_info("end to finalize acl\n");
}
/*
 * Bring up the ACL runtime in three steps: aclInit() with an empty config
 * path, open device g_npu_dev_id, and query the run mode.
 *
 * Returns TD_SUCCESS only when every step succeeds and the reported run mode
 * is ACL_DEVICE; otherwise logs the failing step and returns TD_FAILURE.
 * Nothing is released here on failure.
 */
static td_s32 libapi_svp_npu_init_resource(td_void)
{
    aclrtRunMode mode;
    td_s32 status;

    /* ACL init */
    status = aclInit("");
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("acl init fail.\n");
        return TD_FAILURE;
    }
    macro_svp_trace_info("acl init success.\n");

    /* open device */
    status = aclrtSetDevice(g_npu_dev_id);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("acl open device %d fail.\n", g_npu_dev_id);
        return TD_FAILURE;
    }
    macro_svp_trace_info("open device %d success.\n", g_npu_dev_id);

    /* get run mode: the query must succeed and report ACL_DEVICE */
    status = aclrtGetRunMode(&mode);
    if (!((status == ACL_ERROR_NONE) && (mode == ACL_DEVICE))) {
        macro_svp_trace_err("acl get run mode fail.\n");
        return TD_FAILURE;
    }
    macro_svp_trace_info("get run mode success\n");

    return TD_SUCCESS;
}
/*
 * Abnormal-termination path: release the ACL runtime, then print a red
 * (ANSI-coloured) notice on stdout.
 */
static td_void libapi_svp_npu_acl_resnet50_stop(td_void)
{
    libapi_svp_npu_destroy_resource();
    fputs("\033[0;31mprogram termination abnormally!\033[0;39m\n", stdout);
}
/*
 * Initialise the ACL runtime. When initialisation fails, whatever was brought
 * up is torn down again before returning.
 *
 * Returns the status reported by libapi_svp_npu_init_resource().
 */
static td_s32 libapi_svp_npu_acl_prepare_init()
{
    const td_s32 status = libapi_svp_npu_init_resource();

    if (status == TD_SUCCESS) {
        return status;
    }
    libapi_svp_npu_destroy_resource();
    return status;
}
/*
 * Destroy the descriptor and unload the model for indices 0..thread_num-1,
 * then release the ACL runtime. The runtime is released even when
 * thread_num is 0.
 */
static td_void libapi_svp_npu_acl_prepare_exit(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_desc(idx);
        libapi_npu_unload_model(idx);
        idx++;
    }
    libapi_svp_npu_destroy_resource();
}
/*
 * Load the offline model at om_model_path into slot model_index and create
 * its model description.
 *
 * om_model_path : path of the .om file; it is canonicalised with realpath().
 * model_index   : slot passed through to the libapi_npu_* helpers.
 * is_cached     : TD_TRUE selects libapi_npu_load_model_with_mem_cached(),
 *                 anything else libapi_npu_load_model_with_mem().
 *
 * Returns TD_SUCCESS on success. Returns TD_FAILURE when the path is NULL,
 * too long or cannot be resolved. Otherwise returns the status of the helper
 * that failed, after undoing the partial work.
 */
static td_s32 libapi_svp_npu_load_model(const char* om_model_path, td_u32 model_index, td_bool is_cached)
{
    td_char path[PATH_MAX] = { 0 };
    td_s32 ret;

    if (om_model_path == TD_NULL) {
        macro_svp_trace_err("invalid file!.\n");
        return TD_FAILURE;
    }
    /* Check the string length; sizeof on the parameter would only measure the pointer. */
    if (strlen(om_model_path) >= PATH_MAX) {
        macro_svp_trace_err("pathname too long!.\n");
        return TD_FAILURE;
    }
    if (realpath(om_model_path, path) == TD_NULL) {
        macro_svp_trace_err("invalid file!.\n");
        return TD_FAILURE;
    }

    if (is_cached == TD_TRUE) {
        ret = libapi_npu_load_model_with_mem_cached(path, model_index);
    } else {
        ret = libapi_npu_load_model_with_mem(path, model_index);
    }
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute load model fail, model_index is:%d.\n", model_index);
        goto acl_prepare_end1;
    }

    ret = libapi_npu_create_desc(model_index);
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute create desc fail.\n");
        goto acl_prepare_end2;
    }
    return TD_SUCCESS;

    /*
     * NOTE(review): the cleanup below runs the destroy/unload helpers for the
     * very step that just failed, and uses libapi_npu_unload_model() even for
     * a cached load. This relies on those helpers tolerating a slot that was
     * not fully set up -- confirm against their implementation.
     */
acl_prepare_end2:
    libapi_npu_destroy_desc(model_index);
acl_prepare_end1:
    libapi_npu_unload_model(model_index);
    return ret;
}
/*
 * Create the input dataset and then the output for slot model_index.
 * If the output cannot be created, the input dataset is destroyed again.
 *
 * Returns TD_SUCCESS when both exist, TD_FAILURE otherwise.
 */
static td_s32 libapi_svp_npu_dataset_prepare_init(td_u32 model_index)
{
    if (libapi_npu_create_input_dataset(model_index) != TD_SUCCESS) {
        macro_svp_trace_err("execute create input fail.\n");
        return TD_FAILURE;
    }

    if (libapi_npu_create_output(model_index) == TD_SUCCESS) {
        return TD_SUCCESS;
    }

    /* Output creation failed: roll back the input dataset created above. */
    libapi_npu_destroy_input_dataset(model_index);
    macro_svp_trace_err("execute create output fail.\n");
    return TD_FAILURE;
}
/*
 * Create the cached input and then the cached output for slot model_index.
 * If the output cannot be created, the cached input is destroyed again.
 *
 * Returns TD_SUCCESS when both exist, TD_FAILURE otherwise.
 */
static td_s32 libapi_svp_npu_create_cached_input_output(td_u32 model_index)
{
    if (libapi_npu_create_cached_input(model_index) != TD_SUCCESS) {
        macro_svp_trace_err("execute create input fail.\n");
        return TD_FAILURE;
    }

    if (libapi_npu_create_cached_output(model_index) == TD_SUCCESS) {
        return TD_SUCCESS;
    }

    /* Output creation failed: roll back the cached input created above. */
    libapi_npu_destroy_cached_input(model_index);
    macro_svp_trace_err("execute create output fail.\n");
    return TD_FAILURE;
}
/*
 * Destroy the output and then the input dataset of every slot in
 * 0..thread_num-1.
 */
static td_void libapi_svp_npu_dataset_prepare_exit(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_output(idx);
        libapi_npu_destroy_input_dataset(idx);
        idx++;
    }
}
/*
 * Free the first thread_num buffers in data_buf with aclrtFree(); the result
 * of each free is deliberately ignored. data_len is not needed for the free
 * and is only marked as unused.
 */
static td_void libapi_svp_npu_release_input_data(td_void **data_buf, size_t *data_len, td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        ot_unused(data_len[idx]);
        (td_void)aclrtFree(data_buf[idx]);
        idx++;
    }
}
/*
 * Allocate a buffer sized for input 0 of slot model_index.
 *
 * data_buf    : receives the pointer allocated by aclrtMalloc().
 * data_len    : receives the buffer size in bytes (written on success only).
 * model_index : slot whose input size is queried.
 *
 * Returns TD_SUCCESS on success, TD_FAILURE when the size query or the
 * allocation fails.
 */
static td_s32 libapi_svp_npu_get_input_data(td_void **data_buf, size_t *data_len, td_u32 model_index)
{
    size_t input_bytes;
    td_s32 status;

    status = libapi_npu_get_input_size_by_index(0, &input_bytes, model_index);
    if (status != TD_SUCCESS) {
        macro_svp_trace_err("execute get input size fail.\n");
        return TD_FAILURE;
    }

    status = aclrtMalloc(data_buf, input_bytes, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("malloc device buffer fail. size is %zu, errorCode is %d.\n", input_bytes, status);
        return TD_FAILURE;
    }

    /*
     * The call to libapi_svp_npu_fill_input_data(*data_buf, buf_size) that
     * used to follow the allocation is disabled, so nothing in this function
     * writes the buffer contents. If it is re-enabled, its failure path must
     * free *data_buf (the allocation), not data_buf.
     */
    *data_len = input_bytes;
    macro_svp_trace_info("get input data success\n");
    return TD_SUCCESS;
}
/*
 * Thin wrapper: forwards the buffer, its length and the slot index to
 * libapi_npu_create_input_databuf() and returns that call's status.
 */
static td_s32 libapi_svp_npu_create_input_databuf(td_void *data_buf, size_t data_len, td_u32 model_index)
{
    const td_s32 status = libapi_npu_create_input_databuf(data_buf, data_len, model_index);

    return status;
}
/* Destroy the input data buffer of every slot in 0..thread_num-1. */
static td_void libapi_svp_npu_destroy_input_databuf(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_input_databuf(idx);
        idx++;
    }
}
/*
 * Thread entry point: run inference for one model slot.
 *
 * args : pointer to a td_u32 holding the slot index; it is read once on
 *        entry.
 *
 * The thread opens device g_npu_dev_id, executes the model and resets the
 * device again. Failures are only logged. If the device cannot be opened,
 * neither the execution nor the reset is attempted.
 *
 * Always returns NULL.
 */
void *libapi_svp_execute_func_contious(void *args)
{
    const td_u32 slot = *(td_u32 *)args;
    td_s32 status;

    status = aclrtSetDevice(g_npu_dev_id);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("acl open device %d fail.\n", g_npu_dev_id);
        return NULL;
    }
    macro_svp_trace_info("open device %d success.\n", g_npu_dev_id);

    status = libapi_npu_model_execute(slot);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("execute inference failed of thread[%d].\n", slot);
    }

    /* The device is reset even when the execution above failed. */
    status = aclrtResetDevice(g_npu_dev_id);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("model[%d]reset device failed\n", slot);
    }
    return NULL;
}
/*
 * Run inference for up to MAX_THREAD_NUM model slots, one worker thread per
 * slot, wait for the workers and then output each slot's result.
 *
 * Thread creation stops at the first pthread_create() failure. Only threads
 * that were actually created are joined, and results are printed only for
 * the slots that had a worker, so an unstarted slot is neither joined nor
 * reported.
 */
static td_void libapi_svp_npu_model_execute_multithread(td_void)
{
    pthread_t execute_threads[MAX_THREAD_NUM];
    td_u32 index[MAX_THREAD_NUM];
    td_u32 started = 0;
    td_u32 model_index;

    for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
        /* Each worker gets its own stable slot variable to read its index from. */
        index[model_index] = model_index;
        int err = pthread_create(&execute_threads[model_index], NULL, libapi_svp_execute_func_contious,
            (void *)&index[model_index]);
        if (err != 0) {
            fprintf(stderr, "create execute thread[%u] fail, error is %d.\n", (unsigned int)model_index, err);
            break;
        }
        started++;
    }

    for (model_index = 0; model_index < started; model_index++) {
        /* The workers always return NULL, so their exit value is not collected. */
        int err = pthread_join(execute_threads[model_index], NULL);
        if (err != 0) {
            fprintf(stderr, "join execute thread[%u] fail, error is %d.\n", (unsigned int)model_index, err);
        }
    }

    for (model_index = 0; model_index < started; model_index++) {
        libapi_npu_output_model_result(model_index);
    }
}
/*
 * function : show the sample of npu resnet50_multithread
 *
 * Loads "./data/model/resnet50.om" into MAX_THREAD_NUM slots, prepares the
 * datasets and input buffers of every slot, and runs all slots concurrently.
 *
 * Setup stops at the first failing step. Each setup stage keeps its own
 * count of completed slots, and teardown undoes every stage for exactly the
 * slots that completed it. Work done by earlier loop iterations is therefore
 * always released, and slots that were never set up are not touched.
 * Inference only runs when all slots were fully prepared.
 */
td_void libapi_svp_npu_acl_resnet50_multithread(td_void)
{
    td_void *data_buf[MAX_THREAD_NUM] = {TD_NULL};
    size_t buf_size[MAX_THREAD_NUM] = {0};
    td_u32 loaded_num = 0;   /* slots with a loaded model and description */
    td_u32 dataset_num = 0;  /* slots with input dataset and output */
    td_u32 buffer_num = 0;   /* slots with an allocated input buffer */
    td_u32 databuf_num = 0;  /* slots with an input data buffer attached */
    td_u32 model_index;
    td_s32 ret;
    const char *om_model_path = "./data/model/resnet50.om";

    ret = libapi_svp_npu_acl_prepare_init();
    if (ret != TD_SUCCESS) {
        return;
    }

    for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
        ret = libapi_svp_npu_load_model(om_model_path, model_index, TD_FALSE);
        if (ret != TD_SUCCESS) {
            break;
        }
        loaded_num++;

        ret = libapi_svp_npu_dataset_prepare_init(model_index);
        if (ret != TD_SUCCESS) {
            break;
        }
        dataset_num++;

        ret = libapi_svp_npu_get_input_data(&data_buf[model_index], &buf_size[model_index], model_index);
        if (ret != TD_SUCCESS) {
            macro_svp_trace_err("execute create input fail.\n");
            break;
        }
        buffer_num++;

        ret = libapi_svp_npu_create_input_databuf(data_buf[model_index], buf_size[model_index], model_index);
        if (ret != TD_SUCCESS) {
            macro_svp_trace_err("memcpy_s device buffer fail.\n");
            break;
        }
        databuf_num++;
    }

    /* Only run when the loop finished without breaking out early. */
    if (databuf_num == MAX_THREAD_NUM) {
        libapi_svp_npu_model_execute_multithread();
    }

    /* Teardown in reverse order of setup, limited to what was really created. */
    libapi_svp_npu_destroy_input_databuf(databuf_num);
    libapi_svp_npu_release_input_data(data_buf, buf_size, buffer_num);
    libapi_svp_npu_dataset_prepare_exit(dataset_num);
    /* Also releases the ACL runtime, even when no model was loaded. */
    libapi_svp_npu_acl_prepare_exit(loaded_num);
}
/*
 * Sample entry point for the dynamic-batch model
 * "./data/model/mobilenet_v3_dynamic_batch.om", loaded into slot 0 through
 * the cached load path.
 *
 * Sequence: initialise ACL, load the model, create the cached input/output,
 * run libapi_svp_npu_loop_execute_dynamicbatch(0), then release everything.
 * Once ACL was initialised, every exit path ends with
 * libapi_svp_npu_destroy_resource() so the device is reset and ACL finalized.
 */
td_void libapi_svp_npu_acl_mobilenet_v3_dynamicbatch(td_void)
{
    td_s32 ret;
    const char *om_model_path = "./data/model/mobilenet_v3_dynamic_batch.om";

    ret = libapi_svp_npu_acl_prepare_init();
    if (ret != TD_SUCCESS) {
        /* prepare_init already tore down the runtime on failure. */
        return;
    }

    ret = libapi_svp_npu_load_model(om_model_path, 0, TD_TRUE);
    if (ret != TD_SUCCESS) {
        goto acl_process_end0;
    }

    ret = libapi_svp_npu_create_cached_input_output(0);
    if (ret != TD_SUCCESS) {
        goto acl_process_end0;
    }

    libapi_svp_npu_loop_execute_dynamicbatch(0);

    libapi_npu_destroy_cached_input(0);
    libapi_npu_destroy_cached_output(0);
acl_process_end0:
    libapi_npu_destroy_desc(0);
    libapi_npu_unload_model_cached(0);
    /*
     * Counterpart of the successful prepare_init above; without it the
     * device stays open and ACL is never finalized.
     * NOTE(review): assumes libapi_svp_npu_loop_execute_dynamicbatch() does
     * not release the runtime itself -- confirm.
     */
    libapi_svp_npu_destroy_resource();
}
/*
 * function : show the sample of npu resnet50
 *
 * Single-model sample using slot 0. Despite the function name, the model
 * currently loaded is "yolov5s_v6.2.om" (it replaced
 * "./data/model/resnet50.om").
 *
 * Sequence: initialise ACL, load the model, create the datasets, allocate
 * the input buffer, attach it as input data buffer, execute once and output
 * the result. Each failure jumps to the label that releases what was set up
 * before the failing step.
 */
td_void libapi_svp_npu_acl_resnet50(td_void)
{
    td_void *data_buf = TD_NULL;
    size_t buf_size;
    td_s32 ret;
    const char *om_model_path = "yolov5s_v6.2.om";

    /* prepare_init takes no parameters; the model path is only needed for loading. */
    ret = libapi_svp_npu_acl_prepare_init();
    if (ret != TD_SUCCESS) {
        return;
    }

    ret = libapi_svp_npu_load_model(om_model_path, 0, TD_FALSE);
    if (ret != TD_SUCCESS) {
        goto acl_process_end0;
    }

    ret = libapi_svp_npu_dataset_prepare_init(0);
    if (ret != TD_SUCCESS) {
        goto acl_process_end0;
    }

    ret = libapi_svp_npu_get_input_data(&data_buf, &buf_size, 0);
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute create input fail.\n");
        goto acl_process_end1;
    }

    ret = libapi_svp_npu_create_input_databuf(data_buf, buf_size, 0);
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("memcpy_s device buffer fail.\n");
        goto acl_process_end2;
    }

    ret = libapi_npu_model_execute(0);
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute inference fail.\n");
        goto acl_process_end3;
    }

    libapi_npu_output_model_result(0);

acl_process_end3:
    libapi_svp_npu_destroy_input_databuf(1);
acl_process_end2:
    libapi_svp_npu_release_input_data(&data_buf, &buf_size, 1);
acl_process_end1:
    libapi_svp_npu_dataset_prepare_exit(1);
acl_process_end0:
    libapi_svp_npu_acl_prepare_exit(1);
}
/*
 * function : npu resnet50 sample signal handle
 *
 * Forwards to libapi_svp_npu_acl_resnet50_stop(), which releases the ACL
 * runtime and prints an abnormal-termination notice.
 * NOTE(review): if this is invoked from a real signal handler, confirm that
 * the callee is async-signal-safe; it writes to stdout through stdio.
 */
td_void libapi_svp_npu_acl_resnet50_handle_sig(td_void)
{
libapi_svp_npu_acl_resnet50_stop();
}
// /* function : show the sample of npu resnet50 */
// td_void libapi_svp_npu_acl_resnet50(const char* om_model_path)
// {
// td_void *data_buf = TD_NULL;
// size_t buf_size;
// td_s32 ret;
// // const char *om_model_path = "./data/model/resnet50.om";
// const char *om_model_path = "yolov5s_v6.2.om";
// ret = libapi_svp_npu_acl_prepare_init(om_model_path);
// if (ret != TD_SUCCESS) {
// return;
// }
// ret = libapi_svp_npu_load_model(om_model_path, 0, TD_FALSE);
// if (ret != TD_SUCCESS) {
// goto acl_process_end0;
// }
// ret = libapi_svp_npu_dataset_prepare_init(0);
// if (ret != TD_SUCCESS) {
// goto acl_process_end0;
// }
// ret = libapi_svp_npu_get_input_data(&data_buf, &buf_size, 0);
// if (ret != TD_SUCCESS) {
// macro_svp_trace_err("execute create input fail.\n");
// goto acl_process_end1;
// }
// ret = libapi_svp_npu_create_input_databuf(data_buf, buf_size, 0);
// if (ret != TD_SUCCESS) {
// macro_svp_trace_err("memcpy_s device buffer fail.\n");
// goto acl_process_end2;
// }
// ret = libapi_npu_model_execute(0);
// if (ret != TD_SUCCESS) {
// macro_svp_trace_err("execute inference fail.\n");
// goto acl_process_end3;
// }
// libapi_npu_output_model_result(0);
// acl_process_end3:
// libapi_svp_npu_destroy_input_databuf(1);
// acl_process_end2:
// libapi_svp_npu_release_input_data(&data_buf, &buf_size, 1);
// acl_process_end1:
// libapi_svp_npu_dataset_prepare_exit(1);
// acl_process_end0:
// libapi_svp_npu_acl_prepare_exit(1);
// }