511 lines
14 KiB
C
Executable File
511 lines
14 KiB
C
Executable File
/*
|
|
Copyright (c), 2001-2022, Shenshu Tech. Co., Ltd.
|
|
*/
|
|
|
|
#include "libapi_npu_process.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
#include <semaphore.h>
|
|
#include <pthread.h>
|
|
#include <limits.h>
|
|
|
|
#include "ot_common_svp.h"
|
|
#include "libapi_common_svp.h"
|
|
#include "libapi_npu_model.h"
|
|
|
|
/* Device id passed to every aclrtSetDevice() / aclrtResetDevice() call in this file. */
static td_u32 g_npu_dev_id = 0;
|
|
/*
 * Read the image file "testyuv.sp420" (resolved with realpath()) into dev_buf.
 *
 * dev_buf  : destination buffer, written with at most buf_size bytes.
 * buf_size : capacity of dev_buf in bytes; a larger file is truncated to
 *            its first buf_size bytes.
 *
 * Returns TD_SUCCESS when the data was read. Returns TD_FAILURE when the path
 * cannot be resolved, the file cannot be opened, the file is empty, or a
 * seek / tell / read call fails.
 */
static td_s32 libapi_svp_npu_fill_input_data(td_void *dev_buf, size_t buf_size)
{
    td_s32 ret;
    td_s32 result = TD_FAILURE;
    td_char path[PATH_MAX] = { 0 };
    long file_len;
    size_t read_size;
    size_t items_read;
    FILE *fp = TD_NULL;

    if (realpath("testyuv.sp420", path) == TD_NULL) {
        macro_svp_trace_err("Invalid file!.\n");
        return TD_FAILURE;
    }

    fp = fopen(path, "rb");
    macro_svp_check_exps_return(fp == TD_NULL, TD_FAILURE, ENUM_SVP_ERR_LEVEL_ERROR,
        "open image file failed!\n");

    ret = fseek(fp, 0L, SEEK_END);
    macro_svp_check_exps_goto(ret != 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Fseek failed!\n");

    /*
     * ftell() reports failure as -1L. Keep the signed result so that the
     * error is caught here instead of turning into a huge unsigned size.
     */
    file_len = ftell(fp);
    macro_svp_check_exps_goto(file_len <= 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Ftell failed!\n");

    ret = fseek(fp, 0L, SEEK_SET);
    macro_svp_check_exps_goto(ret != 0, end, ENUM_SVP_ERR_LEVEL_ERROR, "Fseek failed!\n");

    /* Never read more than the destination buffer can hold. */
    read_size = ((size_t)file_len > buf_size) ? buf_size : (size_t)file_len;
    items_read = fread(dev_buf, read_size, 1, fp);
    macro_svp_check_exps_goto(items_read != 1, end, ENUM_SVP_ERR_LEVEL_ERROR, "Read file failed!\n");

    result = TD_SUCCESS;

end:
    /* fp is always open here: the fopen() failure path returned above. */
    (td_void)fclose(fp);
    return result;
}
|
|
|
|
static td_void libapi_svp_npu_destroy_resource(td_void)
|
|
{
|
|
aclError ret;
|
|
|
|
ret = aclrtResetDevice(g_npu_dev_id);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("reset device fail\n");
|
|
}
|
|
macro_svp_trace_info("end to reset device is %d\n", g_npu_dev_id);
|
|
|
|
ret = aclFinalize();
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("finalize acl fail\n");
|
|
}
|
|
macro_svp_trace_info("end to finalize acl\n");
|
|
}
|
|
|
|
static td_s32 libapi_svp_npu_init_resource(td_void)
|
|
{
|
|
/* ACL init */
|
|
const char *acl_config_path = "";
|
|
aclrtRunMode run_mode;
|
|
td_s32 ret;
|
|
|
|
ret = aclInit(acl_config_path);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("acl init fail.\n");
|
|
return TD_FAILURE;
|
|
}
|
|
macro_svp_trace_info("acl init success.\n");
|
|
|
|
/* open device */
|
|
ret = aclrtSetDevice(g_npu_dev_id);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("acl open device %d fail.\n", g_npu_dev_id);
|
|
return TD_FAILURE;
|
|
}
|
|
macro_svp_trace_info("open device %d success.\n", g_npu_dev_id);
|
|
|
|
/* get run mode */
|
|
ret = aclrtGetRunMode(&run_mode);
|
|
if ((ret != ACL_ERROR_NONE) || (run_mode != ACL_DEVICE)) {
|
|
macro_svp_trace_err("acl get run mode fail.\n");
|
|
return TD_FAILURE;
|
|
}
|
|
macro_svp_trace_info("get run mode success\n");
|
|
|
|
return TD_SUCCESS;
|
|
}
|
|
|
|
static td_void libapi_svp_npu_acl_resnet50_stop(td_void)
|
|
{
|
|
libapi_svp_npu_destroy_resource();
|
|
printf("\033[0;31mprogram termination abnormally!\033[0;39m\n");
|
|
}
|
|
|
|
static td_s32 libapi_svp_npu_acl_prepare_init()
|
|
{
|
|
td_s32 ret;
|
|
|
|
ret = libapi_svp_npu_init_resource();
|
|
if (ret != TD_SUCCESS) {
|
|
libapi_svp_npu_destroy_resource();
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Destroy the description of and unload models 0 .. thread_num-1 (ascending
 * order), then release the ACL runtime.
 */
static td_void libapi_svp_npu_acl_prepare_exit(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_desc(idx);
        libapi_npu_unload_model(idx);
        idx++;
    }

    libapi_svp_npu_destroy_resource();
}
|
|
|
|
/*
 * Resolve om_model_path, load the model into slot model_index, and create its
 * description.
 *
 * om_model_path : path of the .om model file; must be non-NULL and shorter
 *                 than PATH_MAX.
 * model_index   : slot handed to the libapi_npu_* model helpers.
 * is_cached     : TD_TRUE selects libapi_npu_load_model_with_mem_cached(),
 *                 anything else libapi_npu_load_model_with_mem().
 *
 * Returns TD_SUCCESS on success. Returns TD_FAILURE for a NULL, over-long or
 * unresolvable path, otherwise the failing helper's status.
 */
static td_s32 libapi_svp_npu_load_model(const char* om_model_path, td_u32 model_index, td_bool is_cached)
{
    td_char path[PATH_MAX] = { 0 };
    td_s32 ret;

    if (om_model_path == TD_NULL) {
        macro_svp_trace_err("invalid file!.\n");
        return TD_FAILURE;
    }
    /* Measure the string itself; sizeof on the parameter only yields the pointer size. */
    if (strlen(om_model_path) >= PATH_MAX) {
        macro_svp_trace_err("pathname too long!.\n");
        return TD_FAILURE;
    }
    if (realpath(om_model_path, path) == TD_NULL) {
        macro_svp_trace_err("invalid file!.\n");
        return TD_FAILURE;
    }

    if (is_cached == TD_TRUE) {
        ret = libapi_npu_load_model_with_mem_cached(path, model_index);
    } else {
        ret = libapi_npu_load_model_with_mem(path, model_index);
    }

    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute load model fail, model_index is:%d.\n", model_index);
        goto acl_prepare_end1;
    }
    ret = libapi_npu_create_desc(model_index);
    if (ret != TD_SUCCESS) {
        macro_svp_trace_err("execute create desc fail.\n");
        goto acl_prepare_end2;
    }

    return TD_SUCCESS;

    /*
     * NOTE(review): the cleanup below is kept exactly as it was. It runs the
     * destroy/unload helpers even for the step that just failed, and uses the
     * non-cached unload for cached loads - presumably the helpers tolerate
     * this; confirm against their implementation.
     */
acl_prepare_end2:
    libapi_npu_destroy_desc(model_index);

acl_prepare_end1:
    libapi_npu_unload_model(model_index);
    return ret;
}
|
|
/*
 * Create the input dataset and the output for model slot model_index.
 * If the output cannot be created, the input dataset is destroyed again.
 *
 * Returns TD_SUCCESS when both exist, TD_FAILURE otherwise.
 */
static td_s32 libapi_svp_npu_dataset_prepare_init(td_u32 model_index)
{
    if (libapi_npu_create_input_dataset(model_index) != TD_SUCCESS) {
        macro_svp_trace_err("execute create input fail.\n");
        return TD_FAILURE;
    }

    if (libapi_npu_create_output(model_index) == TD_SUCCESS) {
        return TD_SUCCESS;
    }

    /* Output creation failed: roll back the input dataset created above. */
    libapi_npu_destroy_input_dataset(model_index);
    macro_svp_trace_err("execute create output fail.\n");
    return TD_FAILURE;
}
|
|
|
|
/*
 * Create the cached input and the cached output for model slot model_index.
 * If the output cannot be created, the cached input is destroyed again.
 *
 * Returns TD_SUCCESS when both exist, TD_FAILURE otherwise.
 */
static td_s32 libapi_svp_npu_create_cached_input_output(td_u32 model_index)
{
    if (libapi_npu_create_cached_input(model_index) != TD_SUCCESS) {
        macro_svp_trace_err("execute create input fail.\n");
        return TD_FAILURE;
    }

    if (libapi_npu_create_cached_output(model_index) == TD_SUCCESS) {
        return TD_SUCCESS;
    }

    /* Output creation failed: roll back the cached input created above. */
    libapi_npu_destroy_cached_input(model_index);
    macro_svp_trace_err("execute create output fail.\n");
    return TD_FAILURE;
}
|
|
|
|
/*
 * Destroy the output and then the input dataset of model slots
 * 0 .. thread_num-1, in ascending order.
 */
static td_void libapi_svp_npu_dataset_prepare_exit(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_output(idx);
        libapi_npu_destroy_input_dataset(idx);
        idx++;
    }
}
|
|
|
|
/*
 * Free the first thread_num buffers of data_buf with aclrtFree().
 *
 * data_len is accepted for symmetry with the allocation side but is not
 * used. The aclrtFree() status is deliberately discarded.
 */
static td_void libapi_svp_npu_release_input_data(td_void **data_buf, size_t *data_len, td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        ot_unused(data_len[idx]);
        (td_void)aclrtFree(data_buf[idx]);
        idx++;
    }
}
|
|
|
|
/*
 * Allocate an input buffer for model slot model_index.
 *
 * The size is queried with libapi_npu_get_input_size_by_index(0, ...)
 * (presumably input number 0 of the model - confirm against the helper) and
 * the buffer is obtained from aclrtMalloc().
 *
 * data_buf : receives the allocated buffer.
 * data_len : receives the buffer size in bytes; written only on success.
 *
 * Returns TD_SUCCESS on success, TD_FAILURE if the size query or the
 * allocation fails.
 */
static td_s32 libapi_svp_npu_get_input_data(td_void **data_buf, size_t *data_len, td_u32 model_index)
{
    size_t input_size;
    td_s32 status;

    status = libapi_npu_get_input_size_by_index(0, &input_size, model_index);
    if (status != TD_SUCCESS) {
        macro_svp_trace_err("execute get input size fail.\n");
        return TD_FAILURE;
    }

    status = aclrtMalloc(data_buf, input_size, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (status != ACL_ERROR_NONE) {
        macro_svp_trace_err("malloc device buffer fail. size is %zu, errorCode is %d.\n", input_size, status);
        return TD_FAILURE;
    }

    /*
     * Filling the buffer from a file is currently disabled, so the buffer is
     * handed back with whatever contents aclrtMalloc() left in it.
     * NOTE(review): if this is re-enabled, the error path should free
     * *data_buf (the allocation), not data_buf.
     */
    // ret = libapi_svp_npu_fill_input_data(*data_buf, buf_size);
    // if (ret != TD_SUCCESS) {
    //     macro_svp_trace_err("memcpy_s device buffer fail.\n");
    //     (td_void)aclrtFree(data_buf);
    //     return TD_FAILURE;
    // }

    *data_len = input_size;

    macro_svp_trace_info("get input data success\n");

    return TD_SUCCESS;
}
|
|
|
|
/*
 * Forward data_buf / data_len / model_index to
 * libapi_npu_create_input_databuf() and return its status unchanged.
 */
static td_s32 libapi_svp_npu_create_input_databuf(td_void *data_buf, size_t data_len, td_u32 model_index)
{
    const td_s32 status = libapi_npu_create_input_databuf(data_buf, data_len, model_index);

    return status;
}
|
|
|
|
/* Destroy the input data buffers of model slots 0 .. thread_num-1, in ascending order. */
static td_void libapi_svp_npu_destroy_input_databuf(td_u32 thread_num)
{
    td_u32 idx = 0;

    while (idx < thread_num) {
        libapi_npu_destroy_input_databuf(idx);
        idx++;
    }
}
|
|
|
|
void *libapi_svp_execute_func_contious(void *args)
|
|
{
|
|
td_s32 ret;
|
|
td_u32 model_index = *(td_u32 *)args;
|
|
|
|
ret = aclrtSetDevice(g_npu_dev_id);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("acl open device %d fail.\n", g_npu_dev_id);
|
|
return NULL;
|
|
}
|
|
|
|
macro_svp_trace_info("open device %d success.\n", g_npu_dev_id);
|
|
|
|
ret = libapi_npu_model_execute(model_index);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("execute inference failed of thread[%d].\n", model_index);
|
|
}
|
|
|
|
ret = aclrtResetDevice(g_npu_dev_id);
|
|
if (ret != ACL_ERROR_NONE) {
|
|
macro_svp_trace_err("model[%d]reset device failed\n", model_index);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static td_void libapi_svp_npu_model_execute_multithread()
|
|
{
|
|
pthread_t execute_threads[MAX_THREAD_NUM] = {0};
|
|
td_u32 index[MAX_THREAD_NUM];
|
|
td_u32 model_index;
|
|
|
|
for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
|
|
index[model_index] = model_index;
|
|
pthread_create(&execute_threads[model_index], NULL, libapi_svp_execute_func_contious,
|
|
(void *)&index[model_index]);
|
|
}
|
|
|
|
void *waitret[MAX_THREAD_NUM];
|
|
for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
|
|
pthread_join(execute_threads[model_index], &waitret[model_index]);
|
|
}
|
|
|
|
for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
|
|
libapi_npu_output_model_result(model_index);
|
|
}
|
|
}
|
|
|
|
/* function : show the sample of npu resnet50_multithread */
|
|
td_void libapi_svp_npu_acl_resnet50_multithread(td_void)
|
|
{
|
|
td_void *data_buf[MAX_THREAD_NUM] = {TD_NULL};
|
|
size_t buf_size[MAX_THREAD_NUM];
|
|
td_u32 model_index;
|
|
td_s32 ret;
|
|
|
|
const char *om_model_path = "./data/model/resnet50.om";
|
|
ret = libapi_svp_npu_acl_prepare_init();
|
|
if (ret != TD_SUCCESS) {
|
|
return;
|
|
}
|
|
|
|
for (model_index = 0; model_index < MAX_THREAD_NUM; model_index++) {
|
|
ret = libapi_svp_npu_load_model(om_model_path, model_index, TD_FALSE);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end0;
|
|
}
|
|
|
|
ret = libapi_svp_npu_dataset_prepare_init(model_index);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end1;
|
|
}
|
|
|
|
ret = libapi_svp_npu_get_input_data(&data_buf[model_index], &buf_size[model_index], model_index);
|
|
if (ret != TD_SUCCESS) {
|
|
macro_svp_trace_err("execute create input fail.\n");
|
|
goto acl_process_end2;
|
|
}
|
|
|
|
ret = libapi_svp_npu_create_input_databuf(data_buf[model_index], buf_size[model_index], model_index);
|
|
if (ret != TD_SUCCESS) {
|
|
macro_svp_trace_err("memcpy_s device buffer fail.\n");
|
|
goto acl_process_end3;
|
|
}
|
|
}
|
|
|
|
libapi_svp_npu_model_execute_multithread();
|
|
|
|
acl_process_end3:
|
|
libapi_svp_npu_destroy_input_databuf(MAX_THREAD_NUM);
|
|
acl_process_end2:
|
|
libapi_svp_npu_release_input_data(data_buf, buf_size, MAX_THREAD_NUM);
|
|
acl_process_end1:
|
|
libapi_svp_npu_dataset_prepare_exit(MAX_THREAD_NUM);
|
|
acl_process_end0:
|
|
libapi_svp_npu_acl_prepare_exit(MAX_THREAD_NUM);
|
|
}
|
|
|
|
td_void libapi_svp_npu_acl_mobilenet_v3_dynamicbatch(td_void)
|
|
{
|
|
td_s32 ret;
|
|
const char *om_model_path = "./data/model/mobilenet_v3_dynamic_batch.om";
|
|
ret = libapi_svp_npu_acl_prepare_init();
|
|
if (ret != TD_SUCCESS) {
|
|
return;
|
|
}
|
|
|
|
ret = libapi_svp_npu_load_model(om_model_path, 0, TD_TRUE);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end0;
|
|
}
|
|
|
|
ret = libapi_svp_npu_create_cached_input_output(0);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end0;
|
|
}
|
|
|
|
libapi_svp_npu_loop_execute_dynamicbatch(0);
|
|
|
|
libapi_npu_destroy_cached_input(0);
|
|
libapi_npu_destroy_cached_output(0);
|
|
acl_process_end0:
|
|
libapi_npu_destroy_desc(0);
|
|
libapi_npu_unload_model_cached(0);
|
|
}
|
|
|
|
/* function : show the sample of npu resnet50 */
|
|
td_void libapi_svp_npu_acl_resnet50(td_void)
|
|
{
|
|
td_void *data_buf = TD_NULL;
|
|
size_t buf_size;
|
|
td_s32 ret;
|
|
|
|
// const char *om_model_path = "./data/model/resnet50.om";
|
|
const char *om_model_path = "yolov5s_v6.2.om";
|
|
ret = libapi_svp_npu_acl_prepare_init(om_model_path);
|
|
if (ret != TD_SUCCESS) {
|
|
return;
|
|
}
|
|
|
|
ret = libapi_svp_npu_load_model(om_model_path, 0, TD_FALSE);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end0;
|
|
}
|
|
|
|
ret = libapi_svp_npu_dataset_prepare_init(0);
|
|
if (ret != TD_SUCCESS) {
|
|
goto acl_process_end0;
|
|
}
|
|
|
|
ret = libapi_svp_npu_get_input_data(&data_buf, &buf_size, 0);
|
|
if (ret != TD_SUCCESS) {
|
|
macro_svp_trace_err("execute create input fail.\n");
|
|
goto acl_process_end1;
|
|
}
|
|
|
|
ret = libapi_svp_npu_create_input_databuf(data_buf, buf_size, 0);
|
|
if (ret != TD_SUCCESS) {
|
|
macro_svp_trace_err("memcpy_s device buffer fail.\n");
|
|
goto acl_process_end2;
|
|
}
|
|
|
|
ret = libapi_npu_model_execute(0);
|
|
if (ret != TD_SUCCESS) {
|
|
macro_svp_trace_err("execute inference fail.\n");
|
|
goto acl_process_end3;
|
|
}
|
|
|
|
libapi_npu_output_model_result(0);
|
|
|
|
acl_process_end3:
|
|
libapi_svp_npu_destroy_input_databuf(1);
|
|
acl_process_end2:
|
|
libapi_svp_npu_release_input_data(&data_buf, &buf_size, 1);
|
|
acl_process_end1:
|
|
libapi_svp_npu_dataset_prepare_exit(1);
|
|
acl_process_end0:
|
|
libapi_svp_npu_acl_prepare_exit(1);
|
|
}
|
|
|
|
/* function : npu resnet50 sample signal handle */
|
|
td_void libapi_svp_npu_acl_resnet50_handle_sig(td_void)
|
|
{
|
|
libapi_svp_npu_acl_resnet50_stop();
|
|
}
|
|
|
|
|
|
// /* function : show the sample of npu resnet50 */
|
|
// td_void libapi_svp_npu_acl_resnet50(const char* om_model_path)
|
|
// {
|
|
// td_void *data_buf = TD_NULL;
|
|
// size_t buf_size;
|
|
// td_s32 ret;
|
|
|
|
// // const char *om_model_path = "./data/model/resnet50.om";
|
|
// const char *om_model_path = "yolov5s_v6.2.om";
|
|
// ret = libapi_svp_npu_acl_prepare_init(om_model_path);
|
|
// if (ret != TD_SUCCESS) {
|
|
// return;
|
|
// }
|
|
|
|
// ret = libapi_svp_npu_load_model(om_model_path, 0, TD_FALSE);
|
|
// if (ret != TD_SUCCESS) {
|
|
// goto acl_process_end0;
|
|
// }
|
|
|
|
// ret = libapi_svp_npu_dataset_prepare_init(0);
|
|
// if (ret != TD_SUCCESS) {
|
|
// goto acl_process_end0;
|
|
// }
|
|
|
|
// ret = libapi_svp_npu_get_input_data(&data_buf, &buf_size, 0);
|
|
// if (ret != TD_SUCCESS) {
|
|
// macro_svp_trace_err("execute create input fail.\n");
|
|
// goto acl_process_end1;
|
|
// }
|
|
|
|
// ret = libapi_svp_npu_create_input_databuf(data_buf, buf_size, 0);
|
|
// if (ret != TD_SUCCESS) {
|
|
// macro_svp_trace_err("memcpy_s device buffer fail.\n");
|
|
// goto acl_process_end2;
|
|
// }
|
|
|
|
// ret = libapi_npu_model_execute(0);
|
|
// if (ret != TD_SUCCESS) {
|
|
// macro_svp_trace_err("execute inference fail.\n");
|
|
// goto acl_process_end3;
|
|
// }
|
|
|
|
// libapi_npu_output_model_result(0);
|
|
|
|
// acl_process_end3:
|
|
// libapi_svp_npu_destroy_input_databuf(1);
|
|
// acl_process_end2:
|
|
// libapi_svp_npu_release_input_data(&data_buf, &buf_size, 1);
|
|
// acl_process_end1:
|
|
// libapi_svp_npu_dataset_prepare_exit(1);
|
|
// acl_process_end0:
|
|
// libapi_svp_npu_acl_prepare_exit(1);
|
|
// }
|