// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
// Forward declarations for the Vulkan backend; full definitions live elsewhere.
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN

class Allocator;

// Per-inference configuration knobs for ncnn.
// Holds allocator hooks, threading settings, and feature toggles that control
// which optimized code paths a network is allowed to use. An Option is plain
// data with no behavior beyond its default-constructing ctor; copy it freely.
// NOTE(review): member order is part of the public ABI of this exported class —
// do not reorder or remove fields.
class NCNN_EXPORT Option
{
public:
    // default option
    Option();

public:
    // light mode
    // intermediate blob will be recycled when enabled
    // enabled by default
    bool lightmode;

    // thread count
    // default value is the one returned by get_cpu_count()
    int num_threads;

    // blob memory allocator
    Allocator* blob_allocator;

    // workspace memory allocator
    Allocator* workspace_allocator;

#if NCNN_VULKAN
    // blob memory allocator
    VkAllocator* blob_vkallocator;

    // workspace memory allocator
    VkAllocator* workspace_vkallocator;

    // staging memory allocator
    VkAllocator* staging_vkallocator;

    // pipeline cache
    PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN

    // the time openmp threads busy-wait for more work before going to sleep
    // default value is 20ms to keep the cores enabled
    // without too much extra power consumption afterwards
    int openmp_blocktime;

    // enable winograd convolution optimization
    // improve convolution 3x3 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // enable sgemm convolution optimization
    // improve convolution 1x1 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // enable quantized int8 inference
    // use low-precision int8 path for quantized model
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // enable vulkan compute
    bool use_vulkan_compute;

    // enable bf16 data type for storage
    // improve most operator performance on all arm devices, may consume more memory
    bool use_bf16_storage;

    // enable options for gpu inference
    // fp16/int8 packing, storage, and arithmetic toggles
    // NOTE(review): exact interaction between these flags is decided by the
    // backend implementations, not visible in this header — see the layer code.
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_packed;
    bool use_int8_storage;
    bool use_int8_arithmetic;

    // enable simd-friendly packed memory layout
    // improve all operator performance on all arm devices, will consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_packing_layout;

    // enable pack8 shader variants for gpu inference
    bool use_shader_pack8;

    // subgroup option
    // toggles for the Vulkan subgroup feature tiers used by shaders
    bool use_subgroup_basic;
    bool use_subgroup_vote;
    bool use_subgroup_ballot;
    bool use_subgroup_shuffle;

    // turn on for adreno
    bool use_image_storage;
    bool use_tensor_storage;

    // reserved for future use; keeps the struct layout stable
    bool use_reserved_0;

    // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
    // default value is 3
    // 0 = DAZ OFF, FTZ OFF
    // 1 = DAZ ON , FTZ OFF
    // 2 = DAZ OFF, FTZ ON
    // 3 = DAZ ON, FTZ ON
    int flush_denormals;

    // use a thread-local pool allocator instead of the configured allocators
    // NOTE(review): semantics inferred from the name — confirm against net.cpp
    bool use_local_pool_allocator;

    // enable local memory optimization for gpu inference
    bool use_shader_local_memory;

    // enable cooperative matrix optimization for gpu inference
    bool use_cooperative_matrix;

    // more fine-grained control of winograd convolution
    bool use_winograd23_convolution;
    bool use_winograd43_convolution;
    bool use_winograd63_convolution;

    // this option is turned on for A53/A55 automatically
    // but you can force this on/off if you wish
    bool use_a53_a55_optimized_kernel;

    // enable options for shared variables in gpu shader
    bool use_fp16_uniform;
    bool use_int8_uniform;

    // reserved for future use; keep the struct layout stable
    bool use_reserved_9;
    bool use_reserved_10;
    bool use_reserved_11;
};

} // namespace ncnn

#endif // NCNN_OPTION_H
|