// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN

class Allocator;
class NCNN_EXPORT Option
{
public:
    // default option
    Option();

public:
    // light mode
    // intermediate blob will be recycled when enabled
    // enabled by default
    bool lightmode;

    // thread count
    // default value is the one returned by get_cpu_count()
    int num_threads;

    // blob memory allocator
    Allocator* blob_allocator;

    // workspace memory allocator
    Allocator* workspace_allocator;

#if NCNN_VULKAN
    // blob memory allocator
    VkAllocator* blob_vkallocator;

    // workspace memory allocator
    VkAllocator* workspace_vkallocator;

    // staging memory allocator
    VkAllocator* staging_vkallocator;

    // pipeline cache
    PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN

    // the time openmp threads busy-wait for more work before going to sleep
    // default value is 20ms to keep the cores enabled
    // without too much extra power consumption afterwards
    int openmp_blocktime;

    // enable winograd convolution optimization
    // improve convolution 3x3 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // enable sgemm convolution optimization
    // improve convolution 1x1 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // enable quantized int8 inference
    // use low-precision int8 path for quantized model
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // enable vulkan compute
    bool use_vulkan_compute;

    // enable bf16 data type for storage
    // improve most operator performance on all arm devices, may consume more memory
    bool use_bf16_storage;

    // enable options for gpu inference
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_packed;
    bool use_int8_storage;
    bool use_int8_arithmetic;

    // enable simd-friendly packed memory layout
    // improve all operator performance on all arm devices, will consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_packing_layout;

    bool use_shader_pack8;

    // subgroup option
    bool use_subgroup_basic;
    bool use_subgroup_vote;
    bool use_subgroup_ballot;
    bool use_subgroup_shuffle;

    // turn on for adreno
    bool use_image_storage;
    bool use_tensor_storage;

    bool use_reserved_0;

    // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
    // default value is 3
    // 0 = DAZ OFF, FTZ OFF
    // 1 = DAZ ON , FTZ OFF
    // 2 = DAZ OFF, FTZ ON
    // 3 = DAZ ON , FTZ ON
    int flush_denormals;
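
    // a minimal usage sketch for the modes enumerated above, on an Option instance:
    //
    //   ncnn::Option opt;
    //   opt.flush_denormals = 3; // DAZ ON, FTZ ON: denormal inputs and results treated as zero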
    bool use_local_pool_allocator;

    // enable local memory optimization for gpu inference
    bool use_shader_local_memory;

    // enable cooperative matrix optimization for gpu inference
    bool use_cooperative_matrix;

    // more fine-grained control of winograd convolution
    bool use_winograd23_convolution;
    bool use_winograd43_convolution;
    bool use_winograd63_convolution;

    // this option is turned on for A53/A55 automatically
    // but you can force this on/off if you wish
    bool use_a53_a55_optimized_kernel;

    // enable options for shared variables in gpu shader
    bool use_fp16_uniform;
    bool use_int8_uniform;

    bool use_reserved_9;
    bool use_reserved_10;
    bool use_reserved_11;
};

} // namespace ncnn

#endif // NCNN_OPTION_H
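
// A minimal usage sketch, assuming the typical ncnn flow where Net exposes a
// public `opt` member (see net.h). Since several options above note that
// "changes should be applied before loading network structure and weight",
// tune the Option first, then load. The model file names are hypothetical.
//
//   ncnn::Option opt;
//   opt.num_threads = 4;
//   opt.use_vulkan_compute = true;
//   opt.use_winograd_convolution = true;
//
//   ncnn::Net net;
//   net.opt = opt;
//   net.load_param("model.param");
//   net.load_model("model.bin");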