// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
// Forward declarations for the Vulkan backend; full definitions live elsewhere.
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN

class Allocator;

// Per-inference configuration knobs for ncnn.
// Holds allocator hooks, threading settings, and feature toggles that control
// which optimized code paths a network is allowed to use. An Option is plain
// data with no behavior beyond its default-constructing ctor; copy it freely.
// NOTE(review): member order is part of the public ABI of this exported class —
// do not reorder or remove fields.
class NCNN_EXPORT Option
{
public:
    // default option
    Option();

public:
    // light mode
    // intermediate blob will be recycled when enabled
    // enabled by default
    bool lightmode;

    // thread count
    // default value is the one returned by get_cpu_count()
    int num_threads;

    // blob memory allocator
    Allocator* blob_allocator;

    // workspace memory allocator
    Allocator* workspace_allocator;

#if NCNN_VULKAN
    // blob memory allocator
    VkAllocator* blob_vkallocator;

    // workspace memory allocator
    VkAllocator* workspace_vkallocator;

    // staging memory allocator
    VkAllocator* staging_vkallocator;

    // pipeline cache
    PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN

    // the time openmp threads busy-wait for more work before going to sleep
    // default value is 20ms to keep the cores enabled
    // without too much extra power consumption afterwards
    int openmp_blocktime;

    // enable winograd convolution optimization
    // improve convolution 3x3 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // enable sgemm convolution optimization
    // improve convolution 1x1 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // enable quantized int8 inference
    // use low-precision int8 path for quantized model
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // enable vulkan compute
    bool use_vulkan_compute;

    // enable bf16 data type for storage
    // improve most operator performance on all arm devices, may consume more memory
    bool use_bf16_storage;

    // enable options for gpu inference
    // fp16/int8 packing, storage, and arithmetic toggles
    // NOTE(review): exact interaction between these flags is decided by the
    // backend implementations, not visible in this header — see the layer code.
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_packed;
    bool use_int8_storage;
    bool use_int8_arithmetic;

    // enable simd-friendly packed memory layout
    // improve all operator performance on all arm devices, will consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_packing_layout;

    // enable pack8 shader variants for gpu inference
    bool use_shader_pack8;

    // subgroup option
    // toggles for the Vulkan subgroup feature tiers used by shaders
    bool use_subgroup_basic;
    bool use_subgroup_vote;
    bool use_subgroup_ballot;
    bool use_subgroup_shuffle;

    // turn on for adreno
    bool use_image_storage;
    bool use_tensor_storage;

    // reserved for future use; keeps the struct layout stable
    bool use_reserved_0;

    // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
    // default value is 3
    // 0 = DAZ OFF, FTZ OFF
    // 1 = DAZ ON , FTZ OFF
    // 2 = DAZ OFF, FTZ ON
    // 3 = DAZ ON, FTZ ON
    int flush_denormals;

    // use a thread-local pool allocator instead of the configured allocators
    // NOTE(review): semantics inferred from the name — confirm against net.cpp
    bool use_local_pool_allocator;

    // enable local memory optimization for gpu inference
    bool use_shader_local_memory;

    // enable cooperative matrix optimization for gpu inference
    bool use_cooperative_matrix;

    // more fine-grained control of winograd convolution
    bool use_winograd23_convolution;
    bool use_winograd43_convolution;
    bool use_winograd63_convolution;

    // this option is turned on for A53/A55 automatically
    // but you can force this on/off if you wish
    bool use_a53_a55_optimized_kernel;

    // enable options for shared variables in gpu shader
    bool use_fp16_uniform;
    bool use_int8_uniform;

    // reserved for future use; keep the struct layout stable
    bool use_reserved_9;
    bool use_reserved_10;
    bool use_reserved_11;
};

} // namespace ncnn

#endif // NCNN_OPTION_H
|