179 lines
5.3 KiB
C
179 lines
5.3 KiB
C
![]() |
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
|
||
|
|
||
|
#ifndef NCNN_CPU_H
|
||
|
#define NCNN_CPU_H
|
||
|
|
||
|
#include <stddef.h>
|
||
|
|
||
|
#if (defined _WIN32 && !(defined __MINGW32__))
|
||
|
#define WIN32_LEAN_AND_MEAN
|
||
|
#include <windows.h>
|
||
|
#endif
|
||
|
#if defined __ANDROID__ || defined __linux__
|
||
|
#include <sched.h> // cpu_set_t
|
||
|
#endif
|
||
|
|
||
|
#include "platform.h"
|
||
|
|
||
|
namespace ncnn {
|
||
|
|
||
|
class NCNN_EXPORT CpuSet
|
||
|
{
|
||
|
public:
|
||
|
CpuSet();
|
||
|
void enable(int cpu);
|
||
|
void disable(int cpu);
|
||
|
void disable_all();
|
||
|
bool is_enabled(int cpu) const;
|
||
|
int num_enabled() const;
|
||
|
|
||
|
public:
|
||
|
#if (defined _WIN32 && !(defined __MINGW32__))
|
||
|
ULONG_PTR mask;
|
||
|
#endif
|
||
|
#if defined __ANDROID__ || defined __linux__
|
||
|
cpu_set_t cpu_set;
|
||
|
#endif
|
||
|
#if __APPLE__
|
||
|
unsigned int policy;
|
||
|
#endif
|
||
|
};
|
||
|
|
||
|
// test optional cpu features
|
||
|
// edsp = armv7 edsp
|
||
|
NCNN_EXPORT int cpu_support_arm_edsp();
|
||
|
// neon = armv7 neon or aarch64 asimd
|
||
|
NCNN_EXPORT int cpu_support_arm_neon();
|
||
|
// vfpv4 = armv7 fp16 + fma
|
||
|
NCNN_EXPORT int cpu_support_arm_vfpv4();
|
||
|
// asimdhp = aarch64 asimd half precision
|
||
|
NCNN_EXPORT int cpu_support_arm_asimdhp();
|
||
|
// cpuid = aarch64 cpuid info
|
||
|
NCNN_EXPORT int cpu_support_arm_cpuid();
|
||
|
// asimddp = aarch64 asimd dot product
|
||
|
NCNN_EXPORT int cpu_support_arm_asimddp();
|
||
|
// asimdfhm = aarch64 asimd fhm
|
||
|
NCNN_EXPORT int cpu_support_arm_asimdfhm();
|
||
|
// bf16 = aarch64 bf16
|
||
|
NCNN_EXPORT int cpu_support_arm_bf16();
|
||
|
// i8mm = aarch64 i8mm
|
||
|
NCNN_EXPORT int cpu_support_arm_i8mm();
|
||
|
// sve = aarch64 sve
|
||
|
NCNN_EXPORT int cpu_support_arm_sve();
|
||
|
// sve2 = aarch64 sve2
|
||
|
NCNN_EXPORT int cpu_support_arm_sve2();
|
||
|
// svebf16 = aarch64 svebf16
|
||
|
NCNN_EXPORT int cpu_support_arm_svebf16();
|
||
|
// svei8mm = aarch64 svei8mm
|
||
|
NCNN_EXPORT int cpu_support_arm_svei8mm();
|
||
|
// svef32mm = aarch64 svef32mm
|
||
|
NCNN_EXPORT int cpu_support_arm_svef32mm();
|
||
|
|
||
|
// avx = x86 avx
|
||
|
NCNN_EXPORT int cpu_support_x86_avx();
|
||
|
// fma = x86 fma
|
||
|
NCNN_EXPORT int cpu_support_x86_fma();
|
||
|
// xop = x86 xop
|
||
|
NCNN_EXPORT int cpu_support_x86_xop();
|
||
|
// f16c = x86 f16c
|
||
|
NCNN_EXPORT int cpu_support_x86_f16c();
|
||
|
// avx2 = x86 avx2 + fma + f16c
|
||
|
NCNN_EXPORT int cpu_support_x86_avx2();
|
||
|
// avx_vnni = x86 avx vnni
|
||
|
NCNN_EXPORT int cpu_support_x86_avx_vnni();
|
||
|
// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
|
||
|
NCNN_EXPORT int cpu_support_x86_avx512();
|
||
|
// avx512_vnni = x86 avx512 vnni
|
||
|
NCNN_EXPORT int cpu_support_x86_avx512_vnni();
|
||
|
// avx512_bf16 = x86 avx512 bf16
|
||
|
NCNN_EXPORT int cpu_support_x86_avx512_bf16();
|
||
|
// avx512_fp16 = x86 avx512 fp16
|
||
|
NCNN_EXPORT int cpu_support_x86_avx512_fp16();
|
||
|
|
||
|
// lsx = loongarch lsx
|
||
|
NCNN_EXPORT int cpu_support_loongarch_lsx();
|
||
|
// lasx = loongarch lasx
|
||
|
NCNN_EXPORT int cpu_support_loongarch_lasx();
|
||
|
|
||
|
// msa = mips mas
|
||
|
NCNN_EXPORT int cpu_support_mips_msa();
|
||
|
// mmi = loongson mmi
|
||
|
NCNN_EXPORT int cpu_support_loongson_mmi();
|
||
|
|
||
|
// v = riscv vector
|
||
|
NCNN_EXPORT int cpu_support_riscv_v();
|
||
|
// zfh = riscv half-precision float
|
||
|
NCNN_EXPORT int cpu_support_riscv_zfh();
|
||
|
// vlenb = riscv vector length in bytes
|
||
|
NCNN_EXPORT int cpu_riscv_vlenb();
|
||
|
|
||
|
// cpu info
|
||
|
NCNN_EXPORT int get_cpu_count();
|
||
|
NCNN_EXPORT int get_little_cpu_count();
|
||
|
NCNN_EXPORT int get_big_cpu_count();
|
||
|
|
||
|
NCNN_EXPORT int get_physical_cpu_count();
|
||
|
NCNN_EXPORT int get_physical_little_cpu_count();
|
||
|
NCNN_EXPORT int get_physical_big_cpu_count();
|
||
|
|
||
|
// cpu l2 varies from 64k to 1M, but l3 can be zero
|
||
|
NCNN_EXPORT int get_cpu_level2_cache_size();
|
||
|
NCNN_EXPORT int get_cpu_level3_cache_size();
|
||
|
|
||
|
// bind all threads on little clusters if powersave enabled
|
||
|
// affects HMP arch cpu like ARM big.LITTLE
|
||
|
// only implemented on android at the moment
|
||
|
// switching powersave is expensive and not thread-safe
|
||
|
// 0 = all cores enabled(default)
|
||
|
// 1 = only little clusters enabled
|
||
|
// 2 = only big clusters enabled
|
||
|
// return 0 if success for setter function
|
||
|
NCNN_EXPORT int get_cpu_powersave();
|
||
|
NCNN_EXPORT int set_cpu_powersave(int powersave);
|
||
|
|
||
|
// convenient wrapper
|
||
|
NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);
|
||
|
|
||
|
// set explicit thread affinity
|
||
|
NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
|
||
|
|
||
|
// runtime thread affinity info
|
||
|
NCNN_EXPORT int is_current_thread_running_on_a53_a55();
|
||
|
|
||
|
// misc function wrapper for openmp routines
|
||
|
NCNN_EXPORT int get_omp_num_threads();
|
||
|
NCNN_EXPORT void set_omp_num_threads(int num_threads);
|
||
|
|
||
|
NCNN_EXPORT int get_omp_dynamic();
|
||
|
NCNN_EXPORT void set_omp_dynamic(int dynamic);
|
||
|
|
||
|
NCNN_EXPORT int get_omp_thread_num();
|
||
|
|
||
|
NCNN_EXPORT int get_kmp_blocktime();
|
||
|
NCNN_EXPORT void set_kmp_blocktime(int time_ms);
|
||
|
|
||
|
// need to flush denormals on Intel Chipset.
|
||
|
// Other architectures such as ARM can be added as needed.
|
||
|
// 0 = DAZ OFF, FTZ OFF
|
||
|
// 1 = DAZ ON , FTZ OFF
|
||
|
// 2 = DAZ OFF, FTZ ON
|
||
|
// 3 = DAZ ON, FTZ ON
|
||
|
NCNN_EXPORT int get_flush_denormals();
|
||
|
NCNN_EXPORT int set_flush_denormals(int flush_denormals);
|
||
|
|
||
|
} // namespace ncnn
|
||
|
|
||
|
#endif // NCNN_CPU_H
|