// image_framework_ymj/include/open3d/3rdparty/math/fast.h
// 2024-12-06 16:25:16 +08:00
//
// 197 lines
// 5.6 KiB
// C++
// Executable File

/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_MATH_FAST_H
#define TNT_MATH_FAST_H
#include <cmath>
#include <cstdint>
#include <type_traits>
#include <math/compiler.h>
#include <math/scalar.h>
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
namespace filament {
namespace math {
namespace fast {
// Fast approximate cosine: ~8 cycles (vs. 66 cycles on ARM), and vectorizable.
// Intended input range: x between -pi and pi.
//
// Steps: scale the angle by F_1_PI / 2, re-centre it with floor() and a quarter offset,
// evaluate the parabola t * (16|t| - 8), then add a correction term weighted by 0.225.
// NOTE(review): F_1_PI comes from <math/scalar.h>; presumably 1/pi -- confirm there.
template<typename T, typename = std::enable_if_t<std::is_floating_point<T>::value>>
constexpr T MATH_PURE cos(T x) noexcept {
    const T scaled = x * T(F_1_PI / 2);
    const T centred = scaled - (T(0.25) + std::floor(scaled + T(0.25)));
    const T parabola = centred * (T(16.0) * std::abs(centred) - T(8.0));
    return parabola + T(0.225) * parabola * (std::abs(parabola) - T(1.0));
}
// Fast approximate sine: ~8 cycles (vs. 66 cycles on ARM), and vectorizable.
// Intended input range: x between -pi and pi.
//
// Implemented as the fast cosine of the argument shifted by F_PI_2.
template<typename T, typename = std::enable_if_t<std::is_floating_point<T>::value>>
constexpr T MATH_PURE sin(T x) noexcept {
    const T shifted = x - T(F_PI_2);
    return filament::math::fast::cos<T>(shifted);
}
// Integer part of log2(x), returned as a float.
//
// Extracts the 8-bit field at bits 23..30 of the float's bit pattern (the biased exponent)
// and removes the bias of 127. The sign bit is masked away, so -x yields the same result as x.
// NOTE(review): the bits are read through the union member that was not initialised, which
// relies on the compiler supporting union-based type punning.
constexpr inline float MATH_PURE ilog2(float x) noexcept {
    union FloatBits {
        float asFloat;
        int32_t asInt;
    } bits = { x };
    const int32_t biasedExponent = (bits.asInt >> 23) & 0xff;
    return biasedExponent - 127;
}
// Fast approximate log2(x).
//
// The exponent field of the float supplies the integer part; the mantissa is then rebuilt as a
// float in [1, 2) (exponent bits forced to 0x3f800000) and fed to a quadratic polynomial.
// The exponent is offset by 128 rather than 127 because the polynomial evaluates to roughly 1
// (not 0) at a mantissa of 1.
// NOTE(review): the bits are accessed through a union, which relies on the compiler supporting
// union-based type punning.
constexpr inline float MATH_PURE log2(float x) noexcept {
    union FloatBits {
        float asFloat;
        int32_t asInt;
    } bits = { x };
    const int32_t exponentField = (bits.asInt >> 23) & 0xff;
    const float integerPart = float(exponentField - 128);
    // keep the 23 mantissa bits, replace sign/exponent with those of 1.0f
    bits.asInt = (bits.asInt & 0x007fffff) | 0x3f800000;
    const float m = bits.asFloat;
    return integerPart + (-0.34484843f * m + 2.02466578f) * m - 0.67487759f;
}
// Fast reciprocal square root, 1/sqrt(x), single precision.
// On ARMv8 this is 5 cycles vs. 7 cycles, so it may not be worth it; it is kept mostly for
// reference and benchmarking.
//
// AArch64 + NEON: hardware estimate refined by one reciprocal-square-root step.
// Elsewhere: plain 1 / std::sqrt(x).
inline float MATH_PURE isqrt(float x) noexcept {
#if defined(__ARM_NEON) && defined(__aarch64__)
    const float estimate = vrsqrtes_f32(x);
    const float step = vrsqrtss_f32(x, estimate * estimate);
    return estimate * step;
#else
    const float root = std::sqrt(x);
    return 1 / root;
#endif
}
// Fast reciprocal square root, 1/sqrt(x), double precision.
//
// AArch64 + NEON: hardware estimate refined by one reciprocal-square-root step.
// Elsewhere: plain 1 / std::sqrt(x).
inline double MATH_PURE isqrt(double x) noexcept {
#if defined(__ARM_NEON) && defined(__aarch64__)
    const double estimate = vrsqrted_f64(x);
    const double step = vrsqrtsd_f64(x, estimate * estimate);
    return estimate * step;
#else
    const double root = std::sqrt(x);
    return 1 / root;
#endif
}
// Returns a non-zero value when the sign bit of x is set (this includes -0.0f), 0 otherwise.
inline int signbit(float x) noexcept {
#if __has_builtin(__builtin_signbitf)
    // Prefer the builtin: on Android NDK, signbit() is a real function call, which defeats the
    // purpose of a "fast" helper.
    const int negative = __builtin_signbitf(x);
#else
    const int negative = std::signbit(x);
#endif
    return negative;
}
/*
* constexpr exp(), pow(), factorial()
*/
// constexpr integer power: x multiplied by itself y times; pow(x, 0) is 1.0.
// Uses y multiplications, accumulated in the order x * (x * (... * 1.0)).
constexpr double pow(double x, unsigned int y) noexcept {
    double result = 1.0;
    for (; y != 0; --y) {
        result = x * result;
    }
    return result;
}
// constexpr factorial: x! computed in unsigned int arithmetic; factorial(0) is 1.
// The product wraps modulo 2^N (N = width of unsigned int) once x! no longer fits.
constexpr unsigned int factorial(unsigned int x) noexcept {
    unsigned int product = 1;
    while (x > 1) {
        product *= x;
        --x;
    }
    return product;
}
// constexpr approximation of e^x: the power series sum of x^k / k! for k = 0..9.
// Terms are added in increasing order of k.
constexpr double exp(double x) noexcept {
    const double term2 = pow(x, 2) / factorial(2);
    const double term3 = pow(x, 3) / factorial(3);
    const double term4 = pow(x, 4) / factorial(4);
    const double term5 = pow(x, 5) / factorial(5);
    const double term6 = pow(x, 6) / factorial(6);
    const double term7 = pow(x, 7) / factorial(7);
    const double term8 = pow(x, 8) / factorial(8);
    const double term9 = pow(x, 9) / factorial(9);
    return 1.0 + x + term2 + term3 + term4 + term5 + term6 + term7 + term8 + term9;
}
// Single-precision overload: evaluates the double-precision exp() and narrows the result.
constexpr float exp(float x) noexcept {
    const double wide = static_cast<double>(x);
    return static_cast<float>(exp(wide));
}
// Fast, low-precision approximation of a^b computed directly on the float's bit pattern:
// the integer view of a is offset by a bias constant, scaled by b, and offset back.
// The bias is the bit pattern of 1.0f (127 << 23) minus a fudge term (fudgeMinRMSE).
// NOTE(review): the bits are accessed through a union, which relies on the compiler supporting
// union-based type punning.
inline float MATH_PURE pow(float a, float b) noexcept {
    constexpr int kFudgeMinRMSE = 486411;
    constexpr int kBias = (127 << 23) - kFudgeMinRMSE;
    union {
        float asFloat;
        int asInt;
    } bits = { a };
    bits.asInt = static_cast<int>(b * (bits.asInt - kBias) + kBias);
    return bits.asFloat;
}
// Approximates a^2.2 as a^2 * a^0.2; more precise than the generic fast pow(float, float).
// Only the a^0.2 factor is approximated, by scaling the offset of a's bit pattern from the
// bit pattern of 1.0f (127 << 23) by 0.2.
// NOTE(review): the bits are accessed through a union, which relies on the compiler supporting
// union-based type punning.
inline float MATH_PURE pow2dot2(float a) noexcept {
    constexpr int kOneBits = (127 << 23);
    union {
        float asFloat;
        int asInt;
    } bits = { a };
    bits.asInt = static_cast<int>(0.2f * (bits.asInt - kOneBits) + kOneBits);
    const float fifthRoot = bits.asFloat; // ~ a^0.2
    return a * a * fifthRoot;
}
/*
* unsigned saturated arithmetic
*/
#if defined(__ARM_NEON) && defined(__aarch64__)
// AArch64 scalar NEON path.
// qadd(a, b): a + b clamped to the maximum of the unsigned type.
// qsub(a, b): a - b clamped to 0.
// The *unsigned* saturating intrinsics (vqadd*_u*, vqsub*_u*) must be used here: the signed
// forms (vuqadd*_s*, vqsub*_s*) saturate to the signed range, which gives results that differ
// from the portable implementation below (e.g. 200 + 100 on uint8_t would not yield 255).
inline uint8_t  MATH_PURE qadd(uint8_t a,  uint8_t b)  noexcept { return vqaddb_u8(a, b);  }
inline uint16_t MATH_PURE qadd(uint16_t a, uint16_t b) noexcept { return vqaddh_u16(a, b); }
inline uint32_t MATH_PURE qadd(uint32_t a, uint32_t b) noexcept { return vqadds_u32(a, b); }
inline uint8_t  MATH_PURE qsub(uint8_t a,  uint8_t b)  noexcept { return vqsubb_u8(a, b);  }
inline uint16_t MATH_PURE qsub(uint16_t a, uint16_t b) noexcept { return vqsubh_u16(a, b); }
inline uint32_t MATH_PURE qsub(uint32_t a, uint32_t b) noexcept { return vqsubs_u32(a, b); }
#else
// Portable, branch-free unsigned saturated add for uint8_t / uint16_t / uint32_t.
// Returns a + b, or the maximum value of T if the sum does not fit in T.
template<typename T, typename = std::enable_if_t<
        std::is_same<uint8_t, T>::value ||
        std::is_same<uint16_t, T>::value ||
        std::is_same<uint32_t, T>::value>>
inline T MATH_PURE qadd(T a, T b) noexcept {
    T r = a + b;
    // The truncated sum is below `a` exactly when it wrapped around; -T(1) then sets every
    // bit, forcing the result to the maximum of T. Otherwise the mask is 0 and r is kept.
    return r | -T(r < a);
}
// Portable, branch-free unsigned saturated subtract for uint8_t / uint16_t / uint32_t.
// Returns a - b, or 0 if b is greater than a.
template<typename T, typename = std::enable_if_t<
        std::is_same<uint8_t, T>::value ||
        std::is_same<uint16_t, T>::value ||
        std::is_same<uint32_t, T>::value>>
inline T MATH_PURE qsub(T a, T b) noexcept {
    T r = a - b;
    // Without underflow the difference cannot exceed `a`, so the mask is all ones and r is
    // kept; after a wrap-around r > a, the mask is 0 and the result is cleared.
    return r & -T(r <= a);
}
#endif
// Saturated increment: qadd(a, 1), so the result stays at the maximum instead of wrapping.
template<typename T>
inline T MATH_PURE qinc(T a) noexcept {
    const T one = T(1);
    return qadd(a, one);
}
// Saturated decrement: qsub(a, 1), so the result stays at 0 instead of wrapping.
template<typename T>
inline T MATH_PURE qdec(T a) noexcept {
    const T one = T(1);
    return qsub(a, one);
}
} // namespace fast
} // namespace math
} // namespace filament
#endif // TNT_MATH_FAST_H