/* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef TNT_MATH_FAST_H #define TNT_MATH_FAST_H #include #include #include #include #include #ifdef __ARM_NEON #include #endif namespace filament { namespace math { namespace fast { // fast cos(x), ~8 cycles (vs. 66 cycles on ARM) // can be vectorized // x between -pi and pi template::value>> constexpr T MATH_PURE cos(T x) noexcept { x *= T(F_1_PI / 2); x -= T(0.25) + std::floor(x + T(0.25)); x *= T(16.0) * std::abs(x) - T(8.0); x += T(0.225) * x * (std::abs(x) - T(1.0)); return x; } // fast sin(x), ~8 cycles (vs. 66 cycles on ARM) // can be vectorized // x between -pi and pi template::value>> constexpr T MATH_PURE sin(T x) noexcept { return filament::math::fast::cos(x - T(F_PI_2)); } constexpr inline float MATH_PURE ilog2(float x) noexcept { union { float val; int32_t x; } u = { x }; return ((u.x >> 23) & 0xff) - 127; } constexpr inline float MATH_PURE log2(float x) noexcept { union { float val; int32_t x; } u = { x }; float ilog2 = float(((u.x >> 23) & 0xff) - 128); u.x = (u.x & 0x007fffff) | 0x3f800000; return ilog2 + (-0.34484843f * u.val + 2.02466578f) * u.val - 0.67487759f; } // fast 1/sqrt(), on ARMv8 this is 5 cycles vs. 7 cycles, so maybe not worth it. // we keep this mostly for reference and benchmarking. inline float MATH_PURE isqrt(float x) noexcept { #if defined(__ARM_NEON) && defined(__aarch64__) float y = vrsqrtes_f32(x); return y * vrsqrtss_f32(x, y * y); #else return 1 / std::sqrt(x); #endif } inline double MATH_PURE isqrt(double x) noexcept { #if defined(__ARM_NEON) && defined(__aarch64__) double y = vrsqrted_f64(x); return y * vrsqrtsd_f64(x, y * y); #else return 1 / std::sqrt(x); #endif } inline int signbit(float x) noexcept { #if __has_builtin(__builtin_signbitf) // Note: on Android NDK, signbit() is a function call -- not what we want. return __builtin_signbitf(x); #else return std::signbit(x); #endif } /* * constexpr exp(), pow(), factorial() */ constexpr double pow(double x, unsigned int y) noexcept { return y == 0 ? 1.0 : x * pow(x, y - 1); } constexpr unsigned int factorial(unsigned int x) noexcept { return x == 0 ? 1 : x * factorial(x - 1); } constexpr double exp(double x) noexcept { return 1.0 + x + pow(x, 2) / factorial(2) + pow(x, 3) / factorial(3) + pow(x, 4) / factorial(4) + pow(x, 5) / factorial(5) + pow(x, 6) / factorial(6) + pow(x, 7) / factorial(7) + pow(x, 8) / factorial(8) + pow(x, 9) / factorial(9); } constexpr float exp(float x) noexcept { return float(exp(double(x))); } inline float MATH_PURE pow(float a, float b) noexcept { constexpr int fudgeMinRMSE = 486411; constexpr int K = (127 << 23) - fudgeMinRMSE; union { float f; int x; } u = { a }; u.x = (int)(b * (u.x - K) + K); return u.f; } // this is more precise than pow() above inline float MATH_PURE pow2dot2(float a) noexcept { union { float f; int x; } u = { a }; constexpr int K = (127 << 23); u.x = (int)(0.2f * (u.x - K) + K); return a * a * u.f; // a^2 * a^0.2 } /* * unsigned saturated arithmetic */ #if defined(__ARM_NEON) && defined(__aarch64__) inline uint8_t MATH_PURE qadd(uint8_t a, uint8_t b) noexcept { return vuqaddb_s8(a, b); } inline uint16_t MATH_PURE qadd(uint16_t a, uint16_t b) noexcept { return vuqaddh_s16(a, b); } inline uint32_t MATH_PURE qadd(uint32_t a, uint32_t b) noexcept { return vuqadds_s32(a, b); } inline uint8_t MATH_PURE qsub(uint8_t a, uint8_t b) noexcept { return vqsubb_s8(a, b); } inline uint16_t MATH_PURE qsub(uint16_t a, uint16_t b) noexcept { return vqsubh_s16(a, b); } inline uint32_t MATH_PURE qsub(uint32_t a, uint32_t b) noexcept { return vqsubs_s32(a, b); } #else template::value || std::is_same::value || std::is_same::value>> inline T MATH_PURE qadd(T a, T b) noexcept { T r = a + b; return r | -T(r < a); } template::value || std::is_same::value || std::is_same::value>> inline T MATH_PURE qsub(T a, T b) noexcept { T r = a - b; return r & -T(r <= a); } #endif template inline T MATH_PURE qinc(T a) noexcept { return qadd(a, T(1)); } template inline T MATH_PURE qdec(T a) noexcept { return qsub(a, T(1)); } } // namespace fast } // namespace math } // namespace filament #endif // TNT_MATH_FAST_H