859 lines
25 KiB
C
859 lines
25 KiB
C
![]() |
#ifndef _include_guard_nanotime_
|
||
|
#define _include_guard_nanotime_
|
||
|
|
||
|
/*
|
||
|
* You can choose this license, if possible in your jurisdiction:
|
||
|
*
|
||
|
* Unlicense
|
||
|
*
|
||
|
* This is free and unencumbered software released into the public domain.
|
||
|
*
|
||
|
* Anyone is free to copy, modify, publish, use, compile, sell, or distribute
|
||
|
* this software, either in source code form or as a compiled binary, for any
|
||
|
* purpose, commercial or non-commercial, and by any means.
|
||
|
*
|
||
|
* In jurisdictions that recognize copyright laws, the author or authors of
|
||
|
* this software dedicate any and all copyright interest in the software to the
|
||
|
* public domain. We make this dedication for the benefit of the public at
|
||
|
* large and to the detriment of our heirs and successors. We intend this
|
||
|
* dedication to be an overt act of relinquishment in perpetuity of all present
|
||
|
* and future rights to this software under copyright law.
|
||
|
*
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
* AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||
|
*
|
||
|
* For more information, please refer to <http://unlicense.org/>
|
||
|
*
|
||
|
*
|
||
|
* Alternative license choice, if works can't be directly submitted to the
|
||
|
* public domain in your jurisdiction:
|
||
|
*
|
||
|
* The MIT License (MIT)
|
||
|
*
|
||
|
* Copyright (C) 2022 Brandon McGriff <nightmareci@gmail.com>
|
||
|
*
|
||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
* of this software and associated documentation files (the "Software"), to
|
||
|
* deal in the Software without restriction, including without limitation the
|
||
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||
|
* sell copies of the Software, and to permit persons to whom the Software is
|
||
|
* furnished to do so, subject to the following conditions:
|
||
|
*
|
||
|
* The above copyright notice and this permission notice shall be included in
|
||
|
* all copies or substantial portions of the Software.
|
||
|
*
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||
|
* IN THE SOFTWARE.
|
||
|
*/
|
||
|
|
||
|
#if defined(_MSC_VER)
|
||
|
#if (_MSC_VER < 1600)
|
||
|
#error "Current Visual Studio version is not at least Visual Studio 2010, the nanotime library requires at least 2010."
|
||
|
#endif
|
||
|
#elif defined(__cplusplus)
|
||
|
#if (__cplusplus < 201103L)
|
||
|
#error "Current C++ standard is not at least C++11, the nanotime library requires at least C++11."
|
||
|
#endif
|
||
|
#elif defined(__STDC_VERSION__)
|
||
|
#if (__STDC_VERSION__ < 199901L)
|
||
|
#error "Current C standard is not at least C99, the nanotime library requires at least C99."
|
||
|
#endif
|
||
|
#else
|
||
|
#error "Current C or C++ standard is unknown, the nanotime library requires stdint.h and stdbool.h to be available (C99 or higher, C++11 or higher, Visual Studio 2010 or higher)."
|
||
|
#endif
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
extern "C" {
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* Implementor's note: This library directly uses Win32 APIs both for MSVC and
|
||
|
* MinGW GCC, as they work for both, and produce better behavior in MinGW
|
||
|
* builds. Detection of them is accomplished via checking if _WIN32 is defined,
|
||
|
* as it's defined in both MSVC and MinGW GCC. Though it's convenient to have
|
||
|
* UNIX-like APIs on Windows provided by MinGW, they just aren't as good as
|
||
|
* directly using Win32 APIs on Windows.
|
||
|
*/
|
||
|
|
||
|
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <assert.h>
|
||
|
|
||
|
#define NANOTIME_NSEC_PER_SEC UINT64_C(1000000000)
|
||
|
|
||
|
#ifndef NANOTIME_ONLY_STEP

/*
 * Returns the current time, in nanoseconds, since some unspecified epoch. With
 * the exception of the standard C11 implementation, and of the non-Apple/Mach
 * kernel POSIX implementation when neither CLOCK_MONOTONIC_RAW nor
 * CLOCK_MONOTONIC is available, the time values monotonically increase, so
 * they're not equivalent to calendar time (i.e., no leap seconds are accounted
 * for, etc.). Calendar time has to be used as a last resort sometimes, as
 * monotonic time isn't always available.
 */
uint64_t nanotime_now(void);

/*
 * Returns the maximum possible timestamp value. Use of this value is required
 * to properly handle overflow of timestamp values, such as when calculating
 * the interval between a time value before overflow and the next time value
 * after overflow.
 */
uint64_t nanotime_now_max(void);

/*
 * Sleeps the current thread for the requested count of nanoseconds. The slept
 * duration may be less than, equal to, or greater than the time requested.
 */
void nanotime_sleep(uint64_t nsec_count);

/*
 * Yields the CPU/core that called nanotime_yield to the operating system for
 * a small time slice.
 */
void nanotime_yield(void);

#endif
|
||
|
|
||
|
/*
 * Calculates the time interval between two nanosecond time values, correctly
 * handling the case when the end time value has overflowed past max and
 * wrapped around. You should probably use this function when calculating time
 * intervals, as not every platform's maximum timestamp value is UINT64_MAX,
 * which is required for the trivial "end - start" formula for interval
 * calculation to work as expected.
 *
 * start and end must both be less than or equal to max, and max must be
 * nonzero.
 */
uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max);
|
||
|
|
||
|
/*
 * State of a fixed timestep stepper. The first four fields are the
 * configuration copied in by nanotime_step_init; the last three are working
 * state maintained by nanotime_step_init and nanotime_step.
 */
typedef struct nanotime_step_data {
	/* Target duration of one step, in nanoseconds. */
	uint64_t sleep_duration;
	/* Maximum timestamp value the now callback can return. */
	uint64_t now_max;
	/* Callback returning the current timestamp. */
	uint64_t (* now)(void);
	/* Callback sleeping for the requested count of nanoseconds. */
	void (* sleep)(uint64_t nsec_count);

	/* Most recently measured duration of a zero-duration sleep request. */
	uint64_t zero_sleep_duration;
	/* Time carried over between steps, compared against sleep_duration. */
	uint64_t accumulator;
	/* Timestamp the current step's sleep is measured from. */
	uint64_t sleep_point;
} nanotime_step_data;
|
||
|
|
||
|
/*
|
||
|
* Initializes the nanotime precise fixed timestep object. Call immediately
|
||
|
* before entering the loop using the stepper object.
|
||
|
*/
|
||
|
void nanotime_step_init(
|
||
|
nanotime_step_data* const stepper,
|
||
|
const uint64_t sleep_duration,
|
||
|
const uint64_t now_max,
|
||
|
uint64_t (* const now)(),
|
||
|
void (* const sleep)(uint64_t nsec_count)
|
||
|
);
|
||
|
|
||
|
/*
|
||
|
* Does one step of sleeping for a fixed timestep logic update cycle. It makes
|
||
|
* a best-attempt at a precise delay per iteration, but might skip a cycle of
|
||
|
* sleeping if skipping sleeps is required to catch up to the correct
|
||
|
* wall-clock time. Returns true if a sleep up to the latest target sleep end
|
||
|
* time occurred, otherwise returns false in the case of a sleep step skip.
|
||
|
*/
|
||
|
bool nanotime_step(nanotime_step_data* const stepper);
|
||
|
|
||
|
#if !defined(NANOTIME_ONLY_STEP) && defined(NANOTIME_IMPLEMENTATION)
|
||
|
|
||
|
/*
|
||
|
* Non-portable, platform-specific implementations are first. If none of them
|
||
|
* match the current platform, the standard C/C++ versions are used as a last
|
||
|
* resort.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Checking _WIN32 must be above the UNIX-like implementations, so MinGW is
|
||
|
* guaranteed to use it.
|
||
|
*/
|
||
|
#ifdef _WIN32
|
||
|
|
||
|
#ifndef WIN32_LEAN_AND_MEAN
|
||
|
#define WIN32_LEAN_AND_MEAN
|
||
|
#endif
|
||
|
#ifndef NOMINMAX
|
||
|
#define NOMINMAX
|
||
|
#endif
|
||
|
#include <Windows.h>
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
|
||
|
uint64_t nanotime_now() {
|
||
|
static uint64_t scale = UINT64_C(0);
|
||
|
static bool multiply;
|
||
|
if (scale == 0u) {
|
||
|
LARGE_INTEGER frequency;
|
||
|
QueryPerformanceFrequency(&frequency);
|
||
|
if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
|
||
|
scale = NANOTIME_NSEC_PER_SEC / frequency.QuadPart;
|
||
|
multiply = true;
|
||
|
}
|
||
|
else {
|
||
|
scale = frequency.QuadPart / NANOTIME_NSEC_PER_SEC;
|
||
|
multiply = false;
|
||
|
}
|
||
|
}
|
||
|
LARGE_INTEGER performanceCount;
|
||
|
QueryPerformanceCounter(&performanceCount);
|
||
|
if (multiply) {
|
||
|
return performanceCount.QuadPart * scale;
|
||
|
}
|
||
|
else {
|
||
|
return performanceCount.QuadPart / scale;
|
||
|
}
|
||
|
}
|
||
|
#define NANOTIME_NOW_IMPLEMENTED
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
|
||
|
uint64_t nanotime_now_max() {
|
||
|
static uint64_t now_max;
|
||
|
if (now_max == UINT64_C(0)) {
|
||
|
LARGE_INTEGER frequency;
|
||
|
QueryPerformanceFrequency(&frequency);
|
||
|
if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
|
||
|
now_max = UINT64_MAX * (NANOTIME_NSEC_PER_SEC / frequency.QuadPart);
|
||
|
}
|
||
|
else {
|
||
|
now_max = UINT64_MAX / (frequency.QuadPart / NANOTIME_NSEC_PER_SEC);
|
||
|
}
|
||
|
}
|
||
|
return now_max;
|
||
|
}
|
||
|
#define NANOTIME_NOW_MAX_IMPLEMENTED
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
|
||
|
void nanotime_sleep(uint64_t nsec_count) {
|
||
|
LARGE_INTEGER dueTime;
|
||
|
|
||
|
if (nsec_count < UINT64_C(100)) {
|
||
|
/*
|
||
|
* Allows the OS to schedule another process for a single time
|
||
|
* slice. Better than a delay of 0, which immediately returns
|
||
|
* with no actual non-CPU-hogging delay. The time-slice-yield
|
||
|
* behavior is specified in Microsoft's Windows documentation.
|
||
|
*/
|
||
|
SleepEx(0UL, FALSE);
|
||
|
}
|
||
|
else {
|
||
|
HANDLE timer = NULL;
|
||
|
if (
|
||
|
#ifdef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
|
||
|
/*
|
||
|
* Requesting a high resolution timer can make quite the
|
||
|
* difference, so always request high resolution if available. It's
|
||
|
* available in Windows 10 1803 and above. This arrangement of
|
||
|
* building it if the build system supports it will allow the
|
||
|
* executable to use high resolution if available on a user's
|
||
|
* system, but revert to low resolution if the user's system
|
||
|
* doesn't support high resolution.
|
||
|
*/
|
||
|
(timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)) == NULL &&
|
||
|
#endif
|
||
|
(timer = CreateWaitableTimer(NULL, TRUE, NULL)) == NULL
|
||
|
) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
dueTime.QuadPart = -(LONGLONG)(nsec_count / UINT64_C(100));
|
||
|
|
||
|
SetWaitableTimer(timer, &dueTime, 0L, NULL, NULL, FALSE);
|
||
|
WaitForSingleObject(timer, INFINITE);
|
||
|
|
||
|
CloseHandle(timer);
|
||
|
}
|
||
|
}
|
||
|
#define NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
/*
 * Win32 implementation of nanotime_yield, delegating to the Win32
 * YieldProcessor macro.
 */
void nanotime_yield(void) {
	YieldProcessor();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
|
||
|
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* To avoid using standard UNIX APIs on UNIX-like platforms, the
|
||
|
* platform-specific implementations must be first. That way, the
|
||
|
* lower-overhead kernel APIs can be used, that aren't UNIX-like.
|
||
|
*/
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
/*
 * The current platform is some Apple operating system, or at least uses some
 * Mach kernel. The POSIX implementation below using clock_gettime works on at
 * least Apple platforms, though this version using Mach functions has lower
 * overhead.
 *
 * The timebase ratio is fetched on the first call and cached in a
 * function-local static. Returns 0 if the timebase can't be queried.
 */
#include <mach/mach_time.h>
uint64_t nanotime_now(void) {
	static mach_timebase_info_data_t info = { 0 };
	if (info.denom == UINT32_C(0)) {
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
	}
	/* Scale ticks by numer/denom to get nanoseconds. */
	return (mach_absolute_time() * info.numer) / info.denom;
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
#include <mach/mach_time.h>
/*
 * Apple/Mach implementation of nanotime_now_max: UINT64_MAX divided by the
 * timebase denominator, cached in a function-local static on the first
 * successful call. Returns 0 if the timebase can't be queried.
 */
uint64_t nanotime_now_max(void) {
	static uint64_t now_max = UINT64_C(0);
	if (now_max == UINT64_C(0)) {
		mach_timebase_info_data_t info;
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
		else {
			now_max = UINT64_MAX / info.denom;
		}
	}
	return now_max;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__unix__)
/*
 * _POSIX_VERSION is defined by <unistd.h>, so that header has to be included
 * before the POSIX version can be tested.
 */
#include <unistd.h>
#include <time.h>
#if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 199309L) && defined(CLOCK_REALTIME)
#include <errno.h>
/*
 * Current platform is some version of POSIX that exposes clock_gettime.
 * Returns the clock reading converted to nanoseconds, or 0 if the clock
 * couldn't be read.
 */
uint64_t nanotime_now(void) {
	struct timespec now;
	const int status = clock_gettime(
#if defined(CLOCK_MONOTONIC_RAW)
		/*
		 * Monotonic raw is more precise, but not always available. For
		 * the sorts of applications this code is intended for, mainly
		 * soft real time applications such as game programming, the
		 * subtle inconsistencies of it vs. monotonic aren't an issue.
		 */
		CLOCK_MONOTONIC_RAW
#elif defined(CLOCK_MONOTONIC)
		/*
		 * Monotonic is quite good, and widely available, but not as
		 * precise as monotonic raw, so it's only used if required.
		 */
		CLOCK_MONOTONIC
#else
		/*
		 * Realtime isn't fully correct, as it's calendar time, but is
		 * even more widely available than monotonic. Monotonic is only
		 * unavailable on very old platforms though, so old they're
		 * likely unused now (as of last editing this, 2023).
		 */
		CLOCK_REALTIME
#endif
	, &now);
	assert(status == 0);
	if (status != 0) {
		/* On any failure now is left unset, so it must not be read. */
		return UINT64_C(0);
	}
	return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__MINGW32__) || defined(__MINGW64__)
|
||
|
#include <unistd.h>
|
||
|
#include <time.h>
|
||
|
#include <errno.h>
|
||
|
void nanotime_sleep(uint64_t nsec_count) {
|
||
|
const struct timespec req = {
|
||
|
.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
|
||
|
.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
|
||
|
};
|
||
|
#ifndef NDEBUG
|
||
|
const int status =
|
||
|
#endif
|
||
|
nanosleep(&req, NULL);
|
||
|
assert(status == 0 || (status == -1 && errno != EINVAL));
|
||
|
}
|
||
|
#define NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__unix__) || defined(__APPLE__)
/*
 * _POSIX_VERSION is defined by <unistd.h>; include it here so the test below
 * doesn't depend on an earlier section having included it.
 */
#include <unistd.h>
#if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200112L)
#include <sched.h>
/*
 * POSIX implementation of nanotime_yield, using sched_yield. The return value
 * is deliberately ignored.
 */
void nanotime_yield(void) {
	(void)sched_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/processmgr.h>
/*
 * PS Vita implementation of nanotime_now. The process time is multiplied by
 * 1000, i.e. it's treated as a count of microseconds.
 */
uint64_t nanotime_now(void) {
	return sceKernelGetProcessTimeWide() * UINT64_C(1000);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/processmgr.h>
/*
 * PS Vita implementation of nanotime_sleep. The request is divided by 1000
 * before being passed on, i.e. the delay is given in microseconds, so any
 * sub-microsecond remainder is dropped.
 */
void nanotime_sleep(uint64_t nsec_count) {
	sceKernelDelayThreadCB(nsec_count / UINT64_C(1000));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
/*
 * Emscripten implementation of nanotime_sleep. The request is converted to
 * whole milliseconds, so any sub-millisecond remainder is dropped.
 *
 * NOTE: You *must* have asyncify enabled in the Emscripten build (pass
 * -sASYNCIFY to the compiler/linker) or sleeping won't work.
 */
void nanotime_sleep(uint64_t nsec_count) {
	const uint64_t msec_count = nsec_count / UINT64_C(1000000);
	/*
	 * Clamp instead of letting the implicit narrowing conversion wrap a
	 * very long request around to a short one.
	 */
	emscripten_sleep(msec_count > UINT32_MAX ? UINT32_MAX : (uint32_t)msec_count);
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
/*
 * Emscripten implementation of nanotime_now. emscripten_get_now reports
 * milliseconds as a double, which is scaled to nanoseconds here.
 */
uint64_t nanotime_now(void) {
	const double now = emscripten_get_now();
	/*
	 * Scale before converting to an integer, so the fractional part of the
	 * millisecond reading isn't thrown away.
	 */
	return (uint64_t)(now * 1000000.0);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
/*
 * Nintendo Switch implementation of nanotime_sleep. The system call takes a
 * signed 64-bit count, so requests above INT64_MAX are clamped to INT64_MAX.
 */
void nanotime_sleep(uint64_t nsec_count) {
	if (nsec_count > INT64_MAX) {
		svcSleepThread(INT64_MAX);
	}
	else {
		svcSleepThread((s64)nsec_count);
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
/*
 * Nintendo Switch implementation of nanotime_now: the system tick counter,
 * converted to nanoseconds with armTicksToNs.
 */
uint64_t nanotime_now(void) {
	return armTicksToNs(armGetSystemTick());
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__SWITCH__)
#include <switch.h>
/*
 * Nintendo Switch implementation of nanotime_yield: a sleep request carrying
 * the YieldType_ToAnyThread value in place of a duration.
 */
void nanotime_yield(void) {
	svcSleepThread(YieldType_ToAnyThread);
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
|
||
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||
|
#include <time.h>
|
||
|
uint64_t nanotime_now() {
|
||
|
struct timespec now;
|
||
|
const int status = timespec_get(&now, TIME_UTC);
|
||
|
assert(status == TIME_UTC);
|
||
|
if (status == TIME_UTC) {
|
||
|
return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
|
||
|
}
|
||
|
else {
|
||
|
return UINT64_C(0);
|
||
|
}
|
||
|
}
|
||
|
#define NANOTIME_NOW_IMPLEMENTED
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
|
||
|
#include <threads.h>
|
||
|
void nanotime_sleep(uint64_t nsec_count) {
|
||
|
const struct timespec req = {
|
||
|
.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
|
||
|
.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
|
||
|
};
|
||
|
const int status = thrd_sleep(&req, NULL);
|
||
|
assert(status == 0 || status == -1);
|
||
|
}
|
||
|
#define NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#include <threads.h>
/*
 * Standard C11 implementation of nanotime_yield, using thrd_yield.
 */
void nanotime_yield(void) {
	thrd_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* C++ implementations follow here, but defined with C linkage.
|
||
|
*/
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __cplusplus
#include <cstdint>
#include <chrono>
/*
 * Standard C++11 implementation of nanotime_now: the steady clock's time since
 * its epoch, expressed in nanoseconds.
 */
extern "C" uint64_t nanotime_now() {
	return static_cast<uint64_t>(
		std::chrono::time_point_cast<std::chrono::nanoseconds>(
			std::chrono::steady_clock::now()
		).time_since_epoch().count()
	);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __cplusplus
#include <cstdint>
#include <chrono>
#include <thread>
#include <exception>
/*
 * Standard C++11 implementation of nanotime_sleep, using
 * std::this_thread::sleep_for. Standard exceptions thrown while sleeping are
 * swallowed, as they must not propagate out of a function with C linkage; the
 * sleep is then simply cut short.
 */
extern "C" void nanotime_sleep(uint64_t nsec_count) {
	try {
		std::this_thread::sleep_for(std::chrono::nanoseconds(nsec_count));
	}
	catch (const std::exception&) {
		/* Best effort: a failed sleep is treated as a short sleep. */
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
#include <thread>
/*
 * Standard C++11 implementation of nanotime_yield, using
 * std::this_thread::yield.
 */
extern "C" void nanotime_yield() {
	std::this_thread::yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifndef NANOTIME_NOW_IMPLEMENTED
|
||
|
#error "Failed to implement nanotime_now (try using C11 with C11 threads support or C++11)."
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_SLEEP_IMPLEMENTED
|
||
|
#error "Failed to implement nanotime_sleep (try using C11 with C11 threads support or C++11)."
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
extern "C" {
#endif
/*
 * As a last resort, make a zero-duration sleep request to implement yield.
 * Such sleep requests often have the desired yielding behavior on many
 * platforms.
 */
void nanotime_yield(void) {
	nanotime_sleep(0u);
}
#define NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
}
#endif
#endif
|
||
|
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
extern "C" {
|
||
|
#endif
|
||
|
|
||
|
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
/*
 * Fallback implementation of nanotime_now_max, used when no platform-specific
 * version was selected: assumes timestamps span the full uint64_t range.
 * Might not be correct on some platforms, but it's the best we can do as a
 * last resort.
 */
uint64_t nanotime_now_max(void) {
	return UINT64_MAX;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
|
||
|
|
||
|
#endif
|
||
|
|
||
|
|
||
|
#ifdef NANOTIME_IMPLEMENTATION
|
||
|
|
||
|
/*
 * Returns the time elapsed from start to end, where timestamps count up to
 * max and then wrap around to zero.
 *
 * If end is before start, the timestamp is taken to have wrapped exactly
 * once: the interval is the distance from start up to max, plus one for the
 * step from max to zero, plus end.
 *
 * max must be nonzero, and both start and end must be at most max; these
 * preconditions are only checked by assertions.
 */
uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max) {
	assert(max > UINT64_C(0));
	assert(start <= max);
	assert(end <= max);

	if (end >= start) {
		return end - start;
	}
	else {
		return end + (max - start) + UINT64_C(1);
	}
}
|
||
|
|
||
|
void nanotime_step_init(
|
||
|
nanotime_step_data* const stepper,
|
||
|
const uint64_t sleep_duration,
|
||
|
const uint64_t now_max,
|
||
|
uint64_t (* const now)(),
|
||
|
void (* const sleep)(uint64_t nsec_count)
|
||
|
) {
|
||
|
assert(stepper != NULL);
|
||
|
assert(sleep_duration > UINT64_C(0));
|
||
|
assert(now_max > UINT64_C(0));
|
||
|
assert(now != NULL);
|
||
|
assert(sleep != NULL);
|
||
|
|
||
|
stepper->sleep_duration = sleep_duration;
|
||
|
stepper->now_max = now_max;
|
||
|
stepper->now = now;
|
||
|
stepper->sleep = sleep;
|
||
|
|
||
|
const uint64_t start = now();
|
||
|
sleep(UINT64_C(0));
|
||
|
stepper->zero_sleep_duration = nanotime_interval(start, now(), now_max);
|
||
|
stepper->accumulator = UINT64_C(0);
|
||
|
|
||
|
/*
|
||
|
* This should be last here, so the sleep point is close to what it
|
||
|
* should be.
|
||
|
*/
|
||
|
stepper->sleep_point = now();
|
||
|
}
|
||
|
|
||
|
/*
 * Does one step of the fixed timestep: sleeps until one step duration, less
 * whatever time is already accumulated, has passed since the stepper's sleep
 * point.
 *
 * The sleep is done in phases of decreasing granularity: 1 ms sleep requests,
 * then successively smaller sleep requests, then zero-duration sleep
 * requests, and finally a busyloop up to the deadline. Each phase tries to
 * stop short of the deadline by the longest sleep it has observed.
 *
 * Returns true if the step slept. Returns false, without sleeping, if the
 * accumulator already holds at least one step duration. In both cases one
 * step duration is subtracted from the accumulator before returning.
 */
bool nanotime_step(nanotime_step_data* const stepper) {
	assert(stepper != NULL);

	const uint64_t start_point = stepper->now();

	/*
	 * If the sleep point is at least a tenth of a second further back than
	 * one step duration, restart timing from the current time and drop the
	 * accumulated time, rather than trying to catch up.
	 */
	if (nanotime_interval(stepper->sleep_point, start_point, stepper->now_max) >= stepper->sleep_duration + NANOTIME_NSEC_PER_SEC / UINT64_C(10)) {
		stepper->sleep_point = start_point;
		stepper->accumulator = UINT64_C(0);
	}

	bool slept;
	if (stepper->accumulator < stepper->sleep_duration) {
		const uint64_t total_sleep_duration = stepper->sleep_duration - stepper->accumulator;
		uint64_t current_sleep_duration = total_sleep_duration;
		const uint64_t shift = UINT64_C(4);

		/*
		 * The algorithm implemented here takes the assumption that a
		 * sequence of repeated sleep requests of the same requested
		 * duration end up being approximately of equal actual sleep
		 * duration, even if they're all well above the requested
		 * duration. In practice, such an assumption proves out to be
		 * true on various platforms.
		 */

		/*
		 * A big initial sleep lowers power usage on any platform, as
		 * more small sleep requests use more power than fewer bigger,
		 * equivalent sleep requests. In practice, operating systems
		 * "actually sleep" when 1ms or more is requested, and 1ms is
		 * the minimum request duration you can make on some platforms
		 * (like older versions of Windows). Additionally, power usage
		 * is nice and low when doing the number of 1ms sleeps that's
		 * (hopefully) short of the target duration.
		 *
		 * But, the loop here maintains a maximum of the actual slept
		 * durations, breaking out when the time remaining is less than
		 * or equal to the maximum found. By breaking out on the
		 * maximum found rather than just 1ms-or-less remaining,
		 * sleeping beyond the target deadline is reduced.
		 */
		{
			uint64_t max = NANOTIME_NSEC_PER_SEC / UINT64_C(1000);
			uint64_t start = stepper->now();
			while (nanotime_interval(stepper->sleep_point, start, stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(NANOTIME_NSEC_PER_SEC / UINT64_C(1000));
				const uint64_t next = stepper->now();
				const uint64_t current_interval = nanotime_interval(start, next, stepper->now_max);
				if (current_interval > max) {
					max = current_interval;
				}
				start = next;
			}
			const uint64_t initial_duration = nanotime_interval(start_point, stepper->now(), stepper->now_max);
			if (initial_duration < current_sleep_duration) {
				current_sleep_duration -= initial_duration;
			}
			else {
				goto step_end;
			}
		}

		/*
		 * This has the flavor of Zeno's dichotomous paradox of motion,
		 * as it successively divides the time remaining to sleep, but
		 * attempts to stop short of the deadline to hopefully be able
		 * to precisely sleep up to the deadline below this loop. The
		 * divisor is larger than two though, as it produces better
		 * behavior, and seems to work fine in testing on real
		 * hardware. The same method of keeping track of the max
		 * duration per loop of same sleep request durations above is
		 * used here. The overshoot possible in the loop below this one
		 * won't overshoot much, or in the best case won't overshoot,
		 * so the busyloop can finish up the sleep precisely.
		 */
		current_sleep_duration >>= shift;
		for (
			uint64_t max = stepper->zero_sleep_duration;
			nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) + max < total_sleep_duration && current_sleep_duration > UINT64_C(0);
			current_sleep_duration >>= shift
		) {
			max = stepper->zero_sleep_duration;
			uint64_t start;
			while (max < stepper->sleep_duration && nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(current_sleep_duration);
				uint64_t slept_duration;
				if ((slept_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = slept_duration;
				}
			}
		}
		if (nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) >= total_sleep_duration) {
			goto step_end;
		}

		{
			/*
			 * After (hopefully) stopping short of the deadline by
			 * a small amount, do small sleeps here to get closer
			 * to the deadline, but again attempting to stop short
			 * by an even smaller amount. It's best to do larger
			 * sleeps as done in the above loops, to reduce
			 * CPU/power usage, as each sleep iteration has a
			 * more-or-less fixed overhead of CPU/power usage.
			 *
			 * In testing on an M1 Mac mini running macOS, power
			 * usage is lower using zero-duration sleeps vs.
			 * nanotime_yield(), with no loss of timing precision.
			 * The same might be true for other hardware/operating
			 * systems.
			 */
			uint64_t max = stepper->zero_sleep_duration;
			uint64_t start;
			while (nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(UINT64_C(0));
				if ((stepper->zero_sleep_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = stepper->zero_sleep_duration;
				}
			}
		}

	step_end:
		{
			/*
			 * Finally, do a busyloop to precisely sleep up to the
			 * deadline. The code above this loop attempts to
			 * reduce the remaining time to sleep to a minimum via
			 * process-yielding sleeps, so the amount of time spent
			 * spinning here is hopefully quite low.
			 *
			 * In testing on an M1 Mac mini running macOS,
			 * busylooping here produces the absolute greatest
			 * precision possible on the hardware, down to the
			 * sub-10ns-off-per-update range for longish stretches
			 * during 60 Hz updates, but in the
			 * hundreds-to-thousands of nanoseconds off when using
			 * nanotime_yield() or zero-duration sleeps. And,
			 * because the sleeping algorithm above does such a
			 * good job of stopping very close to the deadline,
			 * busylooping here has basically negligible difference
			 * in power usage vs. yields/zero-duration sleeps.
			 */
			uint64_t current_time;
			uint64_t accumulated;
			while ((accumulated = nanotime_interval(stepper->sleep_point, current_time = stepper->now(), stepper->now_max)) < total_sleep_duration);

			/*
			 * Carry the time slept forward, and measure the next
			 * step from the moment this one ended.
			 */
			stepper->accumulator += accumulated;
			stepper->sleep_point = current_time;
			slept = true;
		}
	}
	else {
		slept = false;
	}
	stepper->accumulator -= stepper->sleep_duration;
	return slept;
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#endif /* _include_guard_nanotime_ */
|