/* image_framework_ymj/image_framework/thead/LibapiNanoTime.h */
#ifndef _include_guard_nanotime_
#define _include_guard_nanotime_
/*
* You can choose this license, if possible in your jurisdiction:
*
* Unlicense
*
* This is free and unencumbered software released into the public domain.
*
* Anyone is free to copy, modify, publish, use, compile, sell, or distribute
* this software, either in source code form or as a compiled binary, for any
* purpose, commercial or non-commercial, and by any means.
*
* In jurisdictions that recognize copyright laws, the author or authors of
* this software dedicate any and all copyright interest in the software to the
* public domain. We make this dedication for the benefit of the public at
* large and to the detriment of our heirs and successors. We intend this
* dedication to be an overt act of relinquishment in perpetuity of all present
* and future rights to this software under copyright law.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* For more information, please refer to <http://unlicense.org/>
*
*
* Alternative license choice, if works can't be directly submitted to the
* public domain in your jurisdiction:
*
* The MIT License (MIT)
*
* Copyright (C) 2022 Brandon McGriff <nightmareci@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#if defined(_MSC_VER)
#if (_MSC_VER < 1600)
#error "The nanotime library requires at least Visual Studio 2010."
#endif
#elif defined(__cplusplus)
#if (__cplusplus < 201103L)
#error "The nanotime library requires at least C++11."
#endif
#elif defined(__STDC_VERSION__)
#if (__STDC_VERSION__ < 199901L)
#error "The nanotime library requires at least C99."
#endif
#else
#error "The current C or C++ standard is unknown; the nanotime library requires stdint.h and stdbool.h (C99 or higher, C++11 or higher, or Visual Studio 2010 or higher)."
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Implementor's note: This library directly uses Win32 APIs both for MSVC and
* MinGW GCC, as they work for both, and produce better behavior in MinGW
* builds. Detection of them is accomplished via checking if _WIN32 is defined,
* as it's defined in both MSVC and MinGW GCC. Though it's convenient to have
* UNIX-like APIs on Windows provided by MinGW, they just aren't as good as
* directly using Win32 APIs on Windows.
*/
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#define NANOTIME_NSEC_PER_SEC UINT64_C(1000000000)
#ifndef NANOTIME_ONLY_STEP
/*
 * Returns the current time since some unspecified epoch. Except for the
 * standard C11 implementation, and the POSIX implementation on non-Apple/Mach
 * platforms where neither CLOCK_MONOTONIC_RAW nor CLOCK_MONOTONIC is
 * available, the returned values increase monotonically, so they aren't
 * equivalent to calendar time (i.e., no leap seconds are accounted for,
 * etc.). Calendar time sometimes has to be used as a last resort, as
 * monotonic time isn't always available.
*/
uint64_t nanotime_now();
/*
* Returns the maximum possible timestamp value. Use of this value is required
* to properly handle overflow of timestamp values, such as when calculating the
* interval between a time value before overflow and the next time value after
* overflow.
*/
uint64_t nanotime_now_max();
/*
* Sleeps the current thread for the requested count of nanoseconds. The slept
* duration may be less than, equal to, or greater than the time requested.
*/
void nanotime_sleep(uint64_t nsec_count);
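/*
 * Example (a sketch): sleep for roughly one 60 Hz frame:
 *
 *     nanotime_sleep(NANOTIME_NSEC_PER_SEC / UINT64_C(60));
 */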
/*
* Yield the CPU/core that called nanotime_yield to the operating system for a
* small time slice.
*/
void nanotime_yield();
#endif
/*
* Calculates the time interval between two nanosecond time values, correctly
* handling the case when the end time value overflows past max. You should
* probably use this function when calculating time intervals, as not all
* platforms' maximum timestamp value is UINT64_MAX, which is required for the
* trivial "end - start" formula for interval calculation to work as expected.
*/
uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max);
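/*
 * Example (a minimal sketch; do_work is hypothetical application code):
 * measuring an elapsed duration robustly, even across a timestamp wraparound:
 *
 *     const uint64_t max = nanotime_now_max();
 *     const uint64_t start = nanotime_now();
 *     do_work();
 *     const uint64_t elapsed_nsec = nanotime_interval(start, nanotime_now(), max);
 */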
typedef struct nanotime_step_data {
	uint64_t sleep_duration; /* The requested fixed timestep duration (nanoseconds). */
	uint64_t now_max; /* The maximum timestamp value now can return. */
	uint64_t (* now)(); /* The timestamp function (e.g., nanotime_now). */
	void (* sleep)(uint64_t nsec_count); /* The sleep function (e.g., nanotime_sleep). */
	uint64_t zero_sleep_duration; /* Last measured duration of a zero-length sleep request. */
	uint64_t accumulator; /* Slept time carried between steps, to correct for overshoot. */
	uint64_t sleep_point; /* The timestamp the current step's deadline is measured from. */
} nanotime_step_data;
/*
* Initializes the nanotime precise fixed timestep object. Call immediately
* before entering the loop using the stepper object.
*/
void nanotime_step_init(
nanotime_step_data* const stepper,
const uint64_t sleep_duration,
const uint64_t now_max,
uint64_t (* const now)(),
void (* const sleep)(uint64_t nsec_count)
);
/*
* Does one step of sleeping for a fixed timestep logic update cycle. It makes
* a best-attempt at a precise delay per iteration, but might skip a cycle of
* sleeping if skipping sleeps is required to catch up to the correct
* wall-clock time. Returns true if a sleep up to the latest target sleep end
* time occurred, otherwise returns false in the case of a sleep step skip.
*/
bool nanotime_step(nanotime_step_data* const stepper);
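/*
 * Example usage (a minimal sketch of a 60 Hz fixed timestep loop; update()
 * and running are hypothetical application code):
 *
 *     nanotime_step_data stepper;
 *     nanotime_step_init(
 *         &stepper,
 *         NANOTIME_NSEC_PER_SEC / UINT64_C(60),
 *         nanotime_now_max(),
 *         nanotime_now,
 *         nanotime_sleep
 *     );
 *     while (running) {
 *         update();
 *         nanotime_step(&stepper);
 *     }
 */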
#if !defined(NANOTIME_ONLY_STEP) && defined(NANOTIME_IMPLEMENTATION)
/*
* Non-portable, platform-specific implementations are first. If none of them
* match the current platform, the standard C/C++ versions are used as a last
* resort.
*/
/*
* Checking _WIN32 must be above the UNIX-like implementations, so MinGW is
* guaranteed to use it.
*/
#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
#ifndef NANOTIME_NOW_IMPLEMENTED
uint64_t nanotime_now() {
	static uint64_t scale = UINT64_C(0);
	static bool multiply;
	if (scale == 0u) {
		LARGE_INTEGER frequency;
		QueryPerformanceFrequency(&frequency);
		if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
			scale = NANOTIME_NSEC_PER_SEC / frequency.QuadPart;
			multiply = true;
		}
		else {
			scale = frequency.QuadPart / NANOTIME_NSEC_PER_SEC;
			multiply = false;
		}
	}
	LARGE_INTEGER performanceCount;
	QueryPerformanceCounter(&performanceCount);
	if (multiply) {
		return performanceCount.QuadPart * scale;
	}
	else {
		return performanceCount.QuadPart / scale;
	}
}
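/*
 * Worked example of the scaling above (illustrative numbers, not guaranteed):
 * a common QPC frequency on current Windows systems is 10 MHz. Since
 * 10000000 < 1000000000, scale == 1000000000 / 10000000 == 100, and each
 * counter tick is multiplied by 100 to yield nanoseconds. With a hypothetical
 * frequency above 1 GHz, counts would instead be divided down to nanoseconds.
 */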
#define NANOTIME_NOW_IMPLEMENTED
#endif
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
uint64_t nanotime_now_max() {
	static uint64_t now_max;
	if (now_max == UINT64_C(0)) {
		LARGE_INTEGER frequency;
		QueryPerformanceFrequency(&frequency);
		if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
			now_max = UINT64_MAX * (NANOTIME_NSEC_PER_SEC / frequency.QuadPart);
		}
		else {
			now_max = UINT64_MAX / (frequency.QuadPart / NANOTIME_NSEC_PER_SEC);
		}
	}
	return now_max;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
void nanotime_sleep(uint64_t nsec_count) {
	LARGE_INTEGER dueTime;
	if (nsec_count < UINT64_C(100)) {
		/*
		 * Allows the OS to schedule another process for a single time
		 * slice. Better than a delay of 0, which immediately returns
		 * with no actual non-CPU-hogging delay. The time-slice-yield
		 * behavior is specified in Microsoft's Windows documentation.
		 */
		SleepEx(0UL, FALSE);
	}
	else {
		HANDLE timer = NULL;
		if (
#ifdef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
			/*
			 * Requesting a high resolution timer can make quite the
			 * difference, so always request high resolution if
			 * available. It's available in Windows 10 1803 and above.
			 * Building the request in when the build system supports
			 * it lets the executable use high resolution on systems
			 * that have it, while reverting to low resolution on
			 * systems that don't.
			 */
			(timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)) == NULL &&
#endif
			(timer = CreateWaitableTimer(NULL, TRUE, NULL)) == NULL
		) {
			return;
		}
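		/*
		 * Note: waitable timer due times are in 100 ns units, and a
		 * negative due time requests a relative delay; that's why
		 * nsec_count is divided by 100 and negated below.
		 */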
		dueTime.QuadPart = -(LONGLONG)(nsec_count / UINT64_C(100));
		SetWaitableTimer(timer, &dueTime, 0L, NULL, NULL, FALSE);
		WaitForSingleObject(timer, INFINITE);
		CloseHandle(timer);
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
/* Windows.h has already been included above in this _WIN32 block. */
void nanotime_yield() {
	YieldProcessor();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
/*
 * The platform-specific implementations must come before the generic
 * UNIX-like implementations, so that the lower-overhead kernel APIs are
 * preferred on platforms that provide both.
 */
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
/*
* The current platform is some Apple operating system, or at least uses some
* Mach kernel. The POSIX implementation below using clock_gettime works on at
* least Apple platforms, though this version using Mach functions has lower
* overhead.
*/
#include <mach/mach_time.h>
uint64_t nanotime_now() {
	static mach_timebase_info_data_t info = { 0 };
	if (info.denom == UINT32_C(0)) {
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
	}
	return (mach_absolute_time() * info.numer) / info.denom;
}
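/*
 * For example (illustrative, not guaranteed values): on Apple Silicon the
 * timebase is commonly numer == 125, denom == 3 (a 24 MHz tick), so each tick
 * converts to 125 / 3, about 41.67 ns; on Intel Macs it's typically
 * numer == denom == 1, i.e., ticks are already nanoseconds.
 */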
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
#include <mach/mach_time.h>
uint64_t nanotime_now_max() {
	static uint64_t now_max = UINT64_C(0);
	if (now_max == UINT64_C(0)) {
		mach_timebase_info_data_t info;
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
		else {
			now_max = UINT64_MAX / info.denom;
		}
	}
	return now_max;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __unix__
#include <unistd.h>
/* _POSIX_VERSION is only defined once unistd.h has been included. */
#if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 199309L)
/*
 * The current platform is some version of POSIX that might have
 * clock_gettime.
 */
#include <time.h>
#include <errno.h>
uint64_t nanotime_now() {
	struct timespec now;
	const int status = clock_gettime(
#if defined(CLOCK_MONOTONIC_RAW)
		/*
		 * CLOCK_MONOTONIC_RAW is more precise, but not always available.
		 * For the sorts of applications this code is intended for, mainly
		 * soft real time applications such as game programming, its subtle
		 * inconsistencies vs. CLOCK_MONOTONIC aren't an issue.
		 */
		CLOCK_MONOTONIC_RAW
#elif defined(CLOCK_MONOTONIC)
		/*
		 * CLOCK_MONOTONIC is quite good, and widely available, but not as
		 * precise as CLOCK_MONOTONIC_RAW, so it's only used if required.
		 */
		CLOCK_MONOTONIC
#else
		/*
		 * CLOCK_REALTIME isn't fully correct, as it's calendar time, but
		 * it's even more widely available than CLOCK_MONOTONIC. Monotonic
		 * clocks are only unavailable on very old platforms, so old they're
		 * likely unused now (as of last editing this, 2023).
		 */
		CLOCK_REALTIME
#endif
	, &now);
	assert(status == 0 || (status == -1 && errno != EOVERFLOW));
	if (status == 0 || (status == -1 && errno != EOVERFLOW)) {
		return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
	}
	else {
		return UINT64_C(0);
	}
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__MINGW32__) || defined(__MINGW64__)
#include <unistd.h>
#include <time.h>
#include <errno.h>
void nanotime_sleep(uint64_t nsec_count) {
	const struct timespec req = {
		.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
		.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
	};
#ifndef NDEBUG
	const int status =
#endif
	nanosleep(&req, NULL);
	assert(status == 0 || (status == -1 && errno != EINVAL));
}
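/*
 * Note: nanosleep can return early when interrupted by a signal (EINTR); no
 * retry loop is attempted above, which is consistent with nanotime_sleep's
 * contract that the slept duration may be less than requested.
 */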
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__unix__) || defined(__APPLE__)
#include <unistd.h>
/* _POSIX_VERSION is only defined once unistd.h has been included. */
#if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200112L)
#include <sched.h>
void nanotime_yield() {
	(void)sched_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/processmgr.h>
uint64_t nanotime_now() {
	return sceKernelGetProcessTimeWide() * UINT64_C(1000);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/processmgr.h>
void nanotime_sleep(uint64_t nsec_count) {
	sceKernelDelayThreadCB(nsec_count / UINT64_C(1000));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
/*
* NOTE: You *must* have asyncify enabled in the Emscripten build (pass
* -sASYNCIFY to the compiler/linker) or sleeping won't work.
*/
void nanotime_sleep(uint64_t nsec_count) {
	emscripten_sleep(nsec_count / UINT64_C(1000000));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
uint64_t nanotime_now() {
	/*
	 * emscripten_get_now returns milliseconds with sub-millisecond
	 * precision, so scale to nanoseconds before truncating to an integer,
	 * preserving the fractional milliseconds.
	 */
	const double now = emscripten_get_now();
	return (uint64_t)(now * 1000000.0);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
void nanotime_sleep(uint64_t nsec_count) {
	if (nsec_count > INT64_MAX) {
		svcSleepThread(INT64_MAX);
	}
	else {
		svcSleepThread((s64)nsec_count);
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
uint64_t nanotime_now() {
	return armTicksToNs(armGetSystemTick());
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__SWITCH__)
#include <switch.h>
void nanotime_yield() {
	svcSleepThread(YieldType_ToAnyThread);
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#include <time.h>
uint64_t nanotime_now() {
	struct timespec now;
	const int status = timespec_get(&now, TIME_UTC);
	assert(status == TIME_UTC);
	if (status == TIME_UTC) {
		return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
	}
	else {
		return UINT64_C(0);
	}
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#include <threads.h>
void nanotime_sleep(uint64_t nsec_count) {
	const struct timespec req = {
		.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
		.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
	};
	/* Guarded as in the POSIX version, so NDEBUG builds don't warn about an
	 * unused variable. */
#ifndef NDEBUG
	const int status =
#endif
	thrd_sleep(&req, NULL);
	assert(status == 0 || status == -1);
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#include <threads.h>
void nanotime_yield() {
	thrd_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
#ifdef __cplusplus
}
#endif
/*
* C++ implementations follow here, but defined with C linkage.
*/
#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __cplusplus
#include <cstdint>
#include <chrono>
extern "C" uint64_t nanotime_now() {
return static_cast<uint64_t>(
std::chrono::time_point_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now()
).time_since_epoch().count()
);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __cplusplus
#include <cstdint>
#include <chrono>
#include <thread>
#include <exception>
extern "C" void nanotime_sleep(uint64_t nsec_count) {
	try {
		std::this_thread::sleep_for(std::chrono::nanoseconds(nsec_count));
	}
	catch (const std::exception&) {
		/* The C interface can't propagate exceptions, so swallow them. */
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
#include <thread>
extern "C" void nanotime_yield() {
std::this_thread::yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_NOW_IMPLEMENTED
#error "Failed to implement nanotime_now (try using C11 with C11 threads support or C++11)."
#endif
#ifndef NANOTIME_SLEEP_IMPLEMENTED
#error "Failed to implement nanotime_sleep (try using C11 with C11 threads support or C++11)."
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
extern "C" {
#endif
/*
* As a last resort, make a zero-duration sleep request to implement yield.
* Such sleep requests often have the desired yielding behavior on many
* platforms.
*/
void nanotime_yield() {
	nanotime_sleep(UINT64_C(0));
}
#define NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
}
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
/*
* Might not be correct on some platforms, but it's the best we can do as a last
* resort.
*/
uint64_t nanotime_now_max() {
	return UINT64_MAX;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
#endif
#ifdef NANOTIME_IMPLEMENTATION
uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max) {
	assert(max > UINT64_C(0));
	assert(start <= max);
	assert(end <= max);
	if (end >= start) {
		return end - start;
	}
	else {
		return end + (max - start) + UINT64_C(1);
	}
}
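/*
 * Worked example of the wraparound case (illustrative numbers): with
 * max == 999, start == 990, and end == 5, the interval is
 * 5 + (999 - 990) + 1 == 15, i.e., 9 ticks up to max, 1 tick for the wrap
 * from max back to 0, and 5 ticks after the wrap.
 */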
void nanotime_step_init(
	nanotime_step_data* const stepper,
	const uint64_t sleep_duration,
	const uint64_t now_max,
	uint64_t (* const now)(),
	void (* const sleep)(uint64_t nsec_count)
) {
	assert(stepper != NULL);
	assert(sleep_duration > UINT64_C(0));
	assert(now_max > UINT64_C(0));
	assert(now != NULL);
	assert(sleep != NULL);
	stepper->sleep_duration = sleep_duration;
	stepper->now_max = now_max;
	stepper->now = now;
	stepper->sleep = sleep;
	const uint64_t start = now();
	sleep(UINT64_C(0));
	stepper->zero_sleep_duration = nanotime_interval(start, now(), now_max);
	stepper->accumulator = UINT64_C(0);
	/*
	 * This should be last here, so the sleep point is as close as possible
	 * to when stepping actually begins.
	 */
	stepper->sleep_point = now();
}
bool nanotime_step(nanotime_step_data* const stepper) {
	assert(stepper != NULL);
	const uint64_t start_point = stepper->now();
	if (nanotime_interval(stepper->sleep_point, start_point, stepper->now_max) >= stepper->sleep_duration + NANOTIME_NSEC_PER_SEC / UINT64_C(10)) {
		stepper->sleep_point = start_point;
		stepper->accumulator = UINT64_C(0);
	}
	bool slept;
	if (stepper->accumulator < stepper->sleep_duration) {
		const uint64_t total_sleep_duration = stepper->sleep_duration - stepper->accumulator;
		uint64_t current_sleep_duration = total_sleep_duration;
		const uint64_t shift = UINT64_C(4);
		/*
		 * The algorithm implemented here assumes that a sequence of
		 * repeated sleep requests of the same requested duration ends
		 * up with approximately equal actual sleep durations, even if
		 * they're all well above the requested duration. In practice,
		 * that assumption holds on various platforms.
		 */
		/*
		 * A big initial sleep lowers power usage on any platform, as
		 * many small sleep requests use more power than fewer bigger,
		 * equivalent sleep requests. In practice, operating systems
		 * "actually sleep" when 1 ms or more is requested, and 1 ms is
		 * the minimum request duration on some platforms (like older
		 * versions of Windows). Additionally, power usage stays low
		 * while issuing the run of 1 ms sleeps that (hopefully) stops
		 * short of the target duration.
		 *
		 * The loop here also maintains the maximum of the actual slept
		 * durations, breaking out once the time remaining is no
		 * greater than that maximum. By breaking out on the maximum
		 * found, rather than just when 1 ms or less remains, sleeping
		 * beyond the target deadline is reduced.
		 */
		{
			uint64_t max = NANOTIME_NSEC_PER_SEC / UINT64_C(1000);
			uint64_t start = stepper->now();
			while (nanotime_interval(stepper->sleep_point, start, stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(NANOTIME_NSEC_PER_SEC / UINT64_C(1000));
				const uint64_t next = stepper->now();
				const uint64_t current_interval = nanotime_interval(start, next, stepper->now_max);
				if (current_interval > max) {
					max = current_interval;
				}
				start = next;
			}
			const uint64_t initial_duration = nanotime_interval(start_point, stepper->now(), stepper->now_max);
			if (initial_duration < current_sleep_duration) {
				current_sleep_duration -= initial_duration;
			}
			else {
				goto step_end;
			}
		}
		/*
		 * This has the flavor of Zeno's dichotomous paradox of motion,
		 * as it successively divides the time remaining to sleep, but
		 * attempts to stop short of the deadline so the code below
		 * this loop can precisely sleep up to the deadline. The
		 * divisor is larger than two though, as that produces better
		 * behavior, and it seems to work fine in testing on real
		 * hardware. The same method as above of tracking the max
		 * duration per run of identical sleep requests is used here.
		 * The overshoot possible in the loop below this one is small,
		 * or in the best case zero, so the busyloop can finish the
		 * sleep precisely.
		 */
		current_sleep_duration >>= shift;
		for (
			uint64_t max = stepper->zero_sleep_duration;
			nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) + max < total_sleep_duration && current_sleep_duration > UINT64_C(0);
			current_sleep_duration >>= shift
		) {
			max = stepper->zero_sleep_duration;
			uint64_t start;
			while (max < stepper->sleep_duration && nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(current_sleep_duration);
				uint64_t slept_duration;
				if ((slept_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = slept_duration;
				}
			}
		}
		if (nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) >= total_sleep_duration) {
			goto step_end;
		}
		{
			/*
			 * After (hopefully) stopping short of the deadline by a
			 * small amount, do small sleeps here to get closer to
			 * the deadline, again attempting to stop short by an
			 * even smaller amount. It's best to do larger sleeps as
			 * done in the loops above, to reduce CPU/power usage, as
			 * each sleep iteration has a more-or-less fixed overhead
			 * of CPU/power usage.
			 *
			 * In testing on an M1 Mac mini running macOS, power
			 * usage is lower using zero-duration sleeps vs.
			 * nanotime_yield(), with no loss of timing precision.
			 * The same might be true on other hardware and operating
			 * systems.
			 */
			uint64_t max = stepper->zero_sleep_duration;
			uint64_t start;
			while (nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(UINT64_C(0));
				if ((stepper->zero_sleep_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = stepper->zero_sleep_duration;
				}
			}
		}
	step_end:
		{
			/*
			 * Finally, busyloop to precisely sleep up to the
			 * deadline. The code above this loop attempts to reduce
			 * the remaining time to sleep to a minimum via
			 * process-yielding sleeps, so the amount of time spent
			 * spinning here is hopefully quite low.
			 *
			 * In testing on an M1 Mac mini running macOS,
			 * busylooping here produces the absolute greatest
			 * precision possible on the hardware, down to the
			 * sub-10ns-off-per-update range for longish stretches
			 * during 60 Hz updates, but hundreds to thousands of
			 * nanoseconds off when using nanotime_yield() or
			 * zero-duration sleeps instead. And, because the
			 * sleeping algorithm above does such a good job of
			 * stopping very close to the deadline, busylooping here
			 * makes a basically negligible difference in power usage
			 * vs. yields/zero-duration sleeps.
			 */
			uint64_t current_time;
			uint64_t accumulated;
			while ((accumulated = nanotime_interval(stepper->sleep_point, current_time = stepper->now(), stepper->now_max)) < total_sleep_duration);
			stepper->accumulator += accumulated;
			stepper->sleep_point = current_time;
			slept = true;
		}
	}
	else {
		slept = false;
	}
	stepper->accumulator -= stepper->sleep_duration;
	return slept;
}
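/*
 * Example of the accumulator's role (illustrative numbers): with
 * sleep_duration == 16666666 (60 Hz), if one step oversleeps and accumulates
 * 20000000 ns, then 20000000 - 16666666 == 3333334 ns carry into the next
 * step, so the next step targets only 16666666 - 3333334 == 13333332 ns of
 * sleep, keeping the long-run rate at 60 Hz.
 */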
#endif
#ifdef __cplusplus
}
#endif
#endif /* _include_guard_nanotime_ */