#ifndef _include_guard_nanotime_
#define _include_guard_nanotime_

/*
 * You can choose this license, if possible in your jurisdiction:
 *
 * Unlicense
 *
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or distribute
 * this software, either in source code form or as a compiled binary, for any
 * purpose, commercial or non-commercial, and by any means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors of
 * this software dedicate any and all copyright interest in the software to the
 * public domain. We make this dedication for the benefit of the public at
 * large and to the detriment of our heirs and successors. We intend this
 * dedication to be an overt act of relinquishment in perpetuity of all present
 * and future rights to this software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * For more information, please refer to <https://unlicense.org>
 *
 *
 * Alternative license choice, if works can't be directly submitted to the
 * public domain in your jurisdiction:
 *
 * The MIT License (MIT)
 *
 * Copyright (C) 2022 Brandon McGriff
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if defined(_MSC_VER)
#if (_MSC_VER < 1600)
#error "Current Visual Studio version is not at least Visual Studio 2010; the nanotime library requires at least 2010."
#endif
#elif defined(__cplusplus)
#if (__cplusplus < 201103L)
#error "Current C++ standard is not at least C++11; the nanotime library requires at least C++11."
#endif
#elif defined(__STDC_VERSION__)
#if (__STDC_VERSION__ < 199901L)
#error "Current C standard is not at least C99; the nanotime library requires at least C99."
#endif
#else
#error "Current C or C++ standard is unknown; the nanotime library requires stdint.h and stdbool.h to be available (C99 or higher, C++11 or higher, Visual Studio 2010 or higher)."
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Implementor's note: This library directly uses Win32 APIs for both MSVC and
 * MinGW GCC, as they work for both and produce better behavior in MinGW
 * builds.
 * Detection of them is accomplished by checking whether _WIN32 is defined, as
 * it's defined by both MSVC and MinGW GCC. Though it's convenient to have
 * UNIX-like APIs on Windows provided by MinGW, they just aren't as good as
 * directly using Win32 APIs on Windows.
 */

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

#define NANOTIME_NSEC_PER_SEC UINT64_C(1000000000)

#ifndef NANOTIME_ONLY_STEP
/*
 * Returns the current time since some unspecified epoch. With the exception of
 * the standard C11 implementation, and of the POSIX implementation on
 * non-Apple/non-Mach-kernel platforms when neither CLOCK_MONOTONIC_RAW nor
 * CLOCK_MONOTONIC is available, the time values monotonically increase, so
 * they're not equivalent to calendar time (i.e., no leap seconds are accounted
 * for, etc.). Calendar time sometimes has to be used as a last resort, as
 * monotonic time isn't always available.
 */
uint64_t nanotime_now();

/*
 * Returns the maximum possible timestamp value. Use of this value is required
 * to properly handle overflow of timestamp values, such as when calculating
 * the interval between a time value taken before overflow and the next time
 * value taken after overflow.
 */
uint64_t nanotime_now_max();

/*
 * Sleeps the current thread for the requested count of nanoseconds. The slept
 * duration may be less than, equal to, or greater than the time requested.
 */
void nanotime_sleep(uint64_t nsec_count);

/*
 * Yields the CPU/core that called nanotime_yield to the operating system for a
 * small time slice.
 */
void nanotime_yield();
#endif

/*
 * Calculates the time interval between two nanosecond time values, correctly
 * handling the case when the end time value overflows past max. You should
 * probably use this function when calculating time intervals, as not all
 * platforms' maximum timestamp value is UINT64_MAX, which the trivial
 * "end - start" formula requires in order to work as expected.
 */
uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max);

typedef struct nanotime_step_data {
	uint64_t sleep_duration;
	uint64_t now_max;
	uint64_t (* now)();
	void (* sleep)(uint64_t nsec_count);

	uint64_t zero_sleep_duration;
	uint64_t accumulator;
	uint64_t sleep_point;
} nanotime_step_data;

/*
 * Initializes the nanotime precise fixed timestep object. Call immediately
 * before entering the loop using the stepper object.
 */
void nanotime_step_init(
	nanotime_step_data* const stepper,
	const uint64_t sleep_duration,
	const uint64_t now_max,
	uint64_t (* const now)(),
	void (* const sleep)(uint64_t nsec_count)
);

/*
 * Does one step of sleeping for a fixed timestep logic update cycle. It makes
 * a best attempt at a precise delay per iteration, but might skip a cycle of
 * sleeping if skipping sleeps is required to catch up to the correct
 * wall-clock time. Returns true if a sleep up to the latest target sleep end
 * time occurred, otherwise returns false in the case of a sleep step skip.
 */
bool nanotime_step(nanotime_step_data* const stepper);
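/*
 * Example usage (a minimal sketch, not part of the library itself): driving a
 * fixed 60-updates-per-second loop with the stepper. The update() function is
 * a hypothetical stand-in for application logic that returns false when the
 * program should quit.
 *
 *     #define NANOTIME_IMPLEMENTATION
 *     #include "nanotime.h"
 *
 *     bool update(void); // hypothetical application logic, defined elsewhere
 *
 *     int main(void) {
 *         nanotime_step_data stepper;
 *         nanotime_step_init(&stepper,
 *             NANOTIME_NSEC_PER_SEC / UINT64_C(60),
 *             nanotime_now_max(), nanotime_now, nanotime_sleep);
 *         bool running = true;
 *         while (running) {
 *             running = update();           // one fixed-timestep update
 *             nanotime_step(&stepper);      // sleep precisely to the next tick
 *         }
 *         return 0;
 *     }
 */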
#if !defined(NANOTIME_ONLY_STEP) && defined(NANOTIME_IMPLEMENTATION)

/*
 * Non-portable, platform-specific implementations are first. If none of them
 * match the current platform, the standard C/C++ versions are used as a last
 * resort.
 */

/*
 * Checking _WIN32 must be above the UNIX-like implementations, so MinGW is
 * guaranteed to use it.
 */

#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>

#ifndef NANOTIME_NOW_IMPLEMENTED
uint64_t nanotime_now() {
	static uint64_t scale = UINT64_C(0);
	static bool multiply;
	if (scale == 0u) {
		LARGE_INTEGER frequency;
		QueryPerformanceFrequency(&frequency);
		if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
			scale = NANOTIME_NSEC_PER_SEC / frequency.QuadPart;
			multiply = true;
		}
		else {
			scale = frequency.QuadPart / NANOTIME_NSEC_PER_SEC;
			multiply = false;
		}
	}
	LARGE_INTEGER performanceCount;
	QueryPerformanceCounter(&performanceCount);
	if (multiply) {
		return performanceCount.QuadPart * scale;
	}
	else {
		return performanceCount.QuadPart / scale;
	}
}
#define NANOTIME_NOW_IMPLEMENTED
#endif

#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
uint64_t nanotime_now_max() {
	static uint64_t now_max;
	if (now_max == UINT64_C(0)) {
		LARGE_INTEGER frequency;
		QueryPerformanceFrequency(&frequency);
		if (frequency.QuadPart < NANOTIME_NSEC_PER_SEC) {
			now_max = UINT64_MAX * (NANOTIME_NSEC_PER_SEC / frequency.QuadPart);
		}
		else {
			now_max = UINT64_MAX / (frequency.QuadPart / NANOTIME_NSEC_PER_SEC);
		}
	}
	return now_max;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
void nanotime_sleep(uint64_t nsec_count) {
	LARGE_INTEGER dueTime;
	if (nsec_count < UINT64_C(100)) {
		/*
		 * Allows the OS to schedule another process for a single time
		 * slice. Better than a delay of 0, which immediately returns
		 * with no actual non-CPU-hogging delay. The time-slice-yield
		 * behavior is specified in Microsoft's Windows documentation.
		 */
		SleepEx(0UL, FALSE);
	}
	else {
		HANDLE timer = NULL;
		if (
		#ifdef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
			/*
			 * Requesting a high resolution timer can make quite the
			 * difference, so always request high resolution if
			 * available. It's available in Windows 10 1803 and
			 * above. Building it in when the build system supports
			 * it allows the executable to use high resolution when
			 * available on a user's system, but to revert to low
			 * resolution when the user's system doesn't support it.
			 */
			(timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)) == NULL &&
		#endif
			(timer = CreateWaitableTimer(NULL, TRUE, NULL)) == NULL
		) {
			return;
		}

		dueTime.QuadPart = -(LONGLONG)(nsec_count / UINT64_C(100));
		SetWaitableTimer(timer, &dueTime, 0L, NULL, NULL, FALSE);
		WaitForSingleObject(timer, INFINITE);
		CloseHandle(timer);
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif

#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
void nanotime_yield() {
	YieldProcessor();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif

#endif
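/*
 * Worked example of the Windows scale logic above (an illustration with an
 * assumed frequency, not a guaranteed value): QueryPerformanceFrequency
 * commonly reports 10 MHz on modern Windows systems. Since 10000000 <
 * 1000000000, scale = 1000000000 / 10000000 = 100 and multiply = true, so
 * each 100 ns counter tick is multiplied by 100 to produce nanoseconds.
 */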
/*
 * To avoid using standard UNIX APIs on UNIX-like platforms, the
 * platform-specific implementations must come first. That way, the
 * lower-overhead kernel APIs that aren't UNIX-like can be used.
 */

#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
/*
 * The current platform is some Apple operating system, or at least uses some
 * Mach kernel. The POSIX implementation below using clock_gettime works on at
 * least Apple platforms, though this version using Mach functions has lower
 * overhead. (As one example, on Apple Silicon the timebase is commonly
 * numer/denom = 125/3, converting 24 MHz ticks to nanoseconds.)
 */
#include <mach/mach_time.h>
uint64_t nanotime_now() {
	static mach_timebase_info_data_t info = { 0 };
	if (info.denom == UINT32_C(0)) {
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
	}
	return (mach_absolute_time() * info.numer) / info.denom;
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
#if defined(__APPLE__) || defined(__MACH__)
#include <mach/mach_time.h>
uint64_t nanotime_now_max() {
	static uint64_t now_max = UINT64_C(0);
	if (now_max == UINT64_C(0)) {
		mach_timebase_info_data_t info;
		const kern_return_t status = mach_timebase_info(&info);
		assert(status == KERN_SUCCESS);
		if (status != KERN_SUCCESS) {
			return UINT64_C(0);
		}
		else {
			now_max = UINT64_MAX / info.denom;
		}
	}
	return now_max;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__unix__) && defined(_POSIX_VERSION) && (_POSIX_VERSION >= 199309L) && !defined(NANOTIME_NOW_IMPLEMENTED)
/*
 * The current platform is some version of POSIX that might have
 * clock_gettime.
 */
#include <unistd.h>
#include <time.h>
#include <errno.h>
uint64_t nanotime_now() {
	struct timespec now;
	const int status = clock_gettime(
	#if defined(CLOCK_MONOTONIC_RAW)
		/*
		 * Monotonic raw is more precise, but not always available. For
		 * the sorts of applications this code is intended for, mainly
		 * soft real time applications such as game programming, the
		 * subtle inconsistencies of it vs. monotonic aren't an issue.
		 */
		CLOCK_MONOTONIC_RAW
	#elif defined(CLOCK_MONOTONIC)
		/*
		 * Monotonic is quite good and widely available, but not as
		 * precise as monotonic raw, so it's only used if required.
		 */
		CLOCK_MONOTONIC
	#else
		/*
		 * Realtime isn't fully correct, as it's calendar time, but
		 * it's even more widely available than monotonic. Monotonic is
		 * only unavailable on very old platforms, though; so old that
		 * they're likely unused now (as of last editing this, 2023).
		 */
		CLOCK_REALTIME
	#endif
		, &now);
	assert(status == 0 || (status == -1 && errno != EOVERFLOW));
	if (status == 0 || (status == -1 && errno != EOVERFLOW)) {
		return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
	}
	else {
		return UINT64_C(0);
	}
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__MINGW32__) || defined(__MINGW64__)
#include <unistd.h>
#include <time.h>
#include <errno.h>
void nanotime_sleep(uint64_t nsec_count) {
	const struct timespec req = {
		.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
		.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
	};
	#ifndef NDEBUG
	const int status =
	#endif
	nanosleep(&req, NULL);
	assert(status == 0 || (status == -1 && errno != EINVAL));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_YIELD_IMPLEMENTED
#if (defined(__unix__) || defined(__APPLE__)) && defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200112L)
#include <sched.h>
void nanotime_yield() {
	(void)sched_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/processmgr.h>
uint64_t nanotime_now() {
	return sceKernelGetProcessTimeWide() * UINT64_C(1000);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__vita__)
#include <psp2/kernel/threadmgr.h>
void nanotime_sleep(uint64_t nsec_count) {
	sceKernelDelayThreadCB(nsec_count / UINT64_C(1000));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
/*
 * NOTE: You *must* have Asyncify enabled in the Emscripten build (pass
 * -sASYNCIFY to the compiler/linker) or sleeping won't work.
 */
void nanotime_sleep(uint64_t nsec_count) {
	emscripten_sleep(nsec_count / UINT64_C(1000000));
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
uint64_t nanotime_now() {
	/*
	 * emscripten_get_now returns fractional milliseconds, so convert to
	 * nanoseconds before truncating, preserving sub-millisecond precision.
	 */
	const double now = emscripten_get_now();
	return (uint64_t)(now * 1000000.0);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
void nanotime_sleep(uint64_t nsec_count) {
	if (nsec_count > INT64_MAX) {
		svcSleepThread(INT64_MAX);
	}
	else {
		svcSleepThread((s64)nsec_count);
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __SWITCH__
#include <switch.h>
uint64_t nanotime_now() {
	return armTicksToNs(armGetSystemTick());
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__SWITCH__)
#include <switch.h>
void nanotime_yield() {
	svcSleepThread(YieldType_ToAnyThread);
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#include <time.h>
uint64_t nanotime_now() {
	struct timespec now;
	const int status = timespec_get(&now, TIME_UTC);
	assert(status == TIME_UTC);
	if (status == TIME_UTC) {
		return (uint64_t)now.tv_sec * NANOTIME_NSEC_PER_SEC + (uint64_t)now.tv_nsec;
	}
	else {
		return UINT64_C(0);
	}
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#include <threads.h>
void nanotime_sleep(uint64_t nsec_count) {
	const struct timespec req = {
		.tv_sec = (time_t)(nsec_count / NANOTIME_NSEC_PER_SEC),
		.tv_nsec = (long)(nsec_count % NANOTIME_NSEC_PER_SEC)
	};
	const int status = thrd_sleep(&req, NULL);
	assert(status == 0 || status == -1);
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif
#ifndef NANOTIME_YIELD_IMPLEMENTED
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#include <threads.h>
void nanotime_yield() {
	thrd_yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif

#ifdef __cplusplus
}
#endif

/*
 * C++ implementations follow here, but defined with C linkage.
 */

#ifndef NANOTIME_NOW_IMPLEMENTED
#ifdef __cplusplus
#include <cstdint>
#include <chrono>
extern "C" uint64_t nanotime_now() {
	return static_cast<uint64_t>(
		std::chrono::time_point_cast<std::chrono::nanoseconds>(
			std::chrono::steady_clock::now()
		).time_since_epoch().count()
	);
}
#define NANOTIME_NOW_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#ifdef __cplusplus
#include <chrono>
#include <thread>
#include <exception>
extern "C" void nanotime_sleep(uint64_t nsec_count) {
	try {
		std::this_thread::sleep_for(std::chrono::nanoseconds(nsec_count));
	}
	catch (const std::exception&) {
		/* Sleeping is best-effort; swallow any exception. */
	}
}
#define NANOTIME_SLEEP_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
#include <thread>
extern "C" void nanotime_yield() {
	std::this_thread::yield();
}
#define NANOTIME_YIELD_IMPLEMENTED
#endif
#endif

#ifndef NANOTIME_NOW_IMPLEMENTED
#error "Failed to implement nanotime_now (try using C11 with C11 threads support or C++11)."
#endif

#ifndef NANOTIME_SLEEP_IMPLEMENTED
#error "Failed to implement nanotime_sleep (try using C11 with C11 threads support or C++11)."
#endif

#ifndef NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
extern "C" {
#endif
/*
 * As a last resort, make a zero-duration sleep request to implement yield.
 * Such sleep requests often have the desired yielding behavior on many
 * platforms.
 */
void nanotime_yield() {
	nanotime_sleep(0u);
}
#define NANOTIME_YIELD_IMPLEMENTED
#ifdef __cplusplus
}
#endif
#endif

#ifdef __cplusplus
extern "C" {
#endif

#ifndef NANOTIME_NOW_MAX_IMPLEMENTED
/*
 * Might not be correct on some platforms, but it's the best we can do as a
 * last resort.
 */
uint64_t nanotime_now_max() {
	return UINT64_MAX;
}
#define NANOTIME_NOW_MAX_IMPLEMENTED
#endif

#endif

#ifdef NANOTIME_IMPLEMENTATION

uint64_t nanotime_interval(const uint64_t start, const uint64_t end, const uint64_t max) {
	assert(max > UINT64_C(0));
	assert(start <= max);
	assert(end <= max);
	if (end >= start) {
		return end - start;
	}
	else {
		return end + (max - start) + UINT64_C(1);
	}
}
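/*
 * Worked example of the wraparound branch above (an illustration with toy
 * numbers): with max = 999, start = 990, and end = 5, the clock wrapped, so
 * the interval is 5 + (999 - 990) + 1 = 15 nanoseconds; the naive end - start
 * would underflow to a huge value.
 */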
void nanotime_step_init(
	nanotime_step_data* const stepper,
	const uint64_t sleep_duration,
	const uint64_t now_max,
	uint64_t (* const now)(),
	void (* const sleep)(uint64_t nsec_count)
) {
	assert(stepper != NULL);
	assert(sleep_duration > UINT64_C(0));
	assert(now_max > UINT64_C(0));
	assert(now != NULL);
	assert(sleep != NULL);

	stepper->sleep_duration = sleep_duration;
	stepper->now_max = now_max;
	stepper->now = now;
	stepper->sleep = sleep;

	const uint64_t start = now();
	sleep(UINT64_C(0));
	stepper->zero_sleep_duration = nanotime_interval(start, now(), now_max);
	stepper->accumulator = UINT64_C(0);

	/*
	 * This should be last here, so the sleep point is close to what it
	 * should be.
	 */
	stepper->sleep_point = now();
}

bool nanotime_step(nanotime_step_data* const stepper) {
	assert(stepper != NULL);

	const uint64_t start_point = stepper->now();
	/*
	 * If the stepper has fallen more than 100 ms past its expected step
	 * duration, reset it rather than trying to catch up.
	 */
	if (nanotime_interval(stepper->sleep_point, start_point, stepper->now_max) >= stepper->sleep_duration + NANOTIME_NSEC_PER_SEC / UINT64_C(10)) {
		stepper->sleep_point = start_point;
		stepper->accumulator = UINT64_C(0);
	}

	bool slept;

	if (stepper->accumulator < stepper->sleep_duration) {
		const uint64_t total_sleep_duration = stepper->sleep_duration - stepper->accumulator;
		uint64_t current_sleep_duration = total_sleep_duration;
		const uint64_t shift = UINT64_C(4);

		/*
		 * The algorithm implemented here works on the assumption that a
		 * sequence of repeated sleep requests of the same requested
		 * duration ends up being of approximately equal actual sleep
		 * duration, even if they're all well above the requested
		 * duration. In practice, this assumption proves true on
		 * various platforms.
		 */

		/*
		 * A big initial sleep lowers power usage on any platform, as
		 * more small sleep requests use more power than fewer bigger,
		 * equivalent sleep requests. In practice, operating systems
		 * "actually sleep" when 1 ms or more is requested, and 1 ms is
		 * the minimum request duration you can make on some platforms
		 * (like older versions of Windows). Additionally, power usage
		 * is nice and low when doing the number of 1 ms sleeps that
		 * (hopefully) falls short of the target duration.
		 *
		 * But the loop here maintains a maximum of the actual slept
		 * durations, breaking out when the maximum found is greater
		 * than or equal to the time remaining. By breaking out on the
		 * maximum found rather than just 1-ms-or-less remaining,
		 * sleeping beyond the target deadline is reduced.
		 */
		{
			uint64_t max = NANOTIME_NSEC_PER_SEC / UINT64_C(1000);
			uint64_t start = stepper->now();
			while (nanotime_interval(stepper->sleep_point, start, stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(NANOTIME_NSEC_PER_SEC / UINT64_C(1000));
				const uint64_t next = stepper->now();
				const uint64_t current_interval = nanotime_interval(start, next, stepper->now_max);
				if (current_interval > max) {
					max = current_interval;
				}
				start = next;
			}

			const uint64_t initial_duration = nanotime_interval(start_point, stepper->now(), stepper->now_max);
			if (initial_duration < current_sleep_duration) {
				current_sleep_duration -= initial_duration;
			}
			else {
				goto step_end;
			}
		}

		/*
		 * This has the flavor of Zeno's dichotomous paradox of motion,
		 * as it successively divides the time remaining to sleep, but
		 * attempts to stop short of the deadline, to hopefully be able
		 * to precisely sleep up to the deadline below this loop. The
		 * divisor is larger than two, though, as it produces better
		 * behavior, and seems to work fine in testing on real
		 * hardware. The same method as above, of tracking the max
		 * duration per loop of identical sleep requests, is used here.
		 * The loop below this one won't overshoot by much, or in the
		 * best case won't overshoot at all, so the busyloop can finish
		 * up the sleep precisely.
		 */
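		/*
		 * Worked example of the division below (an illustration with
		 * assumed durations): with shift = 4, a remaining duration of
		 * 2 ms is divided into sleep requests of 125 us; once the
		 * tracked max indicates 125 us requests would overshoot, the
		 * request drops to about 7.8 us, and so on, until the
		 * zero-duration sleeps and the busyloop take over right at the
		 * deadline.
		 */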
		current_sleep_duration >>= shift;
		for (
			uint64_t max = stepper->zero_sleep_duration;
			nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) + max < total_sleep_duration && current_sleep_duration > UINT64_C(0);
			current_sleep_duration >>= shift
		) {
			max = stepper->zero_sleep_duration;
			uint64_t start;
			while (max < stepper->sleep_duration && nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(current_sleep_duration);
				uint64_t slept_duration;
				if ((slept_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = slept_duration;
				}
			}
		}

		if (nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) >= total_sleep_duration) {
			goto step_end;
		}

		{
			/*
			 * After (hopefully) stopping short of the deadline by
			 * a small amount, do small sleeps here to get closer
			 * to the deadline, but again attempting to stop short
			 * by an even smaller amount. It's best to do larger
			 * sleeps as done in the above loops, to reduce
			 * CPU/power usage, as each sleep iteration has a
			 * more-or-less fixed overhead of CPU/power usage.
			 *
			 * In testing on an M1 Mac mini running macOS, power
			 * usage is lower using zero-duration sleeps vs.
			 * nanotime_yield(), with no loss of timing precision.
			 * The same might be true for other hardware/operating
			 * systems.
			 */
			uint64_t max = stepper->zero_sleep_duration;
			uint64_t start;
			while (nanotime_interval(stepper->sleep_point, start = stepper->now(), stepper->now_max) + max < total_sleep_duration) {
				stepper->sleep(UINT64_C(0));
				if ((stepper->zero_sleep_duration = nanotime_interval(start, stepper->now(), stepper->now_max)) > max) {
					max = stepper->zero_sleep_duration;
				}
			}
		}

	step_end:
		{
			/*
			 * Finally, do a busyloop to precisely sleep up to the
			 * deadline. The code above this loop attempts to
			 * reduce the remaining time to sleep to a minimum via
			 * process-yielding sleeps, so the amount of time spent
			 * spinning here is hopefully quite low.
			 *
			 * In testing on an M1 Mac mini running macOS,
			 * busylooping here produces the absolute greatest
			 * precision possible on the hardware, down to the
			 * sub-10ns-off-per-update range for longish stretches
			 * during 60 Hz updates, but in the
			 * hundreds-to-thousands of nanoseconds off when using
			 * nanotime_yield() or zero-duration sleeps. And,
			 * because the sleeping algorithm above does such a
			 * good job of stopping very close to the deadline,
			 * busylooping here makes a basically negligible
			 * difference in power usage vs. yields/zero-duration
			 * sleeps.
			 */
			uint64_t current_time;
			uint64_t accumulated;
			while ((accumulated = nanotime_interval(stepper->sleep_point, current_time = stepper->now(), stepper->now_max)) < total_sleep_duration);
			stepper->accumulator += accumulated;
			stepper->sleep_point = current_time;
			slept = true;
		}
	}
	else {
		slept = false;
	}
	stepper->accumulator -= stepper->sleep_duration;
	return slept;
}

#endif

#ifdef __cplusplus
}
#endif

#endif /* _include_guard_nanotime_ */