<pre style='margin:0'>
Christopher Nielsen (mascguy) pushed a commit to branch master
in repository macports-legacy-support.
</pre>
<p><a href="https://github.com/macports/macports-legacy-support/commit/f462618cbefb5bc313e2ce9b47963469ea04fb0e">https://github.com/macports/macports-legacy-support/commit/f462618cbefb5bc313e2ce9b47963469ea04fb0e</a></p>
<pre style="white-space: pre; background: #F8F8F8"><span style='display:block; white-space:pre;color:#808000;'>commit f462618cbefb5bc313e2ce9b47963469ea04fb0e
</span>Author: Fred Wright &lt;fw@fwright.net&gt;
AuthorDate: Sun Feb 2 15:35:15 2025 -0800
<span style='display:block; white-space:pre;color:#404040;'> Rewrite mach_time scaling.
</span><span style='display:block; white-space:pre;color:#404040;'>
</span><span style='display:block; white-space:pre;color:#404040;'> This replaces all code related to scaling mach_time values with a new
</span><span style='display:block; white-space:pre;color:#404040;'> version which is both more accurate* and faster than the old version.
</span><span style='display:block; white-space:pre;color:#404040;'>
</span><span style='display:block; white-space:pre;color:#404040;'> * - The x86 case, with its 1/1 scale factor, was always accurate.
</span><span style='display:block; white-space:pre;color:#404040;'>
</span><span style='display:block; white-space:pre;color:#404040;'> TESTED:
</span><span style='display:block; white-space:pre;color:#404040;'> Passes new clock test on all platforms, including no longer reporting
</span><span style='display:block; white-space:pre;color:#404040;'> bad accuracy on PPC.
</span>---
src/time.c | 302 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 257 insertions(+), 45 deletions(-)
<span style='display:block; white-space:pre;color:#808080;'>diff --git a/src/time.c b/src/time.c
</span><span style='display:block; white-space:pre;color:#808080;'>index 437c017..03e3142 100644
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>--- a/src/time.c
</span><span style='display:block; white-space:pre;background:#e0e0ff;'>+++ b/src/time.c
</span><span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -21,6 +21,7 @@
</span> #if __MPLS_LIB_SUPPORT_GETTIME__
#include &lt;errno.h&gt;
<span style='display:block; white-space:pre;background:#e0ffe0;'>+#include &lt;math.h&gt;
</span> #include &lt;stddef.h&gt;
#include &lt;time.h&gt;
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -33,6 +34,7 @@
</span> #include &lt;mach/mach_time.h&gt;
#include &lt;mach/thread_act.h&gt;
<span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Constants for scaling time values */
</span> #define BILLION32 1000000000U
#define BILLION64 1000000000ULL
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -93,6 +95,240 @@ get_thread_usage(time_value_t *ut, time_value_t *st)
</span> return 0;
}
<span style='display:block; white-space:pre;background:#e0ffe0;'>+/*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Mach timebase scaling
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Many time types use "mach_time", which is a timescale based on arbitrary
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * units that can be converted to nanoseconds via a separately provided
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * scale factor. Nothing in Apple's documentation of mach_timebase_info()
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * indicates that this scale factor should be constant, and Apple's own
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * code for these functions fetches it on every call, but in 10.12+, that
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * function itself caches the scale factor. Because it's 10.12+, it's
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * known not to be applied to PowerPC, but if the scale factor for PowerPC
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * is ever updated at all after the initial boot (which is highly unlikely),
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * it would only change it by a small amount due to thermal variations, so we
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * assume that cacheing it is safe.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * The scale factor is provided as a rational number for maximum accuracy,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * with a 32-bit numerator and a 32-bit denominator. The observed values
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * on a few systems are:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * PowerPC: 1000000000 / &lt;frequency in Hz&gt;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * x86: 1 / 1
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * arm64 (M1): 125 / 3
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * In the x86 case, the true scaling happens at a lower level, with mach_time
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * always being in nanoseconds. The numbers in the arm64 case are sufficiently
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * "round" that it's clear that they're based on a nominal value, rather than a
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * measured value. Only PowerPC actually measures the frequency, with a
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * value that changes slightly (on the order of 2ppm) based on temperature.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * To actually realize the full accuracy of the rational representation, it's
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * necessary to compute nanoseconds as:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * nanoseconds = (mach_time * numerator) / denominator
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * However, with some scale-factor values (e.g., PowerPC), the intermediate
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * result easily overflows 64 bits. Overflow can be avoided by using:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * nanoseconds = mach_time * (numerator / denominator)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * But this results in a significant inaccuracy in the PowerPC case.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Apple's code takes the former approach, but it's only present in OS
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * versions that don't support PowerPC, and neither the x86 nor the arm64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * values are overflow-prone.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * The only way to get maximum accuracy while avoiding overflow is to use
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * double-precision (128-bit) integer arithmetic (or floating point, but that's
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * currently avoided). The conceptually straightforward approach would be to do
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * the same multiply-first calculation as above, but with double-precision
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * multiply and divide. But double-precision divide is messy and slow, so
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * it's attractive to consider a multiply-only approach.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Another possibility would be to scale the upper and lower halves of the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * mach time separately and combine the results, but this would require
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * incorporating the remainder from the upper divide into the lower divide,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * again making things messy and slow, and again suggesting the multiply-only
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * approach.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * With a 64-bit normalized multiplier and a corresponding shift (effectively
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * software floating-point), the accuracy would be extreme, but that would
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * require a variable double-precision shift in addition to the double-precision
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * multiply. If we instead use an unnormalized multiplier chosen for the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * desired result scale, then no shifting is needed (other than some 32-bit
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * shifts that aren't really shifts). With the actual observed scale factors,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * the maximum error from this approach is on the order of a couple of parts
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * per trillion.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * In this approach, the most convenient scaling is with a 64-bit multiplier
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * whose binary point is in the middle, i.e. a 32-bit integer part and a 32-bit
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * fractional part. Multiplying this by the 64-bit mach_time yields a 128-bit
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * product whose middle 64 bits are the result in nanoseconds. This scale
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * factor is easily computed as:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * scale = (numerator << 32) / denominator
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * or, with rounding:
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * scale = ((numerator << 32) + denominator / 2) / denominator
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * On x86, the scale becomes 1.0, which we check for when used to avoid the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * superfluous multiply.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Although a fully-normalized multiplier is inconvenient, we can improve the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * accuracy some with a small and simple tweak. Since the maximum numerator
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * fits in 30 bits, we can shift it left an additional two bits when computing
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * the multiplier, producing a final nanosecond result shifted by two bits.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * To get the full 64-bit nanosecond result, we'd need to right shift the high
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * 96 bits of the 128-bit product by two bits. But since it takes over 143
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * years of uptime to overflow 62 bits, we can skip the upper part and just
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * right shift the middle 64 bits of the product.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Out of maximum paranoia, we check for the case where the numerator doesn't
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * fit in 30 bits, and apply the left shift *after* the divide, to get the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * expected scale. We don't expect this code to be reached in practice.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * For the clock_gettime() case, the ultimate result is a timespec, with
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * separate seconds and nanoseconds. The straightforward multiply-only
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * approach doesn't work out so well in this case, either in range or in
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * error magnitude, so we just compute nanoseconds as in the former case,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * and then divide to get seconds. To get the nanosecond remainder, we
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * multiply back and subtract, which is faster than using the modulo operator,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * and none of the *div() functions provides the mixed-precision
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * operation needed here.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * The other use of mach_time scaling is for clock_getres(), where the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * scale factor actually represents the resolution of all clocks based on
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * mach time. This function isn't time-critical, but for consistency
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * we just use the same flow as the other cases, with cacheing.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * The primary cached scale factors in all cases are the derived factors,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * not the OS-provided mach scale. But for maximum consistency, we also
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * share a single cached copy of the mach scale across all uses.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Apple's code has a somewhat convoluted structure in order to do the
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * mach scaling setup prior to obtaining the mach time value, presumably
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * to ensure that the time obtained is as close as possible to the function's
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * return, even though the scale factor is cached. It also avoids obtaining
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * the scale factor for clocks that don't need it, in spite of the cacheing.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Here we just always do the setup first, regardless of clock type, but
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * defer the reporting of any related error until the need is known.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define EXTRA_SHIFT 2
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define HIGH_SHIFT (32 + EXTRA_SHIFT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define HIGH_BITS (64 - HIGH_SHIFT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define NUMERATOR_MASK (~0U << HIGH_BITS)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define NULL_SCALE (1ULL << HIGH_SHIFT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* The cached mach_time scale factors */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static mach_timebase_info_data_t mach_scale = {0};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static uint64_t mach_mult = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static struct timespec res_mach = {0, 0};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* And the fixed microsecond resolution for timeval-based clocks */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static struct timespec res_micros = {0, 1000};
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Obtain the mach_time scale factor if needed, or return an error */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+get_mach_scale(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (mach_scale.numer) return 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (mach_timebase_info(&mach_scale) != KERN_SUCCESS) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* On failure, make sure resulting scale is 0 */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mach_scale.numer = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mach_scale.denom = 1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return -1;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Set up the mach->nanoseconds multiplier, or return an error */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static int
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+setup_mach_mult(void)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ret = get_mach_scale();
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Set up main multiplier (0 if error getting scale) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (!(mach_scale.numer & NUMERATOR_MASK)) {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mach_mult = (((uint64_t) mach_scale.numer << HIGH_SHIFT)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ + mach_scale.denom / 2) / mach_scale.denom;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ } else {
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mach_mult = ((((uint64_t) mach_scale.numer << 32)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ + mach_scale.denom / 2) / mach_scale.denom) << EXTRA_SHIFT;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ }
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Also set up resolution as nanos/count rounded up */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ res_mach.tv_nsec = (mach_mult + (NULL_SCALE - 1)) >> HIGH_SHIFT;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return ret;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+#define MASK64LOW 0xFFFFFFFFULL
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * 64x64->128 multiply, returning middle 64
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * This code has been verified with a floating-zeroes/ones test, comparing
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * the results to Python's built-in multiprecision arithmetic.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static inline uint64_t
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+mmul64(uint64_t a, uint64_t b)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Split the operands into halves */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint32_t a_hi = a >> 32, a_lo = a;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint32_t b_hi = b >> 32, b_lo = b;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint64_t high, mid1, mid2, low;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Compute the four cross products */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ low = (uint64_t) a_lo * b_lo;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mid1 = (uint64_t) a_lo * b_hi;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mid2 = (uint64_t) a_hi * b_lo;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ high = (uint64_t) a_hi * b_hi;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Fold the results (must be in carry-propagation order) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mid1 += (mid2 & MASK64LOW) + (low >> 32);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ high += (mid1 >> 32) + (mid2 >> 32); /* Shifts must precede add */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Combine and return the two middle chunks */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return (high << 32) + (mid1 & MASK64LOW);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Convert mach units to nanoseconds */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static inline uint64_t
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+mach2nanos(uint64_t mach_time)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* If 1:1 scaling (x86), return as is */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (mach_mult == NULL_SCALE) return mach_time;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Otherwise, return appropriately scaled value */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return mmul64(mach_time, mach_mult) >> EXTRA_SHIFT;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Convert nanoseconds to timespec */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static inline void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+nanos2timespec(uint64_t nanos, struct timespec *ts)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint64_t secs;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint32_t lownanos, lowsecs, nanorem;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Divide nanoseconds to get seconds */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ secs = nanos / BILLION32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /*
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * Multiply & subtract (all 32-bit) to get nanosecond remainder.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * This is more efficient than using the '%' operator on all platforms,
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * and there's no version of *div() for a 64-bit dividend and 32-bit
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * divisor. Since the divisor, and hence the remainder, are known to
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ * fit in 32 bits, the entire computation can be done in 32 bits.
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ lownanos = nanos; lowsecs = secs;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ nanorem = lownanos - lowsecs * BILLION32;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Return values as a timespec */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ ts->tv_sec = secs; ts->tv_nsec = nanorem;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Convert mach units to timespec */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+static inline void
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+mach2timespec(uint64_t mach_time, struct timespec *ts)
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+{
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ nanos2timespec(mach2nanos(mach_time), ts);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+}
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+/* Now the actual public functions */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span> uint64_t
clock_gettime_nsec_np(clockid_t clk_id)
{
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -100,7 +336,9 @@ clock_gettime_nsec_np(clockid_t clk_id)
</span> struct timeval tod, bt;
struct rusage ru;
time_value_t ut, st;
<span style='display:block; white-space:pre;background:#ffe0e0;'>- static mach_timebase_info_data_t tbinfo;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Set up mach scaling early, whether we need it or not. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (!mach_mult) setup_mach_mult();
</span>
switch (clk_id) {
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -136,30 +374,21 @@ clock_gettime_nsec_np(clockid_t clk_id)
</span> return 0;
}
<span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Obtain and cache mach_time scale factor (as a rational) */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (!tbinfo.numer || !tbinfo.denom) {
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (mach_timebase_info(&tbinfo)) return 0;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- }
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>-
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Scale mach_time to nanoseconds and return it */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>-
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Note that 1/1 is a common case worth special-casing */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (tbinfo.numer == tbinfo.denom) return mach_time;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>-
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Temporary low-accuracy conversion */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Multiplying first overflows on some old platforms */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- return mach_time * (tbinfo.numer / tbinfo.denom);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Return scaled mach_time (0 if scale unobtained) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return mach2nanos(mach_time);
</span> }
int
clock_gettime(clockid_t clk_id, struct timespec *ts)
{
<span style='display:block; white-space:pre;background:#ffe0e0;'>- int ret;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- uint64_t mach_time;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int ret, mserr = 0;
</span> struct timeval tod, bt;
struct rusage ru;
time_value_t ut, st;
<span style='display:block; white-space:pre;background:#ffe0e0;'>- static mach_timebase_info_data_t tbinfo;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ uint64_t mach_time;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Set up mach scaling early, whether we need it or not. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (!mach_mult) mserr = setup_mach_mult();
</span>
switch (clk_id) {
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -203,31 +432,18 @@ clock_gettime(clockid_t clk_id, struct timespec *ts)
</span> return -1;
}
<span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Obtain and cache mach_time scale factor (as a rational) */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (!tbinfo.numer || !tbinfo.denom) {
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (mach_timebase_info(&tbinfo)) return -1;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- }
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>-
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Scale mach_time to nanoseconds and return it as a timespec */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>-
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Note that 1/1 is a common case worth special-casing */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (tbinfo.numer != tbinfo.denom) {
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Temporary low-accuracy conversion */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Multiplying first overflows on some old platforms */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- mach_time *= tbinfo.numer / tbinfo.denom;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- }
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- ts->tv_sec = mach_time / BILLION32;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- ts->tv_nsec = mach_time % BILLION32;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- return 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Convert to timespec & return (error if scale couldn't be obtained) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ mach2timespec(mach_time, ts);
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return mserr;
</span> }
int
clock_getres(clockid_t clk_id, struct timespec *res)
{
<span style='display:block; white-space:pre;background:#ffe0e0;'>- static mach_timebase_info_data_t tbinfo;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ int mserr = 0;
</span>
<span style='display:block; white-space:pre;background:#ffe0e0;'>- /* All results are less than one second. */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- res->tv_sec = 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Set up mach scale factor, whether we need it or not. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ if (!res_mach.tv_nsec) mserr = setup_mach_mult();
</span>
switch (clk_id) {
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -236,10 +452,10 @@ clock_getres(clockid_t clk_id, struct timespec *res)
</span> case CLOCK_MONOTONIC:
case CLOCK_PROCESS_CPUTIME_ID:
case CLOCK_THREAD_CPUTIME_ID:
<span style='display:block; white-space:pre;background:#ffe0e0;'>- res->tv_nsec = 1000;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *res = res_micros;
</span> return 0;
<span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Everything based on mach_time has scale-dependent resolution. */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Everything based on mach_time has mach resolution. */
</span> case CLOCK_MONOTONIC_RAW:
case CLOCK_MONOTONIC_RAW_APPROX:
case CLOCK_UPTIME_RAW:
<span style='display:block; white-space:pre;background:#e0e0e0;'>@@ -251,13 +467,9 @@ clock_getres(clockid_t clk_id, struct timespec *res)
</span> return -1;
}
<span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Obtain and cache mach_time scale factor (as a rational) */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (!tbinfo.numer || !tbinfo.denom) {
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- if (mach_timebase_info(&tbinfo)) return -1;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- }
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- /* Compute nanoseconds per unit, rounding up */
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- res->tv_nsec = (tbinfo.numer + tbinfo.denom - 1) / tbinfo.denom;
</span><span style='display:block; white-space:pre;background:#ffe0e0;'>- return 0;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ /* Return proper scale (error if scale couldn't be obtained) */
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ *res = res_mach;
</span><span style='display:block; white-space:pre;background:#e0ffe0;'>+ return mserr;
</span> }
int
</pre><pre style='margin:0'>
</pre>