hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit ca6549e62bafd818aeb242c7b85e97344769185d
parent a70840843b277c91e27f11d1ea0bad7dd50cbc9b
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Sun, 22 Aug 2021 17:06:07 +0200

Add trig functions, uint functions and various other math functions

* Added uint functions
* Added isclose() and friends
* Added round() and friends
* Added trig functions such as sin(), sinh(), asin(), asinh() etc.
* Added log1(), log2(), log1p()
* Added modf64()
* Renamed old modf() to modfrac(), to avoid confusion with fmod(), which is
  named modf64()
* Added lots of new tests, based on Go's all_test.go
* Removed some trailing whitespace from some strconv:: functions

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
Mmath/floats.ha | 134+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mmath/math.ha | 746++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Amath/testdata.ha | 440+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amath/trig.ha | 1019+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amath/uints.ha | 543+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mscripts/gen-stdlib | 2+-
Mstdlib.mk | 10++++++++--
Mstrconv/ftos.ha | 6+++---
Mstrconv/stof.ha | 4++--
9 files changed, 2700 insertions(+), 204 deletions(-)

diff --git a/math/floats.ha b/math/floats.ha @@ -99,6 +99,9 @@ def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111; def F32_EXP_ZERO: u32 = ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32); +// The bits that represent the number 1f64. +def F64_ONE: u64 = 0x3FF0000000000000; + // Contains information about the structure of a specific floating point number // type. export type floatinfo = struct { @@ -176,10 +179,10 @@ export fn isneginf(n: f64) bool = { assert(!isinf(NAN)); assert(!isinf(1.23)); assert(!isinf(-1.23f32)); - assert(isposinf(math::INF)); - assert(!isposinf(-math::INF)); - assert(isneginf(-math::INF)); - assert(!isneginf(math::INF)); + assert(isposinf(INF)); + assert(!isposinf(-INF)); + assert(isneginf(-INF)); + assert(!isneginf(INF)); }; // Returns true if the given floating-point number is normal. @@ -276,11 +279,17 @@ export fn issubnormalf32(n: f32) bool = { // Returns the absolute value of f64 n. export fn absf64(n: f64) f64 = { + if (isnan(n)) { + return n; + }; return f64frombits(f64bits(n) & ~F64_SIGN_MASK); }; // Returns the absolute value of f32 n. export fn absf32(n: f32) f32 = { + if (isnan(n)) { + return n; + }; return f32frombits(f32bits(n) & ~F32_SIGN_MASK); }; @@ -293,11 +302,14 @@ export fn absf(n: types::floating) f64 = { }; @test fn absf() void = { - assert(absf64(2.0f64) == 2.0f64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(absf(TEST_INPUTS[idx]) == TEST_ABSF[idx]); + }; + assert(absf64(2f64) == 2f64); assert(absf32(2.0f32) == 2.0f32); - assert(absf(2.0f64) == 2.0f64); - assert(absf(2.0f32) == 2.0f64); - assert(absf(-2.0f64) == 2.0f64); + assert(absf(2f64) == 2f64); + assert(absf(2.0f32) == 2f64); + assert(absf(-2f64) == 2f64); assert(absf(-2.0f32) == 2.0f32); assert(absf(0f32) == 0f32); assert(absf(0f64) == 0f64); @@ -333,16 +345,19 @@ export fn signf(x: types::floating) i64 = { }; @test fn signf() void = { - assert(signf(0.0f64) == 1i64); - assert(signf(-0.0f64) == -1i64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(signf(TEST_INPUTS[idx]) == TEST_SIGNF[idx]); + }; + assert(signf(0f64) == 1i64); + assert(signf(-0f64) == -1i64); assert(signf(0.0f32) == 1i64); assert(signf(-0.0f32) == -1i64); assert(signf(1.5f64) == 1i64); assert(signf(-1.5f64) == -1i64); - assert(ispositive(1.0f64)); - assert(!ispositive(-1.0f64)); - assert(isnegative(-1.0f64)); - assert(!isnegative(1.0f64)); + assert(ispositive(1f64)); + assert(!ispositive(-1f64)); + assert(isnegative(-1f64)); + assert(!isnegative(1f64)); }; // Returns whether or not x is positive. @@ -394,16 +409,16 @@ export fn copysign(x: types::floating, y: types::floating) f64 = { }; @test fn copysign() void = { - assert(copysign(100.0f64, 1.0f64) == 100.0f64); - assert(copysign(100.0f64, -1.0f64) == -100.0f64); + assert(copysign(100f64, 1f64) == 100f64); + assert(copysign(100f64, -1f64) == -100f64); assert(copysign(100.0f32, 1.0f32) == 100.0f32); assert(copysign(100.0f32, -1.0f32) == -100.0f32); - assert(copysign(100.0f64, 0.0f64) == 100.0f64); - assert(copysign(100.0f64, -0.0f64) == -100.0f64); - assert(copysign(0.0f64, 100.0f64) == 0.0f64); - assert(signf(copysign(0.0f64, 100.0f64)) > 0); - assert(copysign(0.0f64, -100.0f64) == 0.0f64); - assert(signf(copysign(0.0f64, -100.0f64)) < 0); + assert(copysign(100f64, 0f64) == 100f64); + assert(copysign(100f64, -0f64) == -100f64); + assert(copysign(0f64, 100f64) == 0f64); + assert(signf(copysign(0f64, 100f64)) > 0); + assert(copysign(0f64, -100f64) == 0f64); + assert(signf(copysign(0f64, -100f64)) < 0); }; // Takes a potentially subnormal f64 n and returns a normal f64 normal_float @@ -484,7 +499,13 @@ export fn frexp(n: types::floating) (f64, i64) = { }; @test fn frexp() void = { - let res = frexp(3.0f64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + let res = frexp(TEST_INPUTS[idx]); + let expected = TEST_FREXP[idx]; + assert(res.0 == expected.0); + assert(res.1 == expected.1); + }; + let res = frexp(3f64); assert(res.0 == 0.75f64); assert(res.1 == 2i64); res = frexp(2.42f64); @@ -511,21 +532,21 @@ export fn ldexpf64(mantissa: f64, exp: i64) f64 = { let res_exp = exp + normalization_exp + mantissa_exp; // Underflow if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) { - return copysign(0.0f64, mantissa); + return copysign(0f64, mantissa); }; // Overflow if (res_exp > (F64_EXPONENT_BIAS: i64)) { - if (mantissa < 0.0f64) { + if (mantissa < 0f64) { return -INF; } else { return INF; }; }; // Subnormal - let subnormal_factor = 1.0f64; + let subnormal_factor = 1f64; if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) { res_exp += (F64_MANTISSA_BITS: i64) - 1; - subnormal_factor = 1.0f64 / + subnormal_factor = 1f64 / ((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64); }; const res: u64 = (bits & F64_EXP_REMOVAL_MASK) | @@ -586,11 +607,11 @@ export fn ldexpf32(mantissa: f32, exp: i64) f32 = { const res64 = ldexpf64(parts.0, parts.1); assert(res64 == tests64[i]); }; - assert(ldexpf64(1.0f64, -1076i64) == 0.0f64); - assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64); - assert(signf(ldexpf64(-1.0f64, -1076i64)) < 0); - assert(ldexpf64(2.0f64, 1024i64) == INF); - assert(ldexpf64(-2.0f64, 1024i64) == -INF); + assert(ldexpf64(1f64, -1076i64) == 0f64); + assert(ldexpf64(-1f64, -1076i64) == -0f64); + assert(signf(ldexpf64(-1f64, -1076i64)) < 0); + assert(ldexpf64(2f64, 1024i64) == INF); + assert(ldexpf64(-2f64, 1024i64) == -INF); const tests32: [_]f32 = [INF, -INF, 0.0, 1.0, -1.0, 2.42, 123456789.0, @@ -609,13 +630,13 @@ export fn ldexpf32(mantissa: f32, exp: i64) f32 = { }; // Returns the integer and fractional parts of an f64. -export fn modf64(n: f64) (i64, f64) = { - if (n < 1.0f64) { - if (n < 0.0f64) { - let positive_parts = modf64(-n); +export fn modfracf64(n: f64) (i64, f64) = { + if (n < 1f64) { + if (n < 0f64) { + let positive_parts = modfracf64(-n); return (-positive_parts.0, -positive_parts.1); }; - if (n == 0.0f64) { + if (n == 0f64) { return ((n: i64), n); }; return (0i64, n); @@ -636,10 +657,10 @@ export fn modf64(n: f64) (i64, f64) = { }; // Returns the integer and fractional parts of an f32. -export fn modf32(n: f32) (i32, f32) = { +export fn modfracf32(n: f32) (i32, f32) = { if (n < 1.0f32) { if (n < 0.0f32) { - let positive_parts = modf32(-n); + let positive_parts = modfracf32(-n); return (-positive_parts.0, -positive_parts.1); }; if (n == 0.0f32) { @@ -662,48 +683,53 @@ export fn modf32(n: f32) (i32, f32) = { return ((int_part: i32), frac_part); }; -@test fn modf() void = { +@test fn modfrac() void = { // 64 - let res = modf64(1.75f64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + let res = modfracf64(TEST_INPUTS[idx]); + assert(res.0 == TEST_MODFRAC[idx].0); + assert(isclose(res.1, TEST_MODFRAC[idx].1)); + }; + let res = modfracf64(1.75f64); assert(res.0 == 1i64); assert(res.1 == 0.75f64); - res = modf64(0.75f64); + res = modfracf64(0.75f64); assert(res.0 == 0i64); assert(res.1 == 0.75f64); - res = modf64(-0.75f64); + res = modfracf64(-0.75f64); assert(res.0 == -0i64); assert(res.1 == -0.75f64); - res = modf64(0.0f64); + res = modfracf64(0f64); assert(res.0 == 0i64); - assert(res.1 == 0.0f64); + assert(res.1 == 0f64); assert(signf(res.1) > 0); - res = modf64(-0.0f64); + res = modfracf64(-0f64); assert(res.0 == -0i64); - assert(res.1 == -0.0f64); + assert(res.1 == -0f64); assert(signf(res.1) < 0); - res = modf64(23.50f64); + res = modfracf64(23.50f64); assert(res.0 == 23i64); assert(res.1 == 0.50f64); // 32 - let res = modf32(1.75f32); + let res = modfracf32(1.75f32); assert(res.0 == 1i32); assert(res.1 == 0.75f32); - res = modf32(0.75f32); + res = modfracf32(0.75f32); assert(res.0 == 0i32); assert(res.1 == 0.75f32); - res = modf32(-0.75f32); + res = modfracf32(-0.75f32); assert(res.0 == -0i32); assert(res.1 == -0.75f32); - res = modf32(0.0f32); + res = modfracf32(0.0f32); assert(res.0 == 0i32); assert(res.1 == 0.0f32); assert(signf(res.1) > 0); - res = modf32(-0.0f32); + res = modfracf32(-0.0f32); assert(res.0 == -0i32); assert(res.1 == -0.0f32); assert(signf(res.1) < 0); - res = modf32(23.50f32); + res = modfracf32(23.50f32); assert(res.0 == 23i32); assert(res.1 == 0.50f32); }; diff --git a/math/math.ha b/math/math.ha @@ -1,7 +1,7 @@ // Sections of the code below, in particular log() and exp(), are based on Go's // implementation, which is, in turn, based on FreeBSD's. The original C code, // as well as the respective comments and constants are from -// /usr/src/lib/msun/src/e_log.c. +// /usr/src/lib/msun/src/{e_log,e_exp}.c. // // The FreeBSD copyright notice: // ==================================================== @@ -46,13 +46,24 @@ use types; +// The standard tolerance used by isclose(), which is just an arbitrary way to +// measure whether two floating-point numbers are "sufficiently" close to each +// other. +export def STANDARD_TOL: f64 = 1e-14; + // Returns whether x and y are within tol of each other. export fn eqwithinf64(x: f64, y: f64, tol: f64) bool = { + if (isnan(x) || isnan(y) || isnan(tol)) { + return false; + }; return absf64(x - y) < tol; }; // Returns whether x and y are within tol of each other. export fn eqwithinf32(x: f32, y: f32, tol: f32) bool = { + if (isnan(x) || isnan(y) || isnan(tol)) { + return false; + }; return absf32(x - y) < tol; }; @@ -65,10 +76,31 @@ export fn eqwithin(x: types::floating, y: types::floating, }; }; -@test fn equalwithin() void = { - assert(eqwithin(1.0f64, 2.0f64, 2.0f64)); +// Returns whether x and y are within [[STANDARD_TOL]] of each other. +export fn isclosef64(x: f64, y: f64) bool = { + return eqwithinf64(x, y, STANDARD_TOL); +}; + +// Returns whether x and y are within [[STANDARD_TOL]] of each other. +export fn isclosef32(x: f32, y: f32) bool = { + return eqwithinf32(x, y, STANDARD_TOL); +}; + +// Returns whether x and y are within [[STANDARD_TOL]] of each other. +export fn isclose(x: types::floating, y: types::floating) bool = { + return match (x) { + n: f64 => isclosef64(n, y as f64), + n: f32 => isclosef32(n, y as f32), + }; +}; + +@test fn eqwithin() void = { + assert(eqwithin(1f64, 2f64, 2f64)); assert(eqwithin(1.0f32, 2.0f32, 2.0f32)); assert(!eqwithin(1.0005f32, 1.0004f32, 0.00001f32)); + assert(isclose(1f64, 1.0000000000000000000000000001f64)); + assert(isclose(1.0f32, 1.0000000000000000000000000001f32)); + assert(!isclose(1.0005f32, 1.0004f32)); }; // e - https://oeis.org/A001113 @@ -90,11 +122,11 @@ def LN_2: f64 = 0.69314718055994530941723212145817656807550013436025525412068000 def LN2_HI: f64 = 6.93147180369123816490e-01; def LN2_LO: f64 = 1.90821492927058770002e-10; // log_{2}(e) -def LOG2_E: f64 = 1.0f64 / LN_2; +def LOG2_E: f64 = 1f64 / LN_2; // ln(10) - https://oeis.org/A002392 def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790; // log_{10}(e) -def LOG10_E: f64 = 1.0f64 / LN_10; +def LOG10_E: f64 = 1f64 / LN_10; // __ieee754_log(x) // Return the logarithm of x @@ -146,20 +178,20 @@ def LOG10_E: f64 = 1.0f64 / LN_10; // Returns the natural logarithm of x. export fn logf64(x: f64) f64 = { - const L1 = 6.666666666666735130e-01; // 3FE55555 55555593 - const L2 = 3.999999999940941908e-01; // 3FD99999 9997FA04 - const L3 = 2.857142874366239149e-01; // 3FD24924 94229359 - const L4 = 2.222219843214978396e-01; // 3FCC71C5 1D8E78AF - const L5 = 1.818357216161805012e-01; // 3FC74664 96CB03DE - const L6 = 1.531383769920937332e-01; // 3FC39A09 D078C69F - const L7 = 1.479819860511658591e-01; // 3FC2F112 DF3E5244 + const L1 = 6.666666666666735130e-01; // 3fe55555 55555593 + const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04 + const L3 = 2.857142874366239149e-01; // 3fd24924 94229359 + const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af + const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de + const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f + const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244 // special cases if (isnan(x) || isposinf(x)) { return x; - } else if (x < 0.0f64) { + } else if (x < 0f64) { return NAN; - } else if (x == 0.0f64) { + } else if (x == 0f64) { return -INF; }; @@ -167,15 +199,15 @@ export fn logf64(x: f64) f64 = { const f1_and_ki = frexp(x); let f1 = f1_and_ki.0; let ki = f1_and_ki.1; - if (f1 < (SQRT_2 / 2.0f64)) { - f1 *= 2.0f64; + if (f1 < (SQRT_2 / 2f64)) { + f1 *= 2f64; ki -= 1i64; }; - let f = f1 - 1.0f64; + let f = f1 - 1f64; let k = (ki: f64); // Compute - const s = f / (2.0f64 + f); + const s = f / (2f64 + f); const s2 = s * s; const s4 = s2 * s2; const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7))); @@ -186,14 +218,245 @@ export fn logf64(x: f64) f64 = { }; @test fn logf64() void = { - assert(logf64(E) == 1.0f64); - assert(logf64(54.598150033144239078110261202860878402790f64) == 4.0f64); - assert(isnan(logf64(-1.0f64))); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose( + logf64(absf64(TEST_INPUTS[idx])), + TEST_LOG[idx])); + }; + assert(logf64(E) == 1f64); + assert(logf64(54.598150033144239078110261202860878402790f64) == 4f64); + assert(isnan(logf64(-1f64))); assert(isposinf(logf64(INF))); - assert(isneginf(logf64(0.0f64))); + assert(isneginf(logf64(0f64))); assert(isnan(logf64(NAN))); }; +// Returns the decimal logarithm of x. +export fn log10f64(x: f64) f64 = { + return logf64(x) * (1f64 / LN_10); +}; + +@test fn log10f64() void = { + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose( + log10f64(absf64(TEST_INPUTS[idx])), + TEST_LOG10[idx])); + }; +}; + +// Returns the binary logarithm of x. +export fn log2f64(x: f64) f64 = { + const frexp_res = frexpf64(x); + let frac = frexp_res.0; + let exp = frexp_res.1; + // Make sure exact powers of two give an exact answer. + // Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1. + if (frac == 0.5f64) { + return ((exp - 1): f64); + }; + return logf64(frac) * (1f64 / LN_2) + (exp: f64); +}; + +@test fn log2f64() void = { + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose( + log2f64(absf64(TEST_INPUTS[idx])), + TEST_LOG2[idx])); + }; +}; + +// double log1p(double x) +// +// Method : +// 1. Argument Reduction: find k and f such that +// 1+x = 2**k * (1+f), +// where sqrt(2)/2 < 1+f < sqrt(2) . +// +// Note. If k=0, then f=x is exact. However, if k!=0, then f +// may not be representable exactly. In that case, a correction +// term is need. Let u=1+x rounded. Let c = (1+x)-u, then +// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), +// and add back the correction term c/u. +// (Note: when x > 2**53, one can simply return log(x)) +// +// 2. Approximation of log1p(f). +// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) +// = 2s + 2/3 s**3 + 2/5 s**5 + ....., +// = 2s + s*R +// We use a special Reme algorithm on [0,0.1716] to generate +// a polynomial of degree 14 to approximate R The maximum error +// of this polynomial approximation is bounded by 2**-58.45. In +// other words, +// 2 4 6 8 10 12 14 +// R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s +LP6*s +LP7*s +// (the values of LP1 to LP7 are listed in the program) +// and +// | 2 14 | -58.45 +// | LP1*s +...+LP7*s - R(z) | <= 2 +// | | +// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. +// In order to guarantee error in log below 1ulp, we compute log +// by +// log1p(f) = f - (hfsq - s*(hfsq+R)). +// +// 3. Finally, log1p(x) = k*ln2 + log1p(f). +// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) +// Here ln2 is split into two floating point number: +// ln2_hi + ln2_lo, +// where n*ln2_hi is always exact for |n| < 2000. +// +// Special cases: +// log1p(x) is NaN with signal if x < -1 (including -INF) ; +// log1p(+INF) is +INF; log1p(-1) is -INF with signal; +// log1p(NaN) is that NaN with no signal. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// +// Note: Assuming log() return accurate answer, the following +// algorithm can be used to compute log1p(x) to within a few ULP: +// +// u = 1+x; +// if(u==1.0) return x ; else +// return log(u)*(x/(u-1.0)); +// +// See HP-15C Advanced Functions Handbook, p.193. + +// Returns the natural logarithm of 1 plus its argument x. +// It is more accurate than log(1 + x) when x is near zero. +export fn log1pf64(x: f64) f64 = { + // sqrt(2) - 1 + const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34 + // sqrt(2) / 2 - 1 + const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866 + const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29 + const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54 + const TWO53 = ((1i64 << 53): f64); // 2**53 + const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000 + const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76 + const LP1 = 6.666666666666735130e-01; // 3fe5555555555593 + const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04 + const LP3 = 2.857142874366239149e-01; // 3fd2492494229359 + const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af + const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de + const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f + const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244 + + if (x < -1f64 || isnan(x)) { + return NAN; + } else if (x == -1f64) { + return -INF; + } else if (isposinf(x)) { + return INF; + }; + + const absx = absf64(x); + + let f = 0f64; + let iu = 0u64; + let k = 1i64; + if (absx < SQRT2M1) { // |x| < Sqrt(2)-1 + if (absx < SMALL) { // |x| < 2**-29 + if (absx < TINY) { // |x| < 2**-54 + return x; + }; + return x - (x * x * 0.5f64); + }; + if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x + // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) + k = 0; + f = x; + iu = 1; + }; + }; + let c = 0f64; + if (k != 0) { + let u = 0f64; + if (absx < TWO53) { // 1<<53 + u = 1.0 + x; + iu = f64bits(u); + k = (((iu >> 52) - 1023): i64); + // Correction term + if (k > 0) { + c = 1f64 - (u - x); + } else { + c = x - (u - 1f64); + }; + c /= u; + } else { + u = x; + iu = f64bits(u); + k = (((iu >> 52) - 1023): i64); + c = 0f64; + }; + iu &= 0x000fffffffffffff; + if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2) + // Normalize u + u = f64frombits(iu | 0x3ff0000000000000); + } else { + k += 1; + // Normalize u/2 + u = f64frombits(iu | 0x3fe0000000000000); + iu = (0x0010000000000000 - iu) >> 2; + }; + f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2) + }; + const hfsq = 0.5 * f * f; + let s = 0f64; + let R = 0f64; + let z = 0f64; + if (iu == 0) { // |f| < 2**-20 + if (f == 0f64) { + if (k == 0) { + return 0f64; + }; + c += (k: f64) * LN2LO; + return (k: f64) * LN2HI + c; + }; + R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division + if (k == 0) { + return f - R; + }; + return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f); + }; + s = f / (2f64 + f); + z = s * s; + R = z * (LP1 + + z * (LP2 + + z * (LP3 + + z * (LP4 + + z * (LP5 + + z * (LP6 + + z * LP7)))))); + if (k == 0) { + return f - (hfsq - s * (hfsq + R)); + }; + return (k: f64) * LN2HI - + ((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f); +}; + +@test fn log1p() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + log1pf64(TEST_INPUTS[idx] / 100f64), + TEST_LOG1P[idx])); + }; + assert(isnan(log1pf64(-INF))); + assert(isnan(log1pf64(-PI))); + assert(log1pf64(-1f64) == -INF); + assert(log1pf64(-0f64) == -0f64); + assert(log1pf64(0f64) == 0f64); + assert(log1pf64(INF) == INF); + assert(isnan(log1pf64(NAN))); +}; + // exp(x) // Returns the exponential of x. // @@ -258,16 +521,16 @@ export fn logf64(x: f64) f64 = { // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2). export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = { - const P1 = 1.66666666666666657415e-01; // 0x3FC55555; 0x55555555 - const P2 = -2.77777777770155933842e-03; // 0xBF66C16C; 0x16BEBD93 - const P3 = 6.61375632143793436117e-05; // 0x3F11566A; 0xAF25DE2C - const P4 = -1.65339022054652515390e-06; // 0xBEBBBD41; 0xC5D26BF1 - const P5 = 4.13813679705723846039e-08; // 0x3E663769; 0x72BEA4D0 + const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555 + const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n + const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c + const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1 + const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0 let r = hi - lo; let t = r * r; let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); - let y = 1.0f64 - ((lo - (r * c) / (2.0f64 - c)) - hi); + const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi); return ldexpf64(y, k); }; @@ -275,30 +538,29 @@ export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = { export fn expf64(x: f64) f64 = { const overflow = 7.09782712893383973096e+02; const underflow = -7.45133219101941108420e+02; - const near_zero = 1.0f64 / ((1i64 << 28i64): f64); + const near_zero = 1f64 / ((1i64 << 28i64): f64); - // Special cases if (isnan(x) || isposinf(x)) { return x; } else if (isneginf(x)) { - return 0.0f64; + return 0f64; } else if (x > overflow) { return INF; } else if (x < underflow) { - return 0.0f64; + return 0f64; } else if (-near_zero < x && x < near_zero) { - return 1.0f64 + x; + return 1f64 + x; }; // Reduce; computed as r = hi - lo for extra precision. let k = 0i64; - if (x < 0.0f64) { + if (x < 0f64) { k = (((LOG2_E * x) - 0.5): i64); - } else if (x > 0.0f64) { + } else if (x > 0f64) { k = (((LOG2_E * x) + 0.5): i64); }; - let hi = x - ((k: f64) * LN2_HI); - let lo = (k: f64) * LN2_LO; + const hi = x - ((k: f64) * LN2_HI); + const lo = (k: f64) * LN2_LO; // Compute return expmultif64(hi, lo, k); @@ -309,53 +571,58 @@ export fn exp2f64(x: f64) f64 = { const overflow = 1.0239999999999999e+03; const underflow = -1.0740e+03; - // Special cases if (isnan(x) || isposinf(x)) { return x; } else if (isneginf(x)) { - return 0.0f64; + return 0f64; } else if (x > overflow) { return INF; } else if (x < underflow) { - return 0.0f64; + return 0f64; }; // Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2. // Computed as r = hi - lo for extra precision. let k = 0i64; - if (x > 0.0f64) { + if (x > 0f64) { k = ((x + 0.5): i64); - } else if (x < 0.0f64) { + } else if (x < 0f64) { k = ((x - 0.5): i64); }; - let t = x - (k: f64); - let hi = t * LN2_HI; - let lo = -t * LN2_LO; + const t = x - (k: f64); + const hi = t * LN2_HI; + const lo = -t * LN2_LO; // Compute return expmultif64(hi, lo, k); }; @test fn expf64() void = { - assert(expf64(1.0f64) == E); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose(expf64(TEST_INPUTS[idx]), TEST_EXP[idx])); + }; + assert(expf64(1f64) == E); assert(isnan(expf64(NAN))); assert(isinf(expf64(INF))); - assert(expf64(-INF) == 0.0f64); - assert(isinf(expf64(99999.0f64))); - assert(expf64(-99999.0f64) == 0.0f64); - assert(expf64(0.5e-20) == 1.0f64); + assert(expf64(-INF) == 0f64); + assert(isinf(expf64(99999f64))); + assert(expf64(-99999f64) == 0f64); + assert(expf64(0.5e-20) == 1f64); }; @test fn exp2f64() void = { - assert(exp2f64(0.0f64) == 1.0f64); - assert(exp2f64(3.0f64) == 8.0f64); - assert(exp2f64(-2.0f64) == 0.25f64); - assert(!isinf(exp2f64(256.0f64))); - assert(isinf(exp2f64(99999.0f64))); - assert(exp2f64(-99999.0f64) == 0.0f64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose(exp2f64(TEST_INPUTS[idx]), TEST_EXP2[idx])); + }; + assert(exp2f64(0f64) == 1f64); + assert(exp2f64(3f64) == 8f64); + assert(exp2f64(-2f64) == 0.25f64); + assert(!isinf(exp2f64(256f64))); + assert(isinf(exp2f64(99999f64))); + assert(exp2f64(-99999f64) == 0f64); assert(isnan(exp2f64(NAN))); assert(isinf(exp2f64(INF))); - assert(exp2f64(-INF) == 0.0f64); + assert(exp2f64(-INF) == 0f64); }; // __ieee754_sqrt(x) @@ -420,12 +687,11 @@ export fn exp2f64(x: f64) f64 = { // Returns the square root of x. export fn sqrtf64(x: f64) f64 = { - // Special cases - if (x == 0.0f64) { + if (x == 0f64) { return x; } else if (isnan(x) || isposinf(x)) { return x; - } else if (x < 0.0f64) { + } else if (x < 0f64) { return NAN; }; @@ -459,7 +725,7 @@ export fn sqrtf64(x: f64) f64 = { // r = moving bit from MSB to LSB let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64); for (r != 0) { - let t = s + r; + const t = s + r; if (t <= bits) { s = t + r; bits -= t; @@ -482,88 +748,93 @@ export fn sqrtf64(x: f64) f64 = { }; @test fn sqrt() void = { - assert(sqrtf64(2.0f64) == SQRT_2); - assert(sqrtf64(4.0f64) == 2.0f64); - assert(sqrtf64(16.0f64) == 4.0f64); - assert(sqrtf64(65536.0f64) == 256.0f64); - assert(sqrtf64(powf64(123.0f64, 2.0f64)) == 123.0f64); - assert(sqrtf64(0.0f64) == 0.0f64); + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(isclose( + sqrtf64(absf64(TEST_INPUTS[idx])), + TEST_SQRT[idx])); + }; + assert(sqrtf64(2f64) == SQRT_2); + assert(sqrtf64(4f64) == 2f64); + assert(sqrtf64(16f64) == 4f64); + assert(sqrtf64(65536f64) == 256f64); + assert(sqrtf64(powf64(123f64, 2f64)) == 123f64); + assert(sqrtf64(0f64) == 0f64); assert(isnan(sqrtf64(NAN))); assert(isposinf(sqrtf64(INF))); - assert(isnan(sqrtf64(-2.0f64))); + assert(isnan(sqrtf64(-2f64))); }; fn is_f64_odd_int(x: f64) bool = { - let x_int_frac = modf64(x); - let x_int = x_int_frac.0; - let x_frac = x_int_frac.1; - let has_no_frac = (x_frac == 0.0f64); - let is_odd = ((x_int & 1i64) == 1i64); + const x_int_frac = modfracf64(x); + const x_int = x_int_frac.0; + const x_frac = x_int_frac.1; + const has_no_frac = (x_frac == 0f64); + const is_odd = ((x_int & 1i64) == 1i64); return has_no_frac && is_odd; }; // Returns x^p. export fn powf64(x: f64, p: f64) f64 = { - if (x == 1.0f64 || p == 0.0f64) { - return 1.0f64; - } else if (p == 1.0f64) { + if (x == 1f64 || p == 0f64) { + return 1f64; + } else if (p == 1f64) { return x; } else if (isnan(x)) { return NAN; } else if (isnan(p)) { return NAN; - } else if (x == 0.0f64) { - if (p < 0.0f64) { + } else if (x == 0f64) { + if (p < 0f64) { if (is_f64_odd_int(p)) { return copysignf64(INF, x); } else { return INF; }; - } else if (p > 0.0f64) { + } else if (p > 0f64) { if (is_f64_odd_int(p)) { return x; } else { - return 0.0f64; + return 0f64; }; }; } else if (isinf(p)) { - if (x == -1.0f64) { - return 1.0f64; - } else if ((absf64(x) < 1.0f64) == isposinf(p)) { - return 0.0f64; + if (x == -1f64) { + return 1f64; + } else if ((absf64(x) < 1f64) == isposinf(p)) { + return 0f64; }; return INF; } else if (isinf(x)) { if (isneginf(x)) { - return powf64(-0.0f64, -p); - } else if (p < 0.0f64) { - return 0.0f64; - } else if (p > 0.0f64) { + return powf64(-0f64, -p); + } else if (p < 0f64) { + return 0f64; + } else if (p > 0f64) { return INF; }; } else if (p == 0.5f64) { return sqrtf64(x); } else if (p == -0.5f64) { - return 1.0f64 / sqrtf64(x); + return 1f64 / sqrtf64(x); }; - let x_parts = frexp(x); + const x_parts = frexp(x); let x_mantissa = x_parts.0; let x_exp = x_parts.1; - let p_int_frac = modf64(absf64(p)); + const p_int_frac = modfracf64(absf64(p)); let p_int = p_int_frac.0; let p_frac = p_int_frac.1; - let res_mantissa = 1.0f64; + let res_mantissa = 1f64; let res_exp = 0i64; // The method used later in this function doesn't apply to fractional // powers, so we have to handle these separately with // x^p = e^{p * ln(x)} - if (p_frac != 0.0f64) { + if (p_frac != 0f64) { if (p_frac > 0.5f64) { - p_frac -= 1.0f64; + p_frac -= 1f64; p_int += 1i64; }; res_mantissa = expf64(p_frac * logf64(x)); @@ -575,7 +846,7 @@ export fn powf64(x: f64, p: f64) f64 = { for (let i = p_int; i != 0; i >>= 1) { // Check for over/underflow. if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) { - return 0.0f64; + return 0f64; }; if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) { return INF; @@ -594,97 +865,288 @@ export fn powf64(x: f64, p: f64) f64 = { }; }; - if (p < 0.0f64) { - res_mantissa = 1.0f64 / res_mantissa; + if (p < 0f64) { + res_mantissa = 1f64 / res_mantissa; res_exp = -res_exp; }; - let res = ldexpf64(res_mantissa, res_exp); - return res; + return ldexpf64(res_mantissa, res_exp); }; @test fn powf64() void = { + for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { + assert(eqwithin( + powf64(10f64, TEST_INPUTS[idx]), + TEST_POW[idx], + 1e-8f64)); + }; // Positive integer - assert(powf64(2.0f64, 2.0f64) == 4.0f64); - assert(powf64(3.0f64, 3.0f64) == 27.0f64); + assert(powf64(2f64, 2f64) == 4f64); + assert(powf64(3f64, 3f64) == 27f64); // Very large positive integer - assert(!isinf(powf64(2.0f64, 1020.0f64))); - assert(isinf(powf64(2.0f64, 1050.0f64))); + assert(!isinf(powf64(2f64, 1020f64))); + assert(isinf(powf64(2f64, 1050f64))); // Negative integer - assert(powf64(2.0f64, -1.0f64) == 0.5f64); - assert(powf64(2.0f64, -2.0f64) == 0.25f64); + assert(powf64(2f64, -1f64) == 0.5f64); + assert(powf64(2f64, -2f64) == 0.25f64); // Very small negative integer - assert(powf64(2.0f64, -1020.0f64) > 0.0f64); - assert(powf64(2.0f64, -1080.0f64) == 0.0f64); + assert(powf64(2f64, -1020f64) > 0f64); + assert(powf64(2f64, -1080f64) == 0f64); // Positive fractional powers - assert(eqwithin(powf64(2.0f64, 1.5f64), + assert(eqwithin(powf64(2f64, 1.5f64), 2.8284271247461900976033774f64, 1e-10f64)); - assert(eqwithin(powf64(2.0f64, 5.5f64), + assert(eqwithin(powf64(2f64, 5.5f64), 45.254833995939041561654039f64, 1e-10f64)); // Negative fractional powers - assert(eqwithin(powf64(2.0f64, -1.5f64), + assert(eqwithin(powf64(2f64, -1.5f64), 0.3535533905932737622004221f64, 1e-10f64)); - assert(eqwithin(powf64(2.0f64, -5.5f64), + assert(eqwithin(powf64(2f64, -5.5f64), 0.0220970869120796101375263f64, 1e-10f64)); // Special cases // pow(x, ±0) = 1 for any x - assert(powf64(123.0f64, 0.0f64) == 1.0f64); + assert(powf64(123f64, 0f64) == 1f64); // pow(1, y) = 1 for any y - assert(powf64(1.0f64, 123.0f64) == 1.0f64); + assert(powf64(1f64, 123f64) == 1f64); // pow(x, 1) = x for any x - assert(powf64(123.0f64, 1.0f64) == 123.0f64); + assert(powf64(123f64, 1f64) == 123f64); // pow(NaN, y) = NaN - assert(isnan(powf64(NAN, 123.0f64))); + assert(isnan(powf64(NAN, 123f64))); // pow(x, NaN) = NaN - assert(isnan(powf64(123.0f64, NAN))); + assert(isnan(powf64(123f64, NAN))); // pow(±0, y) = ±Inf for y an odd integer < 0 - assert(isposinf(powf64(0.0f64, -3.0f64))); - assert(isneginf(powf64(-0.0f64, -3.0f64))); + assert(isposinf(powf64(0f64, -3f64))); + assert(isneginf(powf64(-0f64, -3f64))); // pow(±0, -Inf) = +Inf - assert(isposinf(powf64(0.0f64, -INF))); - assert(isposinf(powf64(-0.0f64, -INF))); + assert(isposinf(powf64(0f64, -INF))); + assert(isposinf(powf64(-0f64, -INF))); // pow(±0, +Inf) = +0 - assert(powf64(0.0f64, INF) == 0.0f64); - assert(powf64(-0.0f64, INF) == 0.0f64); + assert(powf64(0f64, INF) == 0f64); + assert(powf64(-0f64, INF) == 0f64); // pow(±0, y) = +Inf for finite y < 0 and not an odd integer - assert(isposinf(powf64(0.0f64, -2.0f64))); - assert(isposinf(powf64(-0.0f64, -2.0f64))); + assert(isposinf(powf64(0f64, -2f64))); + assert(isposinf(powf64(-0f64, -2f64))); //pow(±0, y) = ±0 for y an odd integer > 0 - assert(powf64(0.0f64, 123.0f64) == 0.0f64); - let neg_zero = powf64(-0.0f64, 123.0f64); - assert(neg_zero == 0.0f64); + assert(powf64(0f64, 123f64) == 0f64); + const neg_zero = powf64(-0f64, 123f64); + assert(neg_zero == 0f64); assert(isnegative(neg_zero)); // pow(±0, y) = +0 for finite y > 0 and not an odd integer - assert(powf64(0.0f64, 8.0f64) == 0.0f64); + assert(powf64(0f64, 8f64) == 0f64); // pow(-1, ±Inf) = 1 - assert(powf64(-1.0f64, INF) == 1.0f64); - assert(powf64(-1.0f64, -INF) == 1.0f64); + assert(powf64(-1f64, INF) == 1f64); + assert(powf64(-1f64, -INF) == 1f64); // pow(x, +Inf) = +Inf for |x| > 1 - assert(isposinf(powf64(123.0f64, INF))); + assert(isposinf(powf64(123f64, INF))); // pow(x, -Inf) = +0 for |x| > 1 - assert(powf64(123.0f64, -INF) == 0.0f64); + assert(powf64(123f64, -INF) == 0f64); // pow(x, +Inf) = +0 for |x| < 1 - assert(powf64(0.5f64, INF) == 0.0f64); - assert(powf64(-0.5f64, INF) == 0.0f64); + assert(powf64(0.5f64, INF) == 0f64); + assert(powf64(-0.5f64, INF) == 0f64); // pow(x, -Inf) = +Inf for |x| < 1 assert(isposinf(powf64(0.5f64, -INF))); assert(isposinf(powf64(-0.5f64, -INF))); // pow(+Inf, y) = +Inf for y > 0 - assert(isposinf(powf64(INF, 123.0f64))); + assert(isposinf(powf64(INF, 123f64))); // pow(+Inf, y) = +0 for y < 0 - assert(powf64(INF, -1.0f64) == 0.0f64); + assert(powf64(INF, -1f64) == 0f64); // pow(-Inf, y) = pow(-0, -y) - assert(powf64(-INF, 123.0f64) == powf64(-0.0f64, -123.0f64)); + assert(powf64(-INF, 123f64) == powf64(-0f64, -123f64)); // pow(x, y) = NaN for finite x < 0 and finite non-integer y - assert(isnan(powf64(-2.0f64, 1.23f64))); + assert(isnan(powf64(-2f64, 1.23f64))); // sqrt - assert(powf64(4.0f64, 0.5f64) == sqrtf64(4.0f64)); - assert(powf64(4.0f64, 0.5f64) == 2.0f64); - assert(powf64(4.0f64, -0.5f64) == (1.0f64 / sqrtf64(4.0f64))); - assert(powf64(4.0f64, -0.5f64) == (1.0f64 / 2.0f64)); + assert(powf64(4f64, 0.5f64) == sqrtf64(4f64)); + assert(powf64(4f64, 0.5f64) == 2f64); + assert(powf64(4f64, -0.5f64) == (1f64 / sqrtf64(4f64))); + assert(powf64(4f64, -0.5f64) == (1f64 / 2f64)); +}; + +// Returns the greatest integer value less than or equal to x. +export fn floorf64(x: f64) f64 = { + if (x == 0f64 || isnan(x) || isinf(x)) { + return x; + }; + if (x < 0f64) { + const modfrac_res = modfracf64(-x); + let int_part = modfrac_res.0; + const frac_part = modfrac_res.1; + if (frac_part != 0f64) { + int_part += 1; + }; + return -(int_part: f64); + }; + const modfrac_res = modfracf64(x); + return (modfrac_res.0: f64); +}; + +@test fn floor() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + floorf64(TEST_INPUTS[idx]), + TEST_FLOOR[idx])); + }; + assert(floorf64(-INF) == -INF); + assert(floorf64(-0f64) == -0f64); + assert(floorf64(0f64) == 0f64); + assert(floorf64(INF) == INF); + assert(isnan(floorf64(NAN))); +}; + +// Returns the least integer value greater than or equal to x. +export fn ceilf64(x: f64) f64 = -floorf64(-x); + +@test fn ceil() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + ceilf64(TEST_INPUTS[idx]), + TEST_CEIL[idx])); + }; + assert(ceilf64(-INF) == -INF); + assert(ceilf64(-0f64) == -0f64); + assert(ceilf64(0f64) == 0f64); + assert(ceilf64(INF) == INF); + assert(isnan(ceilf64(NAN))); +}; + +// Returns the integer value of x. +export fn truncf64(x: f64) f64 = { + if (x == 0f64 || isnan(x) || isinf(x)) { + return x; + }; + const modfrac_res = modfracf64(x); + return (modfrac_res.0: f64); +}; + +@test fn trunc() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + truncf64(TEST_INPUTS[idx]), + TEST_TRUNC[idx], + 1e-6f64)); + }; + assert(truncf64(-INF) == -INF); + assert(truncf64(-0f64) == -0f64); + assert(truncf64(0f64) == 0f64); + assert(truncf64(INF) == INF); + assert(isnan(truncf64(NAN))); +}; + +// Returns the nearest integer, rounding half away from zero. +export fn roundf64(x: f64) f64 = { + let bits = f64bits(x); + let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; + if (e < F64_EXPONENT_BIAS) { + // Round abs(x) < 1 including denormals. + bits &= F64_SIGN_MASK; // +-0 + if (e == F64_EXPONENT_BIAS - 1) { + bits |= F64_ONE; // +-1 + }; + } else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) { + // Round any abs(x) >= 1 containing a fractional component + // [0,1). + // Numbers with larger exponents are returned unchanged since + // they must be either an integer, infinity, or NaN. + const half = 1 << (F64_MANTISSA_BITS - 1); + e -= F64_EXPONENT_BIAS; + bits += half >> e; + bits = bits & ~(F64_MANTISSA_MASK >> e); + }; + return f64frombits(bits); +}; + +@test fn round() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + roundf64(TEST_INPUTS[idx]), + TEST_ROUND[idx])); + }; + assert(roundf64(-INF) == -INF); + assert(roundf64(-0f64) == -0f64); + assert(roundf64(0f64) == 0f64); + assert(roundf64(INF) == INF); + assert(isnan(roundf64(NAN))); +}; + +// Returns the floating-point remainder of x / y. The magnitude of the result +// is less than y and its sign agrees with that of x. +export fn modf64(x: f64, y: f64) f64 = { + if (y == 0f64) { + return NAN; + }; + if (isinf(x) || isnan(x) || isnan(y)) { + return NAN; + }; + + y = absf64(y); + + const y_frexp = frexpf64(y); + const y_frac = y_frexp.0; + const y_exp = y_frexp.1; + let r = x; + if (x < 0f64) { + r = -x; + }; + + for (r >= y) { + const r_frexp = frexpf64(r); + const r_frac = r_frexp.0; + let r_exp = r_frexp.1; + if (r_frac < y_frac) { + r_exp -= 1i64; + }; + r = r - ldexpf64(y, r_exp - y_exp); + }; + if (x < 0f64) { + r = -r; + }; + return r; +}; + +@test fn modf64() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + modf64(10f64, TEST_INPUTS[idx]), + TEST_MODF[idx])); + }; + + assert(isnan(modf64(-INF, -INF))); + assert(isnan(modf64(-INF, -PI))); + assert(isnan(modf64(-INF, 0f64))); + assert(isnan(modf64(-INF, PI))); + assert(isnan(modf64(-INF, INF))); + assert(isnan(modf64(-INF, NAN))); + assert(modf64(-PI, -INF) == -PI); + assert(isnan(modf64(-PI, 0f64))); + assert(modf64(-PI, INF) == -PI); + assert(isnan(modf64(-PI, NAN))); + assert(modf64(-0f64, -INF) == -0f64); + assert(isnan(modf64(-0f64, 0f64))); + assert(modf64(-0f64, INF) == -0f64); + assert(isnan(modf64(-0f64, NAN))); + assert(modf64(0f64, -INF) == 0f64); + assert(isnan(modf64(0f64, 0f64))); + assert(modf64(0f64, INF) == 0f64); + assert(isnan(modf64(0f64, NAN))); + assert(modf64(PI, -INF) == PI); + assert(isnan(modf64(PI, 0f64))); + assert(modf64(PI, INF) == PI); + assert(isnan(modf64(PI, NAN))); + assert(isnan(modf64(INF, -INF))); + assert(isnan(modf64(INF, -PI))); + assert(isnan(modf64(INF, 0f64))); + assert(isnan(modf64(INF, PI))); + assert(isnan(modf64(INF, INF))); + assert(isnan(modf64(INF, NAN))); + assert(isnan(modf64(NAN, -INF))); + assert(isnan(modf64(NAN, -PI))); + assert(isnan(modf64(NAN, 0f64))); + assert(isnan(modf64(NAN, PI))); + assert(isnan(modf64(NAN, INF))); + assert(isnan(modf64(NAN, NAN))); + assert(modf64(5.9790119248836734e+200, 1.1258465975523544) == + 0.6447968302508578); }; diff --git a/math/testdata.ha b/math/testdata.ha @@ -0,0 +1,440 @@ +// The test data below is based on Go's implementation, and came with the +// following note and copyright notice: +// +// The expected results below were computed by the high precision calculators +// at https://keisan.casio.com/. More exact input values (array vf[], above) +// were obtained by printing them with "%.26f". The answers were calculated +// to 26 digits (by using the "Digit number" drop-down control of each +// calculator). +// +// The Go copyright notice: +// ==================================================== +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ==================================================== + +export const TEST_INPUTS: [_]f64 = [ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + -8.6859247685756013e+00, +]; + +export const TEST_ACOS: [_]f64 = [ + 1.0496193546107222142571536e+00, + 6.8584012813664425171660692e-01, + 1.5984878714577160325521819e+00, + 2.0956199361475859327461799e+00, + 2.7053008467824138592616927e-01, + 1.2738121680361776018155625e+00, + 1.0205369421140629186287407e+00, + 1.2945003481781246062157835e+00, + 1.3872364345374451433846657e+00, + 2.6231510803970463967294145e+00, +]; +export const TEST_ACOSH: [_]f64 = [ + 2.4743347004159012494457618e+00, + 2.8576385344292769649802701e+00, + 7.2796961502981066190593175e-01, + 2.4796794418831451156471977e+00, + 3.0552020742306061857212962e+00, + 2.044238592688586588942468e+00, + 2.5158701513104513595766636e+00, + 1.99050839282411638174299e+00, + 1.6988625798424034227205445e+00, + 2.9611454842470387925531875e+00, +]; +export const TEST_ASIN: [_]f64 = [ + 5.2117697218417440497416805e-01, + 8.8495619865825236751471477e-01, + -02.769154466281941332086016e-02, + -5.2482360935268931351485822e-01, + 1.3002662421166552333051524e+00, + 2.9698415875871901741575922e-01, + 5.5025938468083370060258102e-01, + 2.7629597861677201301553823e-01, + 1.83559892257451475846656e-01, + -1.0523547536021497774980928e+00, +]; +export const TEST_ASINH: [_]f64 = [ + 2.3083139124923523427628243e+00, + 2.743551594301593620039021e+00, + -2.7345908534880091229413487e-01, + -2.3145157644718338650499085e+00, + 2.9613652154015058521951083e+00, + 1.7949041616585821933067568e+00, + 2.3564032905983506405561554e+00, + 1.7287118790768438878045346e+00, + 1.3626658083714826013073193e+00, + -2.8581483626513914445234004e+00, +]; +export const TEST_ATAN: [_]f64 = [ + 1.372590262129621651920085e+00, + 1.442290609645298083020664e+00, + -2.7011324359471758245192595e-01, + -1.3738077684543379452781531e+00, + 1.4673921193587666049154681e+00, + 1.2415173565870168649117764e+00, + 1.3818396865615168979966498e+00, + 1.2194305844639670701091426e+00, + 1.0696031952318783760193244e+00, + -1.4561721938838084990898679e+00, +]; +export const TEST_ATANH: [_]f64 = [ + 5.4651163712251938116878204e-01, + 1.0299474112843111224914709e+00, + -2.7695084420740135145234906e-02, + -5.5072096119207195480202529e-01, + 1.9943940993171843235906642e+00, + 3.01448604578089708203017e-01, + 5.8033427206942188834370595e-01, + 2.7987997499441511013958297e-01, + 1.8459947964298794318714228e-01, + -1.3273186910532645867272502e+00, +]; +export const TEST_CEIL: [_]f64 = [ + 5.0000000000000000e+00, + 8.0000000000000000e+00, + -0.0000000000000000e+00, + -5.0000000000000000e+00, + 1.0000000000000000e+01, + 3.0000000000000000e+00, + 6.0000000000000000e+00, + 3.0000000000000000e+00, + 2.0000000000000000e+00, + -8.0000000000000000e+00, +]; +export const TEST_COS: [_]f64 = [ + 2.634752140995199110787593e-01, + 1.148551260848219865642039e-01, + 9.6191297325640768154550453e-01, + 2.938141150061714816890637e-01, + -9.777138189897924126294461e-01, + -9.7693041344303219127199518e-01, + 4.940088096948647263961162e-01, + -9.1565869021018925545016502e-01, + -2.517729313893103197176091e-01, + -7.39241351595676573201918e-01, +]; +// Results for 100000 * PI + TEST_INPUTS[i] +export const TEST_COSLARGE: [_]f64 = [ + 2.634752141185559426744e-01, + 1.14855126055543100712e-01, + 9.61912973266488928113e-01, + 2.9381411499556122552e-01, + -9.777138189880161924641e-01, + -9.76930413445147608049e-01, + 4.940088097314976789841e-01, + -9.15658690217517835002e-01, + -2.51772931436786954751e-01, + -7.3924135157173099849e-01, +]; +export const TEST_COSH: [_]f64 = [ + 7.2668796942212842775517446e+01, + 1.1479413465659254502011135e+03, + 1.0385767908766418550935495e+00, + 7.5000957789658051428857788e+01, + 7.655246669605357888468613e+03, + 9.3567491758321272072888257e+00, + 9.331351599270605471131735e+01, + 7.6833430994624643209296404e+00, + 3.1829371625150718153881164e+00, + 2.9595059261916188501640911e+03, +]; +export const TEST_EXP: [_]f64 = [ + 1.4533071302642137507696589e+02, + 2.2958822575694449002537581e+03, + 7.5814542574851666582042306e-01, + 6.6668778421791005061482264e-03, + 1.5310493273896033740861206e+04, + 1.8659907517999328638667732e+01, + 1.8662167355098714543942057e+02, + 1.5301332413189378961665788e+01, + 6.2047063430646876349125085e+00, + 1.6894712385826521111610438e-04, +]; +export const TEST_EXP2: [_]f64 = [ + 3.1537839463286288034313104e+01, + 2.1361549283756232296144849e+02, + 8.2537402562185562902577219e-01, + 3.1021158628740294833424229e-02, + 7.9581744110252191462569661e+02, + 7.6019905892596359262696423e+00, + 3.7506882048388096973183084e+01, + 6.6250893439173561733216375e+00, + 3.5438267900243941544605339e+00, + 2.4281533133513300984289196e-03, +]; +export const TEST_ABSF: [_]f64 = [ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + 2.7688005719200159e-01, + 5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + 8.6859247685756013e+00, +]; +export const TEST_FLOOR: [_]f64 = [ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + -1.0000000000000000e+00, + -6.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -9.0000000000000000e+00, +]; +export const TEST_MODF: [_]f64 = [ + 4.197615023265299782906368e-02, + 2.261127525421895434476482e+00, + 3.231794108794261433104108e-02, + 4.989396381728925078391512e+00, + 3.637062928015826201999516e-01, + 1.220868282268106064236690e+00, + 4.770916568540693347699744e+00, + 1.816180268691969246219742e+00, + 8.734595415957246977711748e-01, + 1.314075231424398637614104e+00, +]; +export const TEST_FREXP: [_](f64, i64) = [ + (6.2237649061045918750e-01, 3), + (9.6735905932226306250e-01, 3), + (-5.5376011438400318000e-01, -1), + (-6.2632545228388436250e-01, 3), + (6.02268356699901081250e-01, 4), + (7.3159430981099115000e-01, 2), + (6.5363542893241332500e-01, 3), + (6.8198497760900255000e-01, 2), + (9.1265404584042750000e-01, 1), + (-5.4287029803597508250e-01, 4), +]; +export const TEST_LOG: [_]f64 = [ + 1.605231462693062999102599e+00, + 2.0462560018708770653153909e+00, + -1.2841708730962657801275038e+00, + 1.6115563905281545116286206e+00, + 2.2655365644872016636317461e+00, + 1.0737652208918379856272735e+00, + 1.6542360106073546632707956e+00, + 1.0035467127723465801264487e+00, + 6.0174879014578057187016475e-01, + 2.161703872847352815363655e+00, +]; +export const TEST_LOG10: [_]f64 = [ + 6.9714316642508290997617083e-01, + 8.886776901739320576279124e-01, + -5.5770832400658929815908236e-01, + 6.998900476822994346229723e-01, + 9.8391002850684232013281033e-01, + 4.6633031029295153334285302e-01, + 7.1842557117242328821552533e-01, + 4.3583479968917773161304553e-01, + 2.6133617905227038228626834e-01, + 9.3881606348649405716214241e-01, +]; +export const TEST_LOG2: [_]f64 = [ + 2.3158594707062190618898251e+00, + 2.9521233862883917703341018e+00, + -1.8526669502700329984917062e+00, + 2.3249844127278861543568029e+00, + 3.268478366538305087466309e+00, + 1.5491157592596970278166492e+00, + 2.3865580889631732407886495e+00, + 1.447811865817085365540347e+00, + 8.6813999540425116282815557e-01, + 3.118679457227342224364709e+00, +]; +export const TEST_LOG1P: []f64 = [ + 4.8590257759797794104158205e-02, + 7.4540265965225865330849141e-02, + -2.7726407903942672823234024e-03, + -5.1404917651627649094953380e-02, + 9.1998280672258624681335010e-02, + 2.8843762576593352865894824e-02, + 5.0969534581863707268992645e-02, + 2.6913947602193238458458594e-02, + 1.8088493239630770262045333e-02, + -9.0865245631588989681559268e-02, +]; +export const TEST_MODFRAC: [_](i64, f64) = [ + (4i64, 9.7901192488367350108546816e-01), + (7i64, 7.3887247457810456552351752e-01), + (-0i64, -2.7688005719200159404635997e-01), + (-5i64, -1.060361827107492160848778e-02), + (9i64, 6.3629370719841737980004837e-01), + (2i64, 9.2637723924396464525443662e-01), + (5i64, 2.2908343145930665230025625e-01), + (2i64, 7.2793991043601025126008608e-01), + (1i64, 8.2530809168085506044576505e-01), + (-8i64, -6.8592476857560136238589621e-01), +]; +export const TEST_POW: [_]f64 = [ + 9.5282232631648411840742957e+04, + 5.4811599352999901232411871e+07, + 5.2859121715894396531132279e-01, + 9.7587991957286474464259698e-06, + 4.328064329346044846740467e+09, + 8.4406761805034547437659092e+02, + 1.6946633276191194947742146e+05, + 5.3449040147551939075312879e+02, + 6.688182138451414936380374e+01, + 2.0609869004248742886827439e-09, +]; +export const TEST_ROUND: [_]f64 = [ + 5.0f64, + 8.0f64, + -0.0f64, + -5.0f64, + 10.0f64, + 3.0f64, + 5.0f64, + 3.0f64, + 2.0f64, + -9.0f64, +]; +export const TEST_SIGNF: [_]i8 = [ + 1, + 1, + -1, + -1, + 1, + 1, + 1, + 1, + 1, + -1, +]; +export const TEST_SIN: [_]f64 = [ + -9.6466616586009283766724726e-01, + 9.9338225271646545763467022e-01, + -2.7335587039794393342449301e-01, + 9.5586257685042792878173752e-01, + -2.099421066779969164496634e-01, + 2.135578780799860532750616e-01, + -8.694568971167362743327708e-01, + 4.019566681155577786649878e-01, + 9.6778633541687993721617774e-01, + -6.734405869050344734943028e-01, +]; +// Results for 100000 * PI + TEST_INPUTS[i] +export const TEST_SINLARGE: [_]f64 = [ + -9.646661658548936063912e-01, + 9.933822527198506903752e-01, + -2.7335587036246899796e-01, + 9.55862576853689321268e-01, + -2.099421066862688873691e-01, + 2.13557878070308981163e-01, + -8.694568970959221300497e-01, + 4.01956668098863248917e-01, + 9.67786335404528727927e-01, + -6.7344058693131973066e-01, +]; +export const TEST_SINH: [_]f64 = [ + 7.2661916084208532301448439e+01, + 1.1479409110035194500526446e+03, + -2.8043136512812518927312641e-01, + -7.499429091181587232835164e+01, + 7.6552466042906758523925934e+03, + 9.3031583421672014313789064e+00, + 9.330815755828109072810322e+01, + 7.6179893137269146407361477e+00, + 3.021769180549615819524392e+00, + -2.95950575724449499189888e+03, +]; +export const TEST_SQRT: [_]f64 = [ + 2.2313699659365484748756904e+00, + 2.7818829009464263511285458e+00, + 5.2619393496314796848143251e-01, + 2.2384377628763938724244104e+00, + 3.1042380236055381099288487e+00, + 1.7106657298385224403917771e+00, + 2.286718922705479046148059e+00, + 1.6516476350711159636222979e+00, + 1.3510396336454586262419247e+00, + 2.9471892997524949215723329e+00, +]; +export const TEST_TAN: [_]f64 = [ + -3.661316565040227801781974e+00, + 8.64900232648597589369854e+00, + -2.8417941955033612725238097e-01, + 3.253290185974728640827156e+00, + 2.147275640380293804770778e-01, + -2.18600910711067004921551e-01, + -1.760002817872367935518928e+00, + -4.389808914752818126249079e-01, + -3.843885560201130679995041e+00, + 9.10988793377685105753416e-01, +]; +// Results for 100000 * PI + TEST_INPUTS[i] +export const TEST_TANLARGE: [_]f64 = [ + -3.66131656475596512705e+00, + 8.6490023287202547927e+00, + -2.841794195104782406e-01, + 3.2532901861033120983e+00, + 2.14727564046880001365e-01, + -2.18600910700688062874e-01, + -1.760002817699722747043e+00, + -4.38980891453536115952e-01, + -3.84388555942723509071e+00, + 9.1098879344275101051e-01, +]; +export const TEST_TANH: [_]f64 = [ + 9.9990531206936338549262119e-01, + 9.9999962057085294197613294e-01, + -2.7001505097318677233756845e-01, + -9.9991110943061718603541401e-01, + 9.9999999146798465745022007e-01, + 9.9427249436125236705001048e-01, + 9.9994257600983138572705076e-01, + 9.9149409509772875982054701e-01, + 9.4936501296239685514466577e-01, + -9.9999994291374030946055701e-01, +]; +export const TEST_TRUNC: [_]f64 = [ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + -0.0000000000000000e+00, + -5.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -8.0000000000000000e+00, +]; diff --git a/math/trig.ha b/math/trig.ha @@ -0,0 +1,1019 @@ +// Sections of the code below are based on Go's implementation, which is, in +// turn, based on: +// * the Cephes Mathematical Library (cephes/cmath/{sin,..}), available from +// http://www.netlib.org/cephes/cmath.tgz. +// * FreeBSD's /usr/src/lib/msun/src/{s_asinh.c,...} +// The original C code, as well as the respective comments and constants are +// from these libraries. +// +// The Cephes copyright notice: +// ==================================================== +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov +// ==================================================== +// +// The FreeBSD copyright notice: +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// The Go copyright notice: +// ==================================================== +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ==================================================== + +// sin coefficients +const SIN_CF: [_]f64 = [ + 1.58962301576546568060e-10, // 0x3de5d8fd1fd19ccd + -2.50507477628578072866e-8, // 0xbe5ae5e5a9291f5d + 2.75573136213857245213e-6, // 0x3ec71de3567d48a1 + -1.98412698295895385996e-4, // 0xbf2a01a019bfdf03 + 8.33333333332211858878e-3, // 0x3f8111111110f7d0 + -1.66666666666666307295e-1, // 0xbfc5555555555548 +]; + +// cos coefficients +const COS_CF: [_]f64 = [ + -1.13585365213876817300e-11, // 0xbda8fa49a0861a9b + 2.08757008419747316778e-9, // 0x3e21ee9d7b4e3f05 + -2.75573141792967388112e-7, // 0xbe927e4f7eac4bc6 + 2.48015872888517045348e-5, // 0x3efa01a019c844f5 + -1.38888888888730564116e-3, // 0xbf56c16c16c14f91 + 4.16666666666665929218e-2, // 0x3fa555555555554b +]; + +// PI / 4 split into three parts +def PI4A: f64 = 7.85398125648498535156e-1; // 0x3fe921fb40000000 +def PI4B: f64 = 3.77489470793079817668e-8; // 0x3e64442d00000000 +def PI4C: f64 = 2.69515142907905952645e-15; // 0x3ce8469898cc5170 + +// reduce_threshold is the maximum value of x where the reduction using PI/4 +// in 3 float64 parts still gives accurate results. This threshold +// is set by y*C being representable as a float64 without error +// where y is given by y = floor(x * (4 / PI)) and C is the leading partial +// terms of 4/PI. Since the leading terms (PI4A and PI4B in sin.go) have 30 +// and 32 trailing zero bits, y should have less than 30 significant bits. +// y < 1<<30 -> floor(x*4/PI) < 1<<30 -> x < (1<<30 - 1) * PI/4 +// So, conservatively we can take x < 1<<29. +// Above this threshold Payne-Hanek range reduction must be used. +def REDUCE_THRESHOLD: f64 = ((1u64 << 29): f64); + +// MPI4 is the binary digits of 4/pi as a uint64 array, +// that is, 4/pi = Sum MPI4[i]*2^(-64*i) +// 19 64-bit digits and the leading one bit give 1217 bits +// of precision to handle the largest possible float64 exponent. +const MPI4: [_]u64 = [ + 0x0000000000000001, + 0x45f306dc9c882a53, + 0xf84eafa3ea69bb81, + 0xb6c52b3278872083, + 0xfca2c757bd778ac3, + 0x6e48dc74849ba5c0, + 0x0c925dd413a32439, + 0xfc3bd63962534e7d, + 0xd1046bea5d768909, + 0xd338e04d68befc82, + 0x7323ac7306a673e9, + 0x3908bf177bf25076, + 0x3ff12fffbc0b301f, + 0xde5e2316b414da3e, + 0xda6cfd9e4f96136e, + 0x9e8c7ecd3cbfd45a, + 0xea4f758fd7cbe2f6, + 0x7a0e73ef14a525d4, + 0xd7f6bf623f1aba10, + 0xac06608df8f6d757, +]; + +// trig_reduce implements Payne-Hanek range reduction by PI/4 for x > 0. It +// returns the integer part mod 8 (j) and the fractional part (z) of x / (PI/4). +fn trig_reduce(x: f64) (u64, f64) = { + // The implementation is based on: "ARGUMENT REDUCTION FOR HUGE + // ARGUMENTS: Good to the Last Bit" K. C. Ng et al, March 24, 1992 + // The simulated multi-precision calculation of x * B uses 64-bit + // integer arithmetic. + const PI4 = PI / 4f64; + if (x < PI4) { + return (0u64, x); + }; + // Extract out the integer and exponent such that x = ix * 2 ** exp + let ix = f64bits(x); + const exp = + ((ix >> F64_MANTISSA_BITS & + F64_EXPONENT_MASK): i64) - + (F64_EXPONENT_BIAS: i64) - + (F64_MANTISSA_BITS: i64); + ix = ix & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); + ix |= 1 << F64_MANTISSA_BITS; + // Use the exponent to extract the 3 appropriate uint64 digits from + // MPI4, B ~ (z0, z1, z2), such that the product leading digit has the + // exponent -61. Note, exp >= -53 since x >= PI4 and exp < 971 for + // maximum float64. + const digit = ((exp + 61): u64) / 64; + const bitshift = ((exp + 61): u64) % 64; + const z0 = (MPI4[digit] << bitshift) | + (MPI4[digit + 1] >> (64 - bitshift)); + const z1 = (MPI4[digit + 1] << bitshift) | + (MPI4[digit + 2] >> (64 - bitshift)); + const z2 = (MPI4[digit + 2] << bitshift) | + (MPI4[digit + 3] >> (64 - bitshift)); + // Multiply mantissa by the digits and extract the upper two digits + // (hi, lo). + const z2 = mulu64(z2, ix); + const z2hi = z2.0; + const z1 = mulu64(z1, ix); + const z1hi = z1.0; + const z1lo = z1.1; + const z0lo = z0 * ix; + const add1 = addu64(z1lo, z2hi, 0); + const lo = add1.0; + const c = add1.1; + const add2 = addu64(z0lo, z1hi, c); + let hi = add2.0; + // The top 3 bits are j. + let j = hi >> 61; + // Extract the fraction and find its magnitude. + hi = hi << 3 | lo >> 61; + const lz = ((leading_zeros_u64(hi)): uint); + const e = ((F64_EXPONENT_BIAS - (lz + 1)): u64); + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))); + hi >>= 64 - F64_MANTISSA_BITS; + // Include the exponent and convert to a float. + hi |= e << F64_MANTISSA_BITS; + let z = f64frombits(hi); + // Map zeros to origin. + if (j & 1 == 1) { + j += 1; + j &= 7; + z -= 1f64; + }; + // Multiply the fractional part by pi/4. + return (j, z * PI4); +}; + +@test fn trig_reduce() void = { + for (let idx = 0z; idx < 10; idx += 1) { + const reduced = trig_reduce(TEST_INPUTS[idx]); + const j = reduced.0; + const z = reduced.1; + const xred = (j: i64: f64) * (PI / 4f64) + z; + assert(eqwithin( + sinf64(TEST_INPUTS[idx]), + sinf64(xred), + 1e-6f64)); + }; +}; + +// cos.c +// +// Circular cosine +// +// SYNOPSIS: +// +// double x, y, cos(); +// y = cos( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the cosine is approximated by +// 1 - x**2 Q(x**2). +// Between pi/4 and pi/2 the sine is represented as +// x + x**3 P(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// DEC 0,+1.07e9 17000 3.0e-17 7.2e-18 + +// Returns the cosine of x, in radians. +export fn cosf64(x: f64) f64 = { + if (isnan(x) || isinf(x)) { + return NAN; + }; + + // Make argument positive + let is_negative = false; + x = absf(x); + + let j = 0u64; + let y = 0f64; + let z = 0f64; + if (x >= REDUCE_THRESHOLD) { + const reduce_res = trig_reduce(x); + j = reduce_res.0; + z = reduce_res.1; + } else { + // Integer part of x/(PI/4), as integer for tests on the phase + // angle + j = ((x * (4f64 / PI)): i64: u64); + // Integer part of x/(PI/4), as float + y = (j: i64: f64); + + // Map zeros to origin + if (j & 1 == 1) { + j += 1; + y += 1f64; + }; + // Octant modulo 2PI radians (360 degrees) + j &= 7; + // Extended precision modular arithmetic + z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C); + }; + + if (j > 3) { + j -= 4; + is_negative = !is_negative; + }; + if (j > 1) { + is_negative = !is_negative; + }; + + const zz = z * z; + if (j == 1 || j == 2) { + y = z + z * zz * ((((((SIN_CF[0] * zz) + + SIN_CF[1]) * zz + + SIN_CF[2]) * zz + + SIN_CF[3]) * zz + + SIN_CF[4]) * zz + + SIN_CF[5]); + } else { + y = 1.0 - 0.5 * zz + zz * zz * ((((((COS_CF[0] * zz) + + COS_CF[1]) * zz + + COS_CF[2]) * zz + + COS_CF[3]) * zz + + COS_CF[4]) * zz + + COS_CF[5]); + }; + if (is_negative) { + y = -y; + }; + return y; +}; + +@test fn cos() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + cosf64(TEST_INPUTS[idx]), TEST_COS[idx])); + }; + assert(isnan(cosf64(-INF))); + assert(isnan(cosf64(INF))); + assert(isnan(cosf64(NAN))); +}; + +// sin.c +// +// Circular sine +// +// SYNOPSIS: +// +// double x, y, sin(); +// y = sin( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the sine is approximated by +// x + x**3 P(x**2). +// Between pi/4 and pi/2 the cosine is represented as +// 1 - x**2 Q(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC 0, 10 150000 3.0e-17 7.8e-18 +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. + +// Returns the sine of x, in radians. +export fn sinf64(x: f64) f64 = { + if (x == 0f64 || isnan(x)) { + return x; + } else if (isinf(x)) { + return NAN; + }; + + // Make argument positive but save the sign + let is_negative = false; + if (x < 0f64) { + x = -x; + is_negative = true; + }; + + let j = 0u64; + let y = 0f64; + let z = 0f64; + if (x >= REDUCE_THRESHOLD) { + const reduce_res = trig_reduce(x); + j = reduce_res.0; + z = reduce_res.1; + } else { + // Integer part of x/(PI/4), as integer for tests on the phase + // angle + j = ((x * (4f64 / PI)): i64: u64); + // Integer part of x/(PI/4), as float + y = (j: i64: f64); + + // Map zeros to origin + if (j & 1 == 1) { + j += 1; + y += 1f64; + }; + // Octant modulo 2PI radians (360 degrees) + j &= 7; + // Extended precision modular arithmetic + z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C); + }; + + // Reflect in x axis + if (j > 3) { + j -= 4; + is_negative = !is_negative; + }; + + const zz = z * z; + if (j == 1 || j == 2) { + y = 1.0 - 0.5 * zz + zz * zz * + ((((((COS_CF[0] * zz) + + COS_CF[1]) * zz + + COS_CF[2]) * zz + + COS_CF[3]) * zz + + COS_CF[4]) * zz + + COS_CF[5]); + } else { + y = z + z * zz * + ((((((SIN_CF[0] * zz) + + SIN_CF[1]) * zz + + SIN_CF[2]) * zz + + SIN_CF[3]) * zz + + SIN_CF[4]) * zz + + SIN_CF[5]); + }; + if (is_negative) { + y = -y; + }; + return y; +}; + +@test fn sin() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + sinf64(TEST_INPUTS[idx]), TEST_SIN[idx])); + }; + assert(isnan(sinf64(-INF))); + assert(sinf64(-0f64) == -0f64); + assert(sinf64(0f64) == 0f64); + assert(isnan(sinf64(INF))); + assert(isnan(sinf64(NAN))); +}; + +// tan.c +// +// Circular tangent +// +// SYNOPSIS: +// +// double x, y, tan(); +// y = tan( x ); +// +// DESCRIPTION: +// +// Returns the circular tangent of the radian argument x. +// +// Range reduction is modulo pi/4. A rational function +// x + x**3 P(x**2)/Q(x**2) +// is employed in the basic interval [0, pi/4]. +// +// ACCURACY: +// Relative error: +// arithmetic domain # trials peak rms +// DEC +-1.07e9 44000 4.1e-17 1.0e-17 +// IEEE +-1.07e9 30000 2.9e-16 8.1e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. +// [Accuracy loss statement from sin.go comments.] + +// tan coefficients +const TAN_P: [_]f64 = [ + -1.30936939181383777646e4, // 0xc0c992d8d24f3f38 + 1.15351664838587416140e6, // 0x413199eca5fc9ddd + -1.79565251976484877988e7, // 0xc1711fead3299176 +]; +const TAN_Q: [_]f64 = [ + 1.00000000000000000000e0, + 1.36812963470692954678e4, // 0x40cab8a5eeb36572 + -1.32089234440210967447e6, // 0xc13427bc582abc96 + 2.50083801823357915839e7, // 0x4177d98fc2ead8ef + -5.38695755929454629881e7, // 0xc189afe03cbe5a31 +]; + +// Returns the tangent of x, in radians. +export fn tanf64(x: f64) f64 = { + if (x == 0f64 || isnan(x)) { + return x; + } else if (isinf(x)) { + return NAN; + }; + + // Make argument positive but save the sign + let is_negative = false; + if (x < 0f64) { + x = -x; + is_negative = true; + }; + let j = 0u64; + let y = 0f64; + let z = 0f64; + if (x >= REDUCE_THRESHOLD) { + const reduce_res = trig_reduce(x); + j = reduce_res.0; + z = reduce_res.1; + } else { + // Integer part of x/(PI/4), as integer for tests on the phase + // angle + j = ((x * (4f64 / PI)): i64: u64); + // Integer part of x/(PI/4), as float + y = (j: i64: f64); + + // Map zeros and singularities to origin + if (j & 1 == 1) { + j += 1; + y += 1f64; + }; + + z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C); + }; + const zz = z * z; + + if (zz > 1e-14) { + y = z + z * (zz * + (((TAN_P[0] * zz) + TAN_P[1]) * zz + TAN_P[2]) / + ((((zz + TAN_Q[1]) * zz + + TAN_Q[2]) * zz + + TAN_Q[3]) * zz + + TAN_Q[4])); + } else { + y = z; + }; + if (j & 2 == 2) { + y = -1f64 / y; + }; + if (is_negative) { + y = -y; + }; + return y; +}; + +@test fn tan() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + tanf64(TEST_INPUTS[idx]), TEST_TAN[idx])); + }; + assert(isnan(sinf64(-INF))); + assert(sinf64(-0f64) == -0f64); + assert(sinf64(0f64) == 0f64); + assert(isnan(sinf64(INF))); + assert(isnan(sinf64(NAN))); +}; + +// Evaluates a series valid in the range [0, 0.66]. +fn xatan(x: f64) f64 = { + const P0 = -8.750608600031904122785e-01; + const P1 = -1.615753718733365076637e+01; + const P2 = -7.500855792314704667340e+01; + const P3 = -1.228866684490136173410e+02; + const P4 = -6.485021904942025371773e+01; + const Q0 = +2.485846490142306297962e+01; + const Q1 = +1.650270098316988542046e+02; + const Q2 = +4.328810604912902668951e+02; + const Q3 = +4.853903996359136964868e+02; + const Q4 = +1.945506571482613964425e+02; + let z = x * x; + z = z * ((((P0 * z + P1) * z + P2) * z + P3) * z + P4) / + (((((z + Q0) * z + Q1) * z + Q2) * z + Q3) * z + Q4); + z = (x * z) + x; + return z; +}; + +// Reduces argument (known to be positive) to the range [0, 0.66] and calls +// xatan. +fn satan(x: f64) f64 = { + // pi / 2 = PIO2 + morebits + const morebits = 6.123233995736765886130e-17; + // tan(3 * pi / 8) + const tan3pio8 = 2.41421356237309504880; + if (x <= 0.66) { + return xatan(x); + }; + if (x > tan3pio8) { + return (PI / 2f64) - xatan(1f64 / x) + morebits; + }; + return (PI / 4f64) + + xatan((x - 1f64) / (x + 1f64)) + + (0.5f64 * morebits); +}; + +// Returns the arcsine, in radians, of x. +export fn asinf64(x: f64) f64 = { + if (x == 0f64) { + return x; + }; + let is_negative = false; + if (x < 0.064) { + x = -x; + is_negative = true; + }; + if (x > 1f64) { + return NAN; + }; + let temp = sqrtf64(1f64 - x * x); + if (x > 0.7f64) { + temp = PI / 2f64 - satan(temp / x); + } else { + temp = satan(x / temp); + }; + + if (is_negative) { + temp = -temp; + }; + return temp; +}; + +@test fn asin() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + asinf64(TEST_INPUTS[idx] / 10f64), + TEST_ASIN[idx], + 1e-6f64)); + }; + assert(isnan(asinf64(-PI))); + assert(asinf64(-0f64) == -0f64); + assert(asinf64(0f64) == 0f64); + assert(isnan(asinf64(PI))); + assert(isnan(asinf64(NAN))); +}; + +// Returns the arccosine, in radians, of x. +export fn acosf64(x: f64) f64 = { + return PI / 2f64 - asinf64(x); +}; + +@test fn acos() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + acosf64(TEST_INPUTS[idx] / 10f64), + TEST_ACOS[idx], + 1e-6f64)); + }; + assert(isnan(acosf64(-PI))); + assert(acosf64(1f64) == 0f64); + assert(isnan(acosf64(PI))); + assert(isnan(acosf64(NAN))); +}; + +// atan.c +// Inverse circular tangent (arctangent) +// +// SYNOPSIS: +// double x, y, atan(); +// y = atan( x ); +// +// DESCRIPTION: +// Returns radian angle between -pi/2 and +pi/2 whose tangent is x. +// +// Range reduction is from three intervals into the interval from zero to 0.66. +// The approximant uses a rational function of degree 4/5 of the form +// x + x**3 P(x)/Q(x). +// +// ACCURACY: +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10, 10 50000 2.4e-17 8.3e-18 +// IEEE -10, 10 10^6 1.8e-16 5.0e-17 + +// Returns the arctangent, in radians, of x. +export fn atanf64(x: f64) f64 = { + if (x == 0f64) { + return x; + }; + if (x > 0f64) { + return satan(x); + }; + return -satan(-x); +}; + +@test fn atan() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + atanf64(TEST_INPUTS[idx]), + TEST_ATAN[idx], + 1e-6f64)); + }; + assert(atanf64(-INF) == -PI / 2f64); + assert(atanf64(-0f64) == -0f64); + assert(atanf64(0f64) == 0f64); + assert(atanf64(INF) == PI / 2f64); + assert(isnan(atanf64(NAN))); +}; + +// Floating-point hyperbolic sine and cosine. +// The exponential func is called for arguments greater in magnitude than 0.5. +// A series is used for arguments smaller in magnitude than 0.5. +// Cosh(x) is computed from the exponential func for all arguments. + +// Returns the hyperbolic sine of x. +export fn sinhf64(x: f64) f64 = { + // The coefficients are #2029 from Hart & Cheney. (20.36D) + const P0 = -0.6307673640497716991184787251e+6; + const P1 = -0.8991272022039509355398013511e+5; + const P2 = -0.2894211355989563807284660366e+4; + const P3 = -0.2630563213397497062819489e+2; + const Q0 = -0.6307673640497716991212077277e+6; + const Q1 = 0.1521517378790019070696485176e+5; + const Q2 = -0.173678953558233699533450911e+3; + + let is_negative = false; + if (x < 0f64) { + x = -x; + is_negative = true; + }; + + let temp = 0f64; + if (x > 21f64) { + temp = expf64(x) * 0.5f64; + } else if (x > 0.5f64) { + const ex = expf64(x); + temp = (ex - (1f64 / ex)) * 0.5f64; + } else { + const sq = x * x; + temp = (((P3 * sq + P2) * sq + P1) * sq + P0) * x; + temp = temp / (((sq + Q2) * sq + Q1) * sq + Q0); + }; + + if (is_negative) { + temp = -temp; + }; + + return temp; +}; + +@test fn sinh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + sinhf64(TEST_INPUTS[idx]), + TEST_SINH[idx], + 1e-6f64)); + }; + assert(sinhf64(-INF) == -INF); + assert(sinhf64(-0f64) == -0f64); + assert(sinhf64(0f64) == 0f64); + assert(sinhf64(INF) == INF); + assert(isnan(sinhf64(NAN))); +}; + +// Returns the hyperbolic cosine of x. +export fn coshf64(x: f64) f64 = { + x = absf64(x); + if (x > 21f64) { + return expf64(x) * 0.5f64; + }; + const ex = expf64(x); + return (ex + 1f64 / ex) * 0.5f64; +}; + +@test fn cosh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + coshf64(TEST_INPUTS[idx]), + TEST_COSH[idx], + 1e-6f64)); + }; + assert(coshf64(-INF) == INF); + assert(coshf64(-0f64) == 1f64); + assert(coshf64(0f64) == 1f64); + assert(coshf64(INF) == INF); + assert(isnan(coshf64(NAN))); +}; + +// tanh.c +// +// Hyperbolic tangent +// +// SYNOPSIS: +// +// double x, y, tanh(); +// +// y = tanh( x ); +// +// DESCRIPTION: +// +// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG. +// MAXLOG = 8.8029691931113054295988e+01 = log(2**127) +// MINLOG = -8.872283911167299960540e+01 = log(2**-128) +// +// A rational function is used for |x| < 0.625. The form +// x + x**3 P(x)/Q(x) of Cody & Waite is employed. +// Otherwise, +// tanh(x) = sinh(x)/cosh(x) = 1 - 2/(exp(2x) + 1). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -2,2 30000 2.5e-16 5.8e-17 + +// tanh coefficients +const TANH_P: [_]f64 = [ + -9.64399179425052238628e-1, + -9.92877231001918586564e1, + -1.61468768441708447952e3, +]; +const TANH_Q: [_]f64 = [ + 1.12811678491632931402e2, + 2.23548839060100448583e3, + 4.84406305325125486048e3, +]; + +// Returns the hyperbolic tangent of x. +export fn tanhf64(x: f64) f64 = { + const MAXLOG = 8.8029691931113054295988e+01; // log(2**127) + let z = absf64(x); + if (z > 0.5f64 * MAXLOG) { + if (x < 0f64) { + return -1f64; + }; + return 1f64; + } else if (z >= 0.625f64) { + const s = expf64(2f64 * z); + z = 1f64 - 2f64 / (s + 1f64); + if (x < 0f64) { + z = -z; + }; + } else { + if (x == 0f64) { + return x; + }; + const s = x * x; + z = x + x * s * ((TANH_P[0] * s + TANH_P[1]) * s + TANH_P[2]) / + (((s + TANH_Q[0]) * s + TANH_Q[1]) * s + TANH_Q[2]); + }; + return z; +}; + +@test fn tanh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + tanhf64(TEST_INPUTS[idx]), + TEST_TANH[idx], + 1e-6f64)); + }; + assert(tanhf64(-INF) == -1f64); + assert(tanhf64(-0f64) == -0f64); + assert(tanhf64(0f64) == 0f64); + assert(tanhf64(INF) == 1f64); + assert(isnan(tanhf64(NAN))); +}; + +// asinh(x) +// Method : +// Based on +// asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] +// we have +// asinh(x) := x if 1+x*x=1, +// := sign(x)*(log(x)+ln2)) for large |x|, else +// := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else +// := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2))) +// + +// Returns the inverse hyperbolic sine of x. +export fn asinhf64(x: f64) f64 = { + const NEAR_ZERO = 1f64 / ((1i64 << 28): f64); + const LARGE = ((1i64 << 28): f64); + + if (isnan(x) || isinf(x)) { + return x; + }; + + let is_negative = false; + if (x < 0f64) { + x = -x; + is_negative = true; + }; + + let temp = 0f64; + + if (x > LARGE) { + // |x| > 2**28 + temp = logf64(x) + LN_2; + } else if (x > 2f64) { + // 2**28 > |x| > 2.0 + temp = logf64(2f64 * x + + 1f64 / (sqrtf64(x * x + 1f64) + x)); + } else if (x < NEAR_ZERO) { + // |x| < 2**-28 + temp = x; + } else { + // 2.0 > |x| > 2**-28 + temp = log1pf64(x + x * x / + (1f64 + sqrtf64(1f64 + x * x))); + }; + if (is_negative) { + temp = -temp; + }; + return temp; +}; + +@test fn asinh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + asinhf64(TEST_INPUTS[idx]), + TEST_ASINH[idx], + 1e-6f64)); + }; + assert(asinhf64(-INF) == -INF); + assert(asinhf64(-0f64) == -0f64); + assert(asinhf64(0f64) == 0f64); + assert(asinhf64(INF) == INF); + assert(isnan(asinhf64(NAN))); +}; + +// __ieee754_acosh(x) +// Method : +// Based on +// acosh(x) = log [ x + sqrt(x*x-1) ] +// we have +// acosh(x) := log(x)+ln2, if x is large; else +// acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else +// acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. +// +// Special cases: +// acosh(x) is NaN with signal if x<1. +// acosh(NaN) is NaN without signal. +// + +// Returns the inverse hyperbolic cosine of x. +export fn acoshf64(x: f64) f64 = { + const LARGE = ((1i64 << 28): f64); + + if (x < 1f64 || isnan(x)) { + return NAN; + } else if (x == 1f64) { + return 0f64; + } else if (x >= LARGE) { + // x > 2**28 + return logf64(x) + LN_2; + } else if (x > 2f64) { + // 2**28 > x > 2 + return logf64(2f64 * x - 1f64 / + (x + sqrtf64(x * x - 1f64))); + }; + const t = x - 1f64; + // 2 >= x > 1 + return log1pf64(t + sqrtf64(2f64 * t + t * t)); +}; + +@test fn acosh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(eqwithin( + acoshf64(1f64 + absf64(TEST_INPUTS[idx])), + TEST_ACOSH[idx], + 1e-6f64)); + }; + assert(isnan(acoshf64(-INF))); + assert(isnan(acoshf64(0.5f64))); + assert(acoshf64(1f64) == 0f64); + assert(acoshf64(INF) == INF); + assert(isnan(acoshf64(NAN))); +}; + +// __ieee754_atanh(x) +// Method : +// 1. Reduce x to positive by atanh(-x) = -atanh(x) +// 2. For x>=0.5 +// 1 2x x +// atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------) +// 2 1 - x 1 - x +// +// For x<0.5 +// atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) +// +// Special cases: +// atanh(x) is NaN if |x| > 1 with signal; +// atanh(NaN) is that NaN with no signal; +// atanh(+-1) is +-INF with signal. +// + +// Returns the inverse hyperbolic tangent of x. +export fn atanhf64(x: f64) f64 = { + const NEAR_ZERO = 1f64 / ((1i64 << 28): f64); + + if (x < -1f64 || x > 1.064) { + return NAN; + } else if (isnan(x)) { + return NAN; + } else if (x == 1f64) { + return INF; + } else if (x == -1f64) { + return -INF; + }; + + let is_negative = false; + + if (x < 0f64) { + x = -x; + is_negative = true; + }; + + let temp = 0f64; + + if (x < NEAR_ZERO) { + temp = x; + } else if (x < 0.5f64) { + temp = x + x; + temp = 0.5f64 * log1pf64(temp + temp * x / (1f64 - x)); + } else { + temp = 0.5f64 * log1pf64((x + x) / (1f64 - x)); + }; + if (is_negative) { + temp = -temp; + }; + return temp; +}; + +@test fn atanh() void = { + for (let idx = 0z; idx < 10; idx += 1) { + assert(isclose( + atanhf64(TEST_INPUTS[idx] / 10f64), + TEST_ATANH[idx])); + }; + assert(isnan(atanhf64(-INF))); + assert(isnan(atanhf64(-PI))); + assert(atanhf64(-1f64) == -INF); + assert(atanhf64(-0f64) == -0f64); + assert(atanhf64(0f64) == 0f64); + assert(atanhf64(1f64) == INF); + assert(isnan(atanhf64(PI))); + assert(isnan(atanhf64(INF))); + assert(isnan(atanhf64(NAN))); +}; diff --git a/math/uints.ha b/math/uints.ha @@ -0,0 +1,543 @@ +// Sections of the code below are based on Go's implementation, in particular +// https://raw.githubusercontent.com/golang/go/master/src/math/bits/bits.go. +// +// The Go copyright notice: +// ==================================================== +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ==================================================== + +// The number of bits required to represent the first 256 numbers. +const LEN8TAB: [256]u8 = [ + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, +]; + +// The size of an unsigned int: 32 or 64 +def UINT_SIZE: u8 = (size(uint): u8) * 8; + +// Returns the minimum number of bits required to represent x. +export fn bit_size_u8(x: u8) u8 = { + return LEN8TAB[x]; +}; + +// Returns the minimum number of bits required to represent x. +export fn bit_size_u16(x: u16) u8 = { + let res = 0u8; + if (x >= 1u16 << 8) { + x >>= 8; + res += 8; + }; + return res + LEN8TAB[x]; +}; + +// Returns the minimum number of bits required to represent x. +export fn bit_size_u32(x: u32) u8 = { + let res = 0u8; + if (x >= 1u32 << 16) { + x >>= 16; + res += 16; + }; + if (x >= 1u32 << 8) { + x >>= 8; + res += 8; + }; + return res + LEN8TAB[x]; +}; + +// Returns the minimum number of bits required to represent x. +export fn bit_size_u64(x: u64) u8 = { + let res = 0u8; + if (x >= 1u64 << 32) { + x >>= 32; + res += 32; + }; + if (x >= 1u64 << 16) { + x >>= 16; + res += 16; + }; + if (x >= 1u64 << 8) { + x >>= 8; + res += 8; + }; + return res + LEN8TAB[x]; +}; + +// Returns the minimum number of bits required to represent x. +export fn bit_size_u(x: uint) u8 = { + if (UINT_SIZE == 32) { + return bit_size_u32(x: u32); + }; + return bit_size_u64(x: u64); +}; + +@test fn bit_size_u() void = { + assert(bit_size_u8(0) == 0); + assert(bit_size_u8(2) == 2); + assert(bit_size_u8(5) == 3); + assert(bit_size_u16(5) == 3); + assert(bit_size_u32(5) == 3); + assert(bit_size_u64(5) == 3); + assert(bit_size_u16(31111) == 15); + assert(bit_size_u32(536870911) == 29); + assert(bit_size_u64(8589934591) == 33); + assert(bit_size_u(0) == 0); + assert(bit_size_u(1) == 1); +}; + + +// Returns the number of leading zero bits in x +// The result is UINT_SIZE for x == 0. +export fn leading_zeros_u(x: uint) u8 = UINT_SIZE - bit_size_u(x); + +// Returns the number of leading zero bits in x +// The result is 8 for x == 0. +export fn leading_zeros_u8(x: u8) u8 = 8 - bit_size_u8(x); + +// Returns the number of leading zero bits in x +// The result is 16 for x == 0. +export fn leading_zeros_u16(x: u16) u8 = 16 - bit_size_u16(x); + +// Returns the number of leading zero bits in x +// The result is 32 for x == 0. +export fn leading_zeros_u32(x: u32) u8 = 32 - bit_size_u32(x); + +// Returns the number of leading zero bits in x +// The result is 64 for x == 0. +export fn leading_zeros_u64(x: u64) u8 = 64 - bit_size_u64(x); + +@test fn leading_zeros_u() void = { + assert(leading_zeros_u(0) == UINT_SIZE); + assert(leading_zeros_u(1) == UINT_SIZE - 1); + assert(leading_zeros_u8(0) == 8); + assert(leading_zeros_u8(1) == 8 - 1); + assert(leading_zeros_u16(0) == 16); + assert(leading_zeros_u16(1) == 16 - 1); + assert(leading_zeros_u32(0) == 32); + assert(leading_zeros_u32(1) == 32 - 1); + assert(leading_zeros_u64(0) == 64); + assert(leading_zeros_u64(1) == 64 - 1); +}; + +// Returns the sum with carry of x, y and carry: sum = x + y + carry. +// The carry input must be 0 or 1, otherwise the behavior is undefined. +// The carry_out output is guaranteed to be 0 or 1. +export fn addu32(x: u32, y: u32, carry: u32) (u32, u32) = { + const sum64 = (x: u64) + (y: u64) + (carry: u64); + const sum = (sum64: u32); + const carry_out = ((sum64 >> 32): u32); + return (sum, carry_out); +}; + +// Returns the sum with carry of x, y and carry: sum = x + y + carry. +// The carry input must be 0 or 1, otherwise the behavior is undefined. +// The carry_out output is guaranteed to be 0 or 1. +export fn addu64(x: u64, y: u64, carry: u64) (u64, u64) = { + const sum = x + y + carry; + // The sum will overflow if both top bits are set (x & y) or if one of + // them is (x | y), and a carry from the lower place happened. If such a + // carry happens, the top bit will be 1 + 0 + 1 = 0 (& ~sum). + const carry_out = ((x & y) | ((x | y) & ~sum)) >> 63; + return (sum, carry_out); +}; + +// Calls either addu32() or addu64() depending on UINT_SIZE. +export fn addu(x: uint, y: uint, carry: uint) (uint, uint) = { + if (UINT_SIZE == 32) { + const res = addu32((x: u32), (y: u32), (carry: u32)); + return ((res.0: uint), (res.1: uint)); + }; + const res = addu64((x: u64), (y: u64), (carry: u64)); + return ((res.0: uint), (res.1: uint)); +}; + +@test fn addu() void = { + // 32 + let res = addu32(2u32, 2u32, 0u32); + assert(res.0 == 4u32); + assert(res.1 == 0u32); + let res = addu32(2u32, 2u32, 1u32); + assert(res.0 == 5u32); + assert(res.1 == 0u32); + let res = addu32(~0u32, 0u32, 0u32); + assert(res.0 == ~0u32); + assert(res.1 == 0u32); + let res = addu32(~0u32, 1u32, 0u32); + assert(res.0 == 0u32); + assert(res.1 == 1u32); + + // 64 + let res = addu64(2u64, 2u64, 0u64); + assert(res.0 == 4u64); + assert(res.1 == 0u64); + let res = addu64(2u64, 2u64, 1u64); + assert(res.0 == 5u64); + assert(res.1 == 0u64); + let res = addu64(~0u64, 0u64, 0u64); + assert(res.0 == ~0u64); + assert(res.1 == 0u64); + let res = addu64(~0u64, 1u64, 0u64); + assert(res.0 == 0u64); + assert(res.1 == 1u64); + + // addu() + let res = addu(2u, 2u, 0u); + assert(res.0 == 4u); + assert(res.1 == 0u); + let res = addu(2u, 2u, 1u); + assert(res.0 == 5u); + assert(res.1 == 0u); +}; + +// Returns the difference of x, y and borrow, diff = x - y - borrow. +// The borrow input must be 0 or 1, otherwise the behavior is undefined. +// The borrow_out output is guaranteed to be 0 or 1. +export fn subu32(x: u32, y: u32, borrow: u32) (u32, u32) = { + const diff = x - y - borrow; + // The difference will underflow if the top bit of x is not set and the + // top bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a + // borrow from the lower place happens. If that borrow happens, the + // result will be 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff). + const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 31; + return (diff, borrow_out); +}; + +// Returns the difference of x, y and borrow, diff = x - y - borrow. +// The borrow input must be 0 or 1, otherwise the behavior is undefined. +// The borrow_out output is guaranteed to be 0 or 1. +export fn subu64(x: u64, y: u64, borrow: u64) (u64, u64) = { + const diff = x - y - borrow; + // See subu32 for the bit logic. + const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 63; + return (diff, borrow_out); +}; + +// Calls either mulu32() or mulu64() depending on UINT_SIZE. +export fn subu(x: uint, y: uint, carry: uint) (uint, uint) = { + if (UINT_SIZE == 32) { + const res = subu32((x: u32), (y: u32), (carry: u32)); + return ((res.0: uint), (res.1: uint)); + }; + const res = subu64((x: u64), (y: u64), (carry: u64)); + return ((res.0: uint), (res.1: uint)); +}; + +@test fn subu() void = { + // 32 + let res = subu32(4u32, 2u32, 0u32); + assert(res.0 == 2u32); + assert(res.1 == 0u32); + let res = subu32(4u32, 2u32, 1u32); + assert(res.0 == 1u32); + assert(res.1 == 0u32); + let res = subu32(0u32, 0u32, 0u32); + assert(res.0 == 0u32); + assert(res.1 == 0u32); + let res = subu32(0u32, 1u32, 0u32); + assert(res.0 == ~0u32); + assert(res.1 == 1u32); + + // 64 + let res = subu64(4u64, 2u64, 0u64); + assert(res.0 == 2u64); + assert(res.1 == 0u64); + let res = subu64(4u64, 2u64, 1u64); + assert(res.0 == 1u64); + assert(res.1 == 0u64); + let res = subu64(0u64, 0u64, 0u64); + assert(res.0 == 0u64); + assert(res.1 == 0u64); + let res = subu64(0u64, 1u64, 0u64); + assert(res.0 == ~0u64); + assert(res.1 == 1u64); + + // subu() + let res = subu(4u, 2u, 0u); + assert(res.0 == 2u); + assert(res.1 == 0u); + let res = subu(4u, 2u, 1u); + assert(res.0 == 1u); + assert(res.1 == 0u); +}; + +// Returns the 64-bit product of x and y: (hi, lo) = x * y +// with the product bits' upper half returned in hi and the lower +// half returned in lo. +export fn mulu32(x: u32, y: u32) (u32, u32) = { + const product = (x: u64) * (y: u64); + const hi = ((product >> 32): u32); + const lo = (product: u32); + return (hi, lo); +}; + +// Returns the 128-bit product of x and y: (hi, lo) = x * y +// with the product bits' upper half returned in hi and the lower +// half returned in lo. +export fn mulu64(x: u64, y: u64) (u64, u64) = { + const mask32 = (1u64 << 32) - 1; + const x0 = x & mask32; + const x1 = x >> 32; + const y0 = y & mask32; + const y1 = y >> 32; + const w0 = x0 * y0; + const t = (x1 * y0) + (w0 >> 32); + let w1 = t & mask32; + const w2 = t >> 32; + w1 += x0 * y1; + const hi = (x1 * y1) + w2 + (w1 >> 32); + const lo = x * y; + return (hi, lo); +}; + +// Calls either mulu32() or mulu64() depending on UINT_SIZE. +export fn mulu(x: uint, y: uint) (uint, uint) = { + if (UINT_SIZE == 32) { + const res = mulu32((x: u32), (y: u32)); + return ((res.0: uint), (res.1: uint)); + }; + const res = mulu64((x: u64), (y: u64)); + return ((res.0: uint), (res.1: uint)); +}; + +@test fn mulu() void = { + // 32 + let res = mulu32(2u32, 3u32); + assert(res.0 == 0u32); + assert(res.1 == 6u32); + let res = mulu32(~0u32, 2u32); + assert(res.0 == 1u32); + assert(res.1 == ~0u32 - 1); + + // 64 + let res = mulu64(2u64, 3u64); + assert(res.0 == 0u64); + assert(res.1 == 6u64); + let res = mulu64(~0u64, 2u64); + assert(res.0 == 1u64); + assert(res.1 == ~0u64 - 1); + + // mulu() + let res = mulu(2u, 3u); + assert(res.0 == 0u); + assert(res.1 == 6u); + let res = mulu(~0u, 2u); + assert(res.0 == 1u); + assert(res.1 == ~0u - 1); +}; + +// Returns the quotient and remainder of (hi, lo) divided by y: +// quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper +// half in parameter hi and the lower half in parameter lo. +// Panics for y == 0 (division by zero) or y <= hi (quotient overflow). +export fn divu32(hi: u32, lo: u32, y: u32) (u32, u32) = { + assert(y != 0); + assert(y > hi); + const z = (hi: u64) << 32 | (lo: u64); + const quo = ((z / (y: u64)): u32); + const rem = ((z % (y: u64)): u32); + return (quo, rem); +}; + +// Returns the quotient and remainder of (hi, lo) divided by y: +// quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper +// half in parameter hi and the lower half in parameter lo. +// Panics for y == 0 (division by zero) or y <= hi (quotient overflow). +export fn divu64(hi: u64, lo: u64, y: u64) (u64, u64) = { + const two32 = 1u64 << 32; + const mask32 = two32 - 1; + assert(y != 0); + assert(y > hi); + + const s = leading_zeros_u64(y); + y <<= s; + + const yn1 = y >> 32; + const yn0 = y & mask32; + const un32 = (hi << s) | (lo >> (64 - s)); + const un10 = lo << s; + const un1 = un10 >> 32; + const un0 = un10 & mask32; + let q1 = un32 / yn1; + let rhat = un32 - (q1 * yn1); + + for (q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1)) { + q1 -= 1; + rhat += yn1; + if (rhat >= two32) { + break; + }; + }; + + const un21 = (un32 * two32) + un1 - (q1 * y); + let q0 = un21 / yn1; + rhat = un21 - (q0 * yn1); + + for (q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0)) { + q0 -= 1; + rhat += yn1; + if (rhat >= two32) { + break; + }; + }; + + const quo = (q1 * two32) + q0; + const rem = ((un21 * two32) + un0 - (q0 * y)) >> s; + return (quo, rem); +}; + +// Calls either divu32() or divu64() depending on UINT_SIZE. +export fn divu(hi: uint, lo: uint, y: uint) (uint, uint) = { + if (UINT_SIZE == 32) { + const res = divu32((hi: u32), (lo: u32), (y: u32)); + return ((res.0: uint), (res.1: uint)); + }; + const res = divu64((hi: u64), (lo: u64), (y: u64)); + return ((res.0: uint), (res.1: uint)); +}; + +@test fn divu() void = { + // 32 + let res = divu32(0u32, 4u32, 2u32); + assert(res.0 == 2u32); + assert(res.1 == 0u32); + let res = divu32(0u32, 5u32, 2u32); + assert(res.0 == 2u32); + assert(res.1 == 1u32); + let res = divu32(1u32, 0u32, 2u32); + assert(res.0 == (1u32 << 31)); + assert(res.1 == 0u32); + // These should panic. + // let res = divu32(1u32, 1u32, 0u32); + // let res = divu32(1u32, 0u32, 1u32); + + // 64 + let res = divu64(0u64, 4u64, 2u64); + assert(res.0 == 2u64); + assert(res.1 == 0u64); + let res = divu64(0u64, 5u64, 2u64); + assert(res.0 == 2u64); + assert(res.1 == 1u64); + let res = divu64(1u64, 0u64, 2u64); + assert(res.0 == (1u64 << 63)); + assert(res.1 == 0u64); + // These should panic. + // let res = divu64(1u64, 1u64, 0u64); + // let res = divu64(1u64, 0u64, 1u64); + + // divu() + let res = divu(0u, 4u, 2u); + assert(res.0 == 2u); + assert(res.1 == 0u); + let res = divu(0u, 5u, 2u); + assert(res.0 == 2u); + assert(res.1 == 1u); + let res = divu(1u, 0u, 2u); + assert(res.0 == (1u << 31)); + assert(res.1 == 0u); + // These should panic. + // divu(1u, 1u, 0u); + // divu(1u, 0u, 1u); +}; + +// Returns the remainder of (hi, lo) divided by y. +// Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on +// a quotient overflow. +export fn remu32(hi: u32, lo: u32, y: u32) u32 = { + assert(y != 0); + const res = ((hi: u64) << 32 | (lo: u64)) % (y: u64); + return (res: u32); +}; + +// Returns the remainder of (hi, lo) divided by y. +// Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on +// a quotient overflow. +export fn remu64(hi: u64, lo: u64, y: u64) u64 = { + assert(y != 0); + // We scale down hi so that hi < y, then use divu() to compute the + // rem with the guarantee that it won't panic on quotient overflow. + // Given that + // hi ≡ hi%y (mod y) + // we have + // hi<<64 + lo ≡ (hi%y)<<64 + lo (mod y) + const res = divu64(hi % y, lo, y); + return res.1; +}; + +// Calls either remu32() or remu64() depending on UINT_SIZE. +export fn remu(hi: uint, lo: uint, y: uint) uint = { + if (UINT_SIZE == 32) { + return (remu32((hi: u32), (lo: u32), (y: u32)): uint); + }; + return (remu64((hi: u64), (lo: u64), (y: u64)): uint); +}; + +@test fn remu() void = { + // 32 + assert(remu32(0u32, 4u32, 2u32) == 0u32); + assert(remu32(0u32, 5u32, 2u32) == 1u32); + assert(remu32(0u32, 5u32, 3u32) == 2u32); + assert(remu32(1u32, 1u32, 2u32) == 1u32); + // These should panic. + // remu32(0u32, 4u32, 0u32); + + // 64 + assert(remu64(0u64, 4u64, 2u64) == 0u64); + assert(remu64(0u64, 5u64, 2u64) == 1u64); + assert(remu64(0u64, 5u64, 3u64) == 2u64); + assert(remu64(1u32, 1u32, 2u32) == 1u32); + // These should panic. + // remu64(0u64, 4u64, 0u64); + + // remu() + assert(remu(0u, 4u, 2u) == 0u); + assert(remu(0u, 5u, 2u) == 1u); + assert(remu(0u, 5u, 3u) == 2u); + assert(remu(1u, 1u, 2u) == 1u); + // These should panic. + // remu(0u, 4u, 0u); +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -569,7 +569,7 @@ linux_vdso() { math() { printf '# math\n' gen_srcs math \ - math.ha floats.ha ints.ha + testdata.ha math.ha floats.ha ints.ha uints.ha trig.ha gen_ssa math types } diff --git a/stdlib.mk b/stdlib.mk @@ -852,9 +852,12 @@ $(HARECACHE)/linux/vdso/linux_vdso.ssa: $(stdlib_linux_vdso_srcs) $(stdlib_rt) $ # math # math stdlib_math_srcs= \ + $(STDLIB)/math/testdata.ha \ $(STDLIB)/math/math.ha \ $(STDLIB)/math/floats.ha \ - $(STDLIB)/math/ints.ha + $(STDLIB)/math/ints.ha \ + $(STDLIB)/math/uints.ha \ + $(STDLIB)/math/trig.ha $(HARECACHE)/math/math.ssa: $(stdlib_math_srcs) $(stdlib_rt) $(stdlib_types) @printf 'HAREC \t$@\n' @@ -2080,9 +2083,12 @@ $(TESTCACHE)/linux/vdso/linux_vdso.ssa: $(testlib_linux_vdso_srcs) $(testlib_rt) # math # math testlib_math_srcs= \ + $(STDLIB)/math/testdata.ha \ $(STDLIB)/math/math.ha \ $(STDLIB)/math/floats.ha \ - $(STDLIB)/math/ints.ha + $(STDLIB)/math/ints.ha \ + $(STDLIB)/math/uints.ha \ + $(STDLIB)/math/trig.ha $(TESTCACHE)/math/math.ssa: $(testlib_math_srcs) $(testlib_rt) $(testlib_types) @printf 'HAREC \t$@\n' diff --git a/strconv/ftos.ha b/strconv/ftos.ha @@ -368,7 +368,7 @@ fn f64todecf64(mantissa: u64, exponent: u32) decf64 = { // round to even last_removed_digit = 4; }; - output = vr + ibool((vr == vm && + output = vr + ibool((vr == vm && (!accept_bounds || !vm_trailing_zeros)) || last_removed_digit >= 5); } else { @@ -451,7 +451,7 @@ fn f32todecf32(mantissa: u32, exponent: u32) decf32 = { vp = mulpow5_divpow2(mp, i, j); vm = mulpow5_divpow2(mm, i, j); if (q != 0 && (vp - 1) / 10 <= vm / 10) { - j = (q: i32) - 1 - (pow5bits(i + 1): i32 - + j = (q: i32) - 1 - (pow5bits(i + 1): i32 - F32_POW5_BITCOUNT: i32); last_removed_digit = (mulpow5_divpow2(mv, (i + 1), j) % 10): u8; @@ -650,7 +650,7 @@ export fn f32tos(n: f32) const str = { // The biggest string produced by a f32 number in base 10 would have the // negative sign, followed by a digit and decimal point, and then seven // more decimal digits, followed by 'e' and another negative sign and - // the maximum of two digits for exponent. + // the maximum of two digits for exponent. // (1 + 1 + 1 + 7 + 1 + 1 + 2) = 14 static let buf: [16]u8 = [0...]; const bits = math::f32bits(n); diff --git a/strconv/stof.ha b/strconv/stof.ha @@ -146,7 +146,7 @@ fn decimal_shift(d: *decimal, k: int) void = { fn should_round_up(d: *decimal, nd: uint) bool = if (nd < d.num_digits) { if (d.digits[nd] == 5 && nd + 1 == d.num_digits) { return d.truncated || - (nd > 0 && d.digits[nd - 1] & 1 != 0); + (nd > 0 && d.digits[nd - 1] & 1 != 0); } else return d.digits[nd] >= 5; } else false; @@ -436,7 +436,7 @@ fn floatbits(d: *decimal, f: *math::floatinfo) (u64 | overflow) = { }; for (d.decimal_point <= 0) { const n: int = if (d.decimal_point == 0) { - if (d.digits[0] >= 5) break; + if (d.digits[0] >= 5) break; if (d.digits[0] < 2) 2 else 1; } else if (-d.decimal_point >= len(powtab): i32) maxshift: int