commit ca6549e62bafd818aeb242c7b85e97344769185d
parent a70840843b277c91e27f11d1ea0bad7dd50cbc9b
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date: Sun, 22 Aug 2021 17:06:07 +0200
Add trig functions, uint functions and various other math functions
* Added uint functions
* Added isclose() and friends
* Added round() and friends
* Added trig functions such as sin(), sinh(), asin(), asinh() etc.
* Added log1(), log2(), log1p()
* Added modf64()
* Renamed old modf() to modfrac(), to avoid confusion with fmod(), which is
named modf64()
* Added lots of new tests, based on Go's all_test.go
* Removed some trailing whitespace from some strconv:: functions
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
Diffstat:
9 files changed, 2700 insertions(+), 204 deletions(-)
diff --git a/math/floats.ha b/math/floats.ha
@@ -99,6 +99,9 @@ def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111;
def F32_EXP_ZERO: u32 =
((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32);
+// The bits that represent the number 1f64.
+def F64_ONE: u64 = 0x3FF0000000000000;
+
// Contains information about the structure of a specific floating point number
// type.
export type floatinfo = struct {
@@ -176,10 +179,10 @@ export fn isneginf(n: f64) bool = {
assert(!isinf(NAN));
assert(!isinf(1.23));
assert(!isinf(-1.23f32));
- assert(isposinf(math::INF));
- assert(!isposinf(-math::INF));
- assert(isneginf(-math::INF));
- assert(!isneginf(math::INF));
+ assert(isposinf(INF));
+ assert(!isposinf(-INF));
+ assert(isneginf(-INF));
+ assert(!isneginf(INF));
};
// Returns true if the given floating-point number is normal.
@@ -276,11 +279,17 @@ export fn issubnormalf32(n: f32) bool = {
// Returns the absolute value of f64 n.
export fn absf64(n: f64) f64 = {
+ if (isnan(n)) {
+ return n;
+ };
return f64frombits(f64bits(n) & ~F64_SIGN_MASK);
};
// Returns the absolute value of f32 n.
export fn absf32(n: f32) f32 = {
+ if (isnan(n)) {
+ return n;
+ };
return f32frombits(f32bits(n) & ~F32_SIGN_MASK);
};
@@ -293,11 +302,14 @@ export fn absf(n: types::floating) f64 = {
};
@test fn absf() void = {
- assert(absf64(2.0f64) == 2.0f64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(absf(TEST_INPUTS[idx]) == TEST_ABSF[idx]);
+ };
+ assert(absf64(2f64) == 2f64);
assert(absf32(2.0f32) == 2.0f32);
- assert(absf(2.0f64) == 2.0f64);
- assert(absf(2.0f32) == 2.0f64);
- assert(absf(-2.0f64) == 2.0f64);
+ assert(absf(2f64) == 2f64);
+ assert(absf(2.0f32) == 2f64);
+ assert(absf(-2f64) == 2f64);
assert(absf(-2.0f32) == 2.0f32);
assert(absf(0f32) == 0f32);
assert(absf(0f64) == 0f64);
@@ -333,16 +345,19 @@ export fn signf(x: types::floating) i64 = {
};
@test fn signf() void = {
- assert(signf(0.0f64) == 1i64);
- assert(signf(-0.0f64) == -1i64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(signf(TEST_INPUTS[idx]) == TEST_SIGNF[idx]);
+ };
+ assert(signf(0f64) == 1i64);
+ assert(signf(-0f64) == -1i64);
assert(signf(0.0f32) == 1i64);
assert(signf(-0.0f32) == -1i64);
assert(signf(1.5f64) == 1i64);
assert(signf(-1.5f64) == -1i64);
- assert(ispositive(1.0f64));
- assert(!ispositive(-1.0f64));
- assert(isnegative(-1.0f64));
- assert(!isnegative(1.0f64));
+ assert(ispositive(1f64));
+ assert(!ispositive(-1f64));
+ assert(isnegative(-1f64));
+ assert(!isnegative(1f64));
};
// Returns whether or not x is positive.
@@ -394,16 +409,16 @@ export fn copysign(x: types::floating, y: types::floating) f64 = {
};
@test fn copysign() void = {
- assert(copysign(100.0f64, 1.0f64) == 100.0f64);
- assert(copysign(100.0f64, -1.0f64) == -100.0f64);
+ assert(copysign(100f64, 1f64) == 100f64);
+ assert(copysign(100f64, -1f64) == -100f64);
assert(copysign(100.0f32, 1.0f32) == 100.0f32);
assert(copysign(100.0f32, -1.0f32) == -100.0f32);
- assert(copysign(100.0f64, 0.0f64) == 100.0f64);
- assert(copysign(100.0f64, -0.0f64) == -100.0f64);
- assert(copysign(0.0f64, 100.0f64) == 0.0f64);
- assert(signf(copysign(0.0f64, 100.0f64)) > 0);
- assert(copysign(0.0f64, -100.0f64) == 0.0f64);
- assert(signf(copysign(0.0f64, -100.0f64)) < 0);
+ assert(copysign(100f64, 0f64) == 100f64);
+ assert(copysign(100f64, -0f64) == -100f64);
+ assert(copysign(0f64, 100f64) == 0f64);
+ assert(signf(copysign(0f64, 100f64)) > 0);
+ assert(copysign(0f64, -100f64) == 0f64);
+ assert(signf(copysign(0f64, -100f64)) < 0);
};
// Takes a potentially subnormal f64 n and returns a normal f64 normal_float
@@ -484,7 +499,13 @@ export fn frexp(n: types::floating) (f64, i64) = {
};
@test fn frexp() void = {
- let res = frexp(3.0f64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ let res = frexp(TEST_INPUTS[idx]);
+ let expected = TEST_FREXP[idx];
+ assert(res.0 == expected.0);
+ assert(res.1 == expected.1);
+ };
+ let res = frexp(3f64);
assert(res.0 == 0.75f64);
assert(res.1 == 2i64);
res = frexp(2.42f64);
@@ -511,21 +532,21 @@ export fn ldexpf64(mantissa: f64, exp: i64) f64 = {
let res_exp = exp + normalization_exp + mantissa_exp;
// Underflow
if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) {
- return copysign(0.0f64, mantissa);
+ return copysign(0f64, mantissa);
};
// Overflow
if (res_exp > (F64_EXPONENT_BIAS: i64)) {
- if (mantissa < 0.0f64) {
+ if (mantissa < 0f64) {
return -INF;
} else {
return INF;
};
};
// Subnormal
- let subnormal_factor = 1.0f64;
+ let subnormal_factor = 1f64;
if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) {
res_exp += (F64_MANTISSA_BITS: i64) - 1;
- subnormal_factor = 1.0f64 /
+ subnormal_factor = 1f64 /
((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64);
};
const res: u64 = (bits & F64_EXP_REMOVAL_MASK) |
@@ -586,11 +607,11 @@ export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
const res64 = ldexpf64(parts.0, parts.1);
assert(res64 == tests64[i]);
};
- assert(ldexpf64(1.0f64, -1076i64) == 0.0f64);
- assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64);
- assert(signf(ldexpf64(-1.0f64, -1076i64)) < 0);
- assert(ldexpf64(2.0f64, 1024i64) == INF);
- assert(ldexpf64(-2.0f64, 1024i64) == -INF);
+ assert(ldexpf64(1f64, -1076i64) == 0f64);
+ assert(ldexpf64(-1f64, -1076i64) == -0f64);
+ assert(signf(ldexpf64(-1f64, -1076i64)) < 0);
+ assert(ldexpf64(2f64, 1024i64) == INF);
+ assert(ldexpf64(-2f64, 1024i64) == -INF);
const tests32: [_]f32 = [INF, -INF,
0.0, 1.0, -1.0, 2.42, 123456789.0,
@@ -609,13 +630,13 @@ export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
};
// Returns the integer and fractional parts of an f64.
-export fn modf64(n: f64) (i64, f64) = {
- if (n < 1.0f64) {
- if (n < 0.0f64) {
- let positive_parts = modf64(-n);
+export fn modfracf64(n: f64) (i64, f64) = {
+ if (n < 1f64) {
+ if (n < 0f64) {
+ let positive_parts = modfracf64(-n);
return (-positive_parts.0, -positive_parts.1);
};
- if (n == 0.0f64) {
+ if (n == 0f64) {
return ((n: i64), n);
};
return (0i64, n);
@@ -636,10 +657,10 @@ export fn modf64(n: f64) (i64, f64) = {
};
// Returns the integer and fractional parts of an f32.
-export fn modf32(n: f32) (i32, f32) = {
+export fn modfracf32(n: f32) (i32, f32) = {
if (n < 1.0f32) {
if (n < 0.0f32) {
- let positive_parts = modf32(-n);
+ let positive_parts = modfracf32(-n);
return (-positive_parts.0, -positive_parts.1);
};
if (n == 0.0f32) {
@@ -662,48 +683,53 @@ export fn modf32(n: f32) (i32, f32) = {
return ((int_part: i32), frac_part);
};
-@test fn modf() void = {
+@test fn modfrac() void = {
// 64
- let res = modf64(1.75f64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ let res = modfracf64(TEST_INPUTS[idx]);
+ assert(res.0 == TEST_MODFRAC[idx].0);
+ assert(isclose(res.1, TEST_MODFRAC[idx].1));
+ };
+ let res = modfracf64(1.75f64);
assert(res.0 == 1i64);
assert(res.1 == 0.75f64);
- res = modf64(0.75f64);
+ res = modfracf64(0.75f64);
assert(res.0 == 0i64);
assert(res.1 == 0.75f64);
- res = modf64(-0.75f64);
+ res = modfracf64(-0.75f64);
assert(res.0 == -0i64);
assert(res.1 == -0.75f64);
- res = modf64(0.0f64);
+ res = modfracf64(0f64);
assert(res.0 == 0i64);
- assert(res.1 == 0.0f64);
+ assert(res.1 == 0f64);
assert(signf(res.1) > 0);
- res = modf64(-0.0f64);
+ res = modfracf64(-0f64);
assert(res.0 == -0i64);
- assert(res.1 == -0.0f64);
+ assert(res.1 == -0f64);
assert(signf(res.1) < 0);
- res = modf64(23.50f64);
+ res = modfracf64(23.50f64);
assert(res.0 == 23i64);
assert(res.1 == 0.50f64);
// 32
- let res = modf32(1.75f32);
+ let res = modfracf32(1.75f32);
assert(res.0 == 1i32);
assert(res.1 == 0.75f32);
- res = modf32(0.75f32);
+ res = modfracf32(0.75f32);
assert(res.0 == 0i32);
assert(res.1 == 0.75f32);
- res = modf32(-0.75f32);
+ res = modfracf32(-0.75f32);
assert(res.0 == -0i32);
assert(res.1 == -0.75f32);
- res = modf32(0.0f32);
+ res = modfracf32(0.0f32);
assert(res.0 == 0i32);
assert(res.1 == 0.0f32);
assert(signf(res.1) > 0);
- res = modf32(-0.0f32);
+ res = modfracf32(-0.0f32);
assert(res.0 == -0i32);
assert(res.1 == -0.0f32);
assert(signf(res.1) < 0);
- res = modf32(23.50f32);
+ res = modfracf32(23.50f32);
assert(res.0 == 23i32);
assert(res.1 == 0.50f32);
};
diff --git a/math/math.ha b/math/math.ha
@@ -1,7 +1,7 @@
// Sections of the code below, in particular log() and exp(), are based on Go's
// implementation, which is, in turn, based on FreeBSD's. The original C code,
// as well as the respective comments and constants are from
-// /usr/src/lib/msun/src/e_log.c.
+// /usr/src/lib/msun/src/{e_log,e_exp}.c.
//
// The FreeBSD copyright notice:
// ====================================================
@@ -46,13 +46,24 @@
use types;
+// The standard tolerance used by isclose(), which is just an arbitrary way to
+// measure whether two floating-point numbers are "sufficiently" close to each
+// other.
+export def STANDARD_TOL: f64 = 1e-14;
+
// Returns whether x and y are within tol of each other.
export fn eqwithinf64(x: f64, y: f64, tol: f64) bool = {
+ if (isnan(x) || isnan(y) || isnan(tol)) {
+ return false;
+ };
return absf64(x - y) < tol;
};
// Returns whether x and y are within tol of each other.
export fn eqwithinf32(x: f32, y: f32, tol: f32) bool = {
+ if (isnan(x) || isnan(y) || isnan(tol)) {
+ return false;
+ };
return absf32(x - y) < tol;
};
@@ -65,10 +76,31 @@ export fn eqwithin(x: types::floating, y: types::floating,
};
};
-@test fn equalwithin() void = {
- assert(eqwithin(1.0f64, 2.0f64, 2.0f64));
+// Returns whether x and y are within [[STANDARD_TOL]] of each other.
+export fn isclosef64(x: f64, y: f64) bool = {
+ return eqwithinf64(x, y, STANDARD_TOL);
+};
+
+// Returns whether x and y are within [[STANDARD_TOL]] of each other.
+export fn isclosef32(x: f32, y: f32) bool = {
+ return eqwithinf32(x, y, STANDARD_TOL);
+};
+
+// Returns whether x and y are within [[STANDARD_TOL]] of each other.
+export fn isclose(x: types::floating, y: types::floating) bool = {
+ return match (x) {
+ n: f64 => isclosef64(n, y as f64),
+ n: f32 => isclosef32(n, y as f32),
+ };
+};
+
+@test fn eqwithin() void = {
+ assert(eqwithin(1f64, 2f64, 2f64));
assert(eqwithin(1.0f32, 2.0f32, 2.0f32));
assert(!eqwithin(1.0005f32, 1.0004f32, 0.00001f32));
+ assert(isclose(1f64, 1.0000000000000000000000000001f64));
+ assert(isclose(1.0f32, 1.0000000000000000000000000001f32));
+ assert(!isclose(1.0005f32, 1.0004f32));
};
// e - https://oeis.org/A001113
@@ -90,11 +122,11 @@ def LN_2: f64 = 0.69314718055994530941723212145817656807550013436025525412068000
def LN2_HI: f64 = 6.93147180369123816490e-01;
def LN2_LO: f64 = 1.90821492927058770002e-10;
// log_{2}(e)
-def LOG2_E: f64 = 1.0f64 / LN_2;
+def LOG2_E: f64 = 1f64 / LN_2;
// ln(10) - https://oeis.org/A002392
def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790;
// log_{10}(e)
-def LOG10_E: f64 = 1.0f64 / LN_10;
+def LOG10_E: f64 = 1f64 / LN_10;
// __ieee754_log(x)
// Return the logarithm of x
@@ -146,20 +178,20 @@ def LOG10_E: f64 = 1.0f64 / LN_10;
// Returns the natural logarithm of x.
export fn logf64(x: f64) f64 = {
- const L1 = 6.666666666666735130e-01; // 3FE55555 55555593
- const L2 = 3.999999999940941908e-01; // 3FD99999 9997FA04
- const L3 = 2.857142874366239149e-01; // 3FD24924 94229359
- const L4 = 2.222219843214978396e-01; // 3FCC71C5 1D8E78AF
- const L5 = 1.818357216161805012e-01; // 3FC74664 96CB03DE
- const L6 = 1.531383769920937332e-01; // 3FC39A09 D078C69F
- const L7 = 1.479819860511658591e-01; // 3FC2F112 DF3E5244
+ const L1 = 6.666666666666735130e-01; // 3fe55555 55555593
+ const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04
+ const L3 = 2.857142874366239149e-01; // 3fd24924 94229359
+ const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af
+ const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de
+ const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f
+ const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244
// special cases
if (isnan(x) || isposinf(x)) {
return x;
- } else if (x < 0.0f64) {
+ } else if (x < 0f64) {
return NAN;
- } else if (x == 0.0f64) {
+ } else if (x == 0f64) {
return -INF;
};
@@ -167,15 +199,15 @@ export fn logf64(x: f64) f64 = {
const f1_and_ki = frexp(x);
let f1 = f1_and_ki.0;
let ki = f1_and_ki.1;
- if (f1 < (SQRT_2 / 2.0f64)) {
- f1 *= 2.0f64;
+ if (f1 < (SQRT_2 / 2f64)) {
+ f1 *= 2f64;
ki -= 1i64;
};
- let f = f1 - 1.0f64;
+ let f = f1 - 1f64;
let k = (ki: f64);
// Compute
- const s = f / (2.0f64 + f);
+ const s = f / (2f64 + f);
const s2 = s * s;
const s4 = s2 * s2;
const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7)));
@@ -186,14 +218,245 @@ export fn logf64(x: f64) f64 = {
};
@test fn logf64() void = {
- assert(logf64(E) == 1.0f64);
- assert(logf64(54.598150033144239078110261202860878402790f64) == 4.0f64);
- assert(isnan(logf64(-1.0f64)));
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(
+ logf64(absf64(TEST_INPUTS[idx])),
+ TEST_LOG[idx]));
+ };
+ assert(logf64(E) == 1f64);
+ assert(logf64(54.598150033144239078110261202860878402790f64) == 4f64);
+ assert(isnan(logf64(-1f64)));
assert(isposinf(logf64(INF)));
- assert(isneginf(logf64(0.0f64)));
+ assert(isneginf(logf64(0f64)));
assert(isnan(logf64(NAN)));
};
+// Returns the decimal logarithm of x.
+export fn log10f64(x: f64) f64 = {
+ return logf64(x) * (1f64 / LN_10);
+};
+
+@test fn log10f64() void = {
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(
+ log10f64(absf64(TEST_INPUTS[idx])),
+ TEST_LOG10[idx]));
+ };
+};
+
+// Returns the binary logarithm of x.
+export fn log2f64(x: f64) f64 = {
+ const frexp_res = frexpf64(x);
+ let frac = frexp_res.0;
+ let exp = frexp_res.1;
+ // Make sure exact powers of two give an exact answer.
+ // Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1.
+ if (frac == 0.5f64) {
+ return ((exp - 1): f64);
+ };
+ return logf64(frac) * (1f64 / LN_2) + (exp: f64);
+};
+
+@test fn log2f64() void = {
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(
+ log2f64(absf64(TEST_INPUTS[idx])),
+ TEST_LOG2[idx]));
+ };
+};
+
+// double log1p(double x)
+//
+// Method :
+// 1. Argument Reduction: find k and f such that
+// 1+x = 2**k * (1+f),
+// where sqrt(2)/2 < 1+f < sqrt(2) .
+//
+// Note. If k=0, then f=x is exact. However, if k!=0, then f
+// may not be representable exactly. In that case, a correction
+// term is need. Let u=1+x rounded. Let c = (1+x)-u, then
+// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+// and add back the correction term c/u.
+// (Note: when x > 2**53, one can simply return log(x))
+//
+// 2. Approximation of log1p(f).
+// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+// = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+// = 2s + s*R
+// We use a special Reme algorithm on [0,0.1716] to generate
+// a polynomial of degree 14 to approximate R The maximum error
+// of this polynomial approximation is bounded by 2**-58.45. In
+// other words,
+// 2 4 6 8 10 12 14
+// R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s +LP6*s +LP7*s
+// (the values of LP1 to LP7 are listed in the program)
+// and
+// | 2 14 | -58.45
+// | LP1*s +...+LP7*s - R(z) | <= 2
+// | |
+// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+// In order to guarantee error in log below 1ulp, we compute log
+// by
+// log1p(f) = f - (hfsq - s*(hfsq+R)).
+//
+// 3. Finally, log1p(x) = k*ln2 + log1p(f).
+// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
+// Here ln2 is split into two floating point number:
+// ln2_hi + ln2_lo,
+// where n*ln2_hi is always exact for |n| < 2000.
+//
+// Special cases:
+// log1p(x) is NaN with signal if x < -1 (including -INF) ;
+// log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+// log1p(NaN) is that NaN with no signal.
+//
+// Accuracy:
+// according to an error analysis, the error is always less than
+// 1 ulp (unit in the last place).
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+//
+// Note: Assuming log() return accurate answer, the following
+// algorithm can be used to compute log1p(x) to within a few ULP:
+//
+// u = 1+x;
+// if(u==1.0) return x ; else
+// return log(u)*(x/(u-1.0));
+//
+// See HP-15C Advanced Functions Handbook, p.193.
+
+// Returns the natural logarithm of 1 plus its argument x.
+// It is more accurate than log(1 + x) when x is near zero.
+export fn log1pf64(x: f64) f64 = {
+ // sqrt(2) - 1
+ const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34
+ // sqrt(2) / 2 - 1
+ const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866
+ const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29
+ const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54
+ const TWO53 = ((1i64 << 53): f64); // 2**53
+ const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000
+ const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76
+ const LP1 = 6.666666666666735130e-01; // 3fe5555555555593
+ const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04
+ const LP3 = 2.857142874366239149e-01; // 3fd2492494229359
+ const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af
+ const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de
+ const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f
+ const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244
+
+ if (x < -1f64 || isnan(x)) {
+ return NAN;
+ } else if (x == -1f64) {
+ return -INF;
+ } else if (isposinf(x)) {
+ return INF;
+ };
+
+ const absx = absf64(x);
+
+ let f = 0f64;
+ let iu = 0u64;
+ let k = 1i64;
+ if (absx < SQRT2M1) { // |x| < Sqrt(2)-1
+ if (absx < SMALL) { // |x| < 2**-29
+ if (absx < TINY) { // |x| < 2**-54
+ return x;
+ };
+ return x - (x * x * 0.5f64);
+ };
+ if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x
+ // (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
+ k = 0;
+ f = x;
+ iu = 1;
+ };
+ };
+ let c = 0f64;
+ if (k != 0) {
+ let u = 0f64;
+ if (absx < TWO53) { // 1<<53
+ u = 1.0 + x;
+ iu = f64bits(u);
+ k = (((iu >> 52) - 1023): i64);
+ // Correction term
+ if (k > 0) {
+ c = 1f64 - (u - x);
+ } else {
+ c = x - (u - 1f64);
+ };
+ c /= u;
+ } else {
+ u = x;
+ iu = f64bits(u);
+ k = (((iu >> 52) - 1023): i64);
+ c = 0f64;
+ };
+ iu &= 0x000fffffffffffff;
+ if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2)
+ // Normalize u
+ u = f64frombits(iu | 0x3ff0000000000000);
+ } else {
+ k += 1;
+ // Normalize u/2
+ u = f64frombits(iu | 0x3fe0000000000000);
+ iu = (0x0010000000000000 - iu) >> 2;
+ };
+ f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2)
+ };
+ const hfsq = 0.5 * f * f;
+ let s = 0f64;
+ let R = 0f64;
+ let z = 0f64;
+ if (iu == 0) { // |f| < 2**-20
+ if (f == 0f64) {
+ if (k == 0) {
+ return 0f64;
+ };
+ c += (k: f64) * LN2LO;
+ return (k: f64) * LN2HI + c;
+ };
+ R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division
+ if (k == 0) {
+ return f - R;
+ };
+ return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f);
+ };
+ s = f / (2f64 + f);
+ z = s * s;
+ R = z * (LP1 +
+ z * (LP2 +
+ z * (LP3 +
+ z * (LP4 +
+ z * (LP5 +
+ z * (LP6 +
+ z * LP7))))));
+ if (k == 0) {
+ return f - (hfsq - s * (hfsq + R));
+ };
+ return (k: f64) * LN2HI -
+ ((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f);
+};
+
+@test fn log1p() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ log1pf64(TEST_INPUTS[idx] / 100f64),
+ TEST_LOG1P[idx]));
+ };
+ assert(isnan(log1pf64(-INF)));
+ assert(isnan(log1pf64(-PI)));
+ assert(log1pf64(-1f64) == -INF);
+ assert(log1pf64(-0f64) == -0f64);
+ assert(log1pf64(0f64) == 0f64);
+ assert(log1pf64(INF) == INF);
+ assert(isnan(log1pf64(NAN)));
+};
+
// exp(x)
// Returns the exponential of x.
//
@@ -258,16 +521,16 @@ export fn logf64(x: f64) f64 = {
// Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2).
export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = {
- const P1 = 1.66666666666666657415e-01; // 0x3FC55555; 0x55555555
- const P2 = -2.77777777770155933842e-03; // 0xBF66C16C; 0x16BEBD93
- const P3 = 6.61375632143793436117e-05; // 0x3F11566A; 0xAF25DE2C
- const P4 = -1.65339022054652515390e-06; // 0xBEBBBD41; 0xC5D26BF1
- const P5 = 4.13813679705723846039e-08; // 0x3E663769; 0x72BEA4D0
+ const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555
+ const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n
+ const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c
+ const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1
+ const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0
let r = hi - lo;
let t = r * r;
let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
- let y = 1.0f64 - ((lo - (r * c) / (2.0f64 - c)) - hi);
+ const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi);
return ldexpf64(y, k);
};
@@ -275,30 +538,29 @@ export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = {
export fn expf64(x: f64) f64 = {
const overflow = 7.09782712893383973096e+02;
const underflow = -7.45133219101941108420e+02;
- const near_zero = 1.0f64 / ((1i64 << 28i64): f64);
+ const near_zero = 1f64 / ((1i64 << 28i64): f64);
- // Special cases
if (isnan(x) || isposinf(x)) {
return x;
} else if (isneginf(x)) {
- return 0.0f64;
+ return 0f64;
} else if (x > overflow) {
return INF;
} else if (x < underflow) {
- return 0.0f64;
+ return 0f64;
} else if (-near_zero < x && x < near_zero) {
- return 1.0f64 + x;
+ return 1f64 + x;
};
// Reduce; computed as r = hi - lo for extra precision.
let k = 0i64;
- if (x < 0.0f64) {
+ if (x < 0f64) {
k = (((LOG2_E * x) - 0.5): i64);
- } else if (x > 0.0f64) {
+ } else if (x > 0f64) {
k = (((LOG2_E * x) + 0.5): i64);
};
- let hi = x - ((k: f64) * LN2_HI);
- let lo = (k: f64) * LN2_LO;
+ const hi = x - ((k: f64) * LN2_HI);
+ const lo = (k: f64) * LN2_LO;
// Compute
return expmultif64(hi, lo, k);
@@ -309,53 +571,58 @@ export fn exp2f64(x: f64) f64 = {
const overflow = 1.0239999999999999e+03;
const underflow = -1.0740e+03;
- // Special cases
if (isnan(x) || isposinf(x)) {
return x;
} else if (isneginf(x)) {
- return 0.0f64;
+ return 0f64;
} else if (x > overflow) {
return INF;
} else if (x < underflow) {
- return 0.0f64;
+ return 0f64;
};
// Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2.
// Computed as r = hi - lo for extra precision.
let k = 0i64;
- if (x > 0.0f64) {
+ if (x > 0f64) {
k = ((x + 0.5): i64);
- } else if (x < 0.0f64) {
+ } else if (x < 0f64) {
k = ((x - 0.5): i64);
};
- let t = x - (k: f64);
- let hi = t * LN2_HI;
- let lo = -t * LN2_LO;
+ const t = x - (k: f64);
+ const hi = t * LN2_HI;
+ const lo = -t * LN2_LO;
// Compute
return expmultif64(hi, lo, k);
};
@test fn expf64() void = {
- assert(expf64(1.0f64) == E);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(expf64(TEST_INPUTS[idx]), TEST_EXP[idx]));
+ };
+ assert(expf64(1f64) == E);
assert(isnan(expf64(NAN)));
assert(isinf(expf64(INF)));
- assert(expf64(-INF) == 0.0f64);
- assert(isinf(expf64(99999.0f64)));
- assert(expf64(-99999.0f64) == 0.0f64);
- assert(expf64(0.5e-20) == 1.0f64);
+ assert(expf64(-INF) == 0f64);
+ assert(isinf(expf64(99999f64)));
+ assert(expf64(-99999f64) == 0f64);
+ assert(expf64(0.5e-20) == 1f64);
};
@test fn exp2f64() void = {
- assert(exp2f64(0.0f64) == 1.0f64);
- assert(exp2f64(3.0f64) == 8.0f64);
- assert(exp2f64(-2.0f64) == 0.25f64);
- assert(!isinf(exp2f64(256.0f64)));
- assert(isinf(exp2f64(99999.0f64)));
- assert(exp2f64(-99999.0f64) == 0.0f64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(exp2f64(TEST_INPUTS[idx]), TEST_EXP2[idx]));
+ };
+ assert(exp2f64(0f64) == 1f64);
+ assert(exp2f64(3f64) == 8f64);
+ assert(exp2f64(-2f64) == 0.25f64);
+ assert(!isinf(exp2f64(256f64)));
+ assert(isinf(exp2f64(99999f64)));
+ assert(exp2f64(-99999f64) == 0f64);
assert(isnan(exp2f64(NAN)));
assert(isinf(exp2f64(INF)));
- assert(exp2f64(-INF) == 0.0f64);
+ assert(exp2f64(-INF) == 0f64);
};
// __ieee754_sqrt(x)
@@ -420,12 +687,11 @@ export fn exp2f64(x: f64) f64 = {
// Returns the square root of x.
export fn sqrtf64(x: f64) f64 = {
- // Special cases
- if (x == 0.0f64) {
+ if (x == 0f64) {
return x;
} else if (isnan(x) || isposinf(x)) {
return x;
- } else if (x < 0.0f64) {
+ } else if (x < 0f64) {
return NAN;
};
@@ -459,7 +725,7 @@ export fn sqrtf64(x: f64) f64 = {
// r = moving bit from MSB to LSB
let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64);
for (r != 0) {
- let t = s + r;
+ const t = s + r;
if (t <= bits) {
s = t + r;
bits -= t;
@@ -482,88 +748,93 @@ export fn sqrtf64(x: f64) f64 = {
};
@test fn sqrt() void = {
- assert(sqrtf64(2.0f64) == SQRT_2);
- assert(sqrtf64(4.0f64) == 2.0f64);
- assert(sqrtf64(16.0f64) == 4.0f64);
- assert(sqrtf64(65536.0f64) == 256.0f64);
- assert(sqrtf64(powf64(123.0f64, 2.0f64)) == 123.0f64);
- assert(sqrtf64(0.0f64) == 0.0f64);
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(isclose(
+ sqrtf64(absf64(TEST_INPUTS[idx])),
+ TEST_SQRT[idx]));
+ };
+ assert(sqrtf64(2f64) == SQRT_2);
+ assert(sqrtf64(4f64) == 2f64);
+ assert(sqrtf64(16f64) == 4f64);
+ assert(sqrtf64(65536f64) == 256f64);
+ assert(sqrtf64(powf64(123f64, 2f64)) == 123f64);
+ assert(sqrtf64(0f64) == 0f64);
assert(isnan(sqrtf64(NAN)));
assert(isposinf(sqrtf64(INF)));
- assert(isnan(sqrtf64(-2.0f64)));
+ assert(isnan(sqrtf64(-2f64)));
};
fn is_f64_odd_int(x: f64) bool = {
- let x_int_frac = modf64(x);
- let x_int = x_int_frac.0;
- let x_frac = x_int_frac.1;
- let has_no_frac = (x_frac == 0.0f64);
- let is_odd = ((x_int & 1i64) == 1i64);
+ const x_int_frac = modfracf64(x);
+ const x_int = x_int_frac.0;
+ const x_frac = x_int_frac.1;
+ const has_no_frac = (x_frac == 0f64);
+ const is_odd = ((x_int & 1i64) == 1i64);
return has_no_frac && is_odd;
};
// Returns x^p.
export fn powf64(x: f64, p: f64) f64 = {
- if (x == 1.0f64 || p == 0.0f64) {
- return 1.0f64;
- } else if (p == 1.0f64) {
+ if (x == 1f64 || p == 0f64) {
+ return 1f64;
+ } else if (p == 1f64) {
return x;
} else if (isnan(x)) {
return NAN;
} else if (isnan(p)) {
return NAN;
- } else if (x == 0.0f64) {
- if (p < 0.0f64) {
+ } else if (x == 0f64) {
+ if (p < 0f64) {
if (is_f64_odd_int(p)) {
return copysignf64(INF, x);
} else {
return INF;
};
- } else if (p > 0.0f64) {
+ } else if (p > 0f64) {
if (is_f64_odd_int(p)) {
return x;
} else {
- return 0.0f64;
+ return 0f64;
};
};
} else if (isinf(p)) {
- if (x == -1.0f64) {
- return 1.0f64;
- } else if ((absf64(x) < 1.0f64) == isposinf(p)) {
- return 0.0f64;
+ if (x == -1f64) {
+ return 1f64;
+ } else if ((absf64(x) < 1f64) == isposinf(p)) {
+ return 0f64;
};
return INF;
} else if (isinf(x)) {
if (isneginf(x)) {
- return powf64(-0.0f64, -p);
- } else if (p < 0.0f64) {
- return 0.0f64;
- } else if (p > 0.0f64) {
+ return powf64(-0f64, -p);
+ } else if (p < 0f64) {
+ return 0f64;
+ } else if (p > 0f64) {
return INF;
};
} else if (p == 0.5f64) {
return sqrtf64(x);
} else if (p == -0.5f64) {
- return 1.0f64 / sqrtf64(x);
+ return 1f64 / sqrtf64(x);
};
- let x_parts = frexp(x);
+ const x_parts = frexp(x);
let x_mantissa = x_parts.0;
let x_exp = x_parts.1;
- let p_int_frac = modf64(absf64(p));
+ const p_int_frac = modfracf64(absf64(p));
let p_int = p_int_frac.0;
let p_frac = p_int_frac.1;
- let res_mantissa = 1.0f64;
+ let res_mantissa = 1f64;
let res_exp = 0i64;
// The method used later in this function doesn't apply to fractional
// powers, so we have to handle these separately with
// x^p = e^{p * ln(x)}
- if (p_frac != 0.0f64) {
+ if (p_frac != 0f64) {
if (p_frac > 0.5f64) {
- p_frac -= 1.0f64;
+ p_frac -= 1f64;
p_int += 1i64;
};
res_mantissa = expf64(p_frac * logf64(x));
@@ -575,7 +846,7 @@ export fn powf64(x: f64, p: f64) f64 = {
for (let i = p_int; i != 0; i >>= 1) {
// Check for over/underflow.
if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) {
- return 0.0f64;
+ return 0f64;
};
if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) {
return INF;
@@ -594,97 +865,288 @@ export fn powf64(x: f64, p: f64) f64 = {
};
};
- if (p < 0.0f64) {
- res_mantissa = 1.0f64 / res_mantissa;
+ if (p < 0f64) {
+ res_mantissa = 1f64 / res_mantissa;
res_exp = -res_exp;
};
- let res = ldexpf64(res_mantissa, res_exp);
- return res;
+ return ldexpf64(res_mantissa, res_exp);
};
@test fn powf64() void = {
+ for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) {
+ assert(eqwithin(
+ powf64(10f64, TEST_INPUTS[idx]),
+ TEST_POW[idx],
+ 1e-8f64));
+ };
// Positive integer
- assert(powf64(2.0f64, 2.0f64) == 4.0f64);
- assert(powf64(3.0f64, 3.0f64) == 27.0f64);
+ assert(powf64(2f64, 2f64) == 4f64);
+ assert(powf64(3f64, 3f64) == 27f64);
// Very large positive integer
- assert(!isinf(powf64(2.0f64, 1020.0f64)));
- assert(isinf(powf64(2.0f64, 1050.0f64)));
+ assert(!isinf(powf64(2f64, 1020f64)));
+ assert(isinf(powf64(2f64, 1050f64)));
// Negative integer
- assert(powf64(2.0f64, -1.0f64) == 0.5f64);
- assert(powf64(2.0f64, -2.0f64) == 0.25f64);
+ assert(powf64(2f64, -1f64) == 0.5f64);
+ assert(powf64(2f64, -2f64) == 0.25f64);
// Very small negative integer
- assert(powf64(2.0f64, -1020.0f64) > 0.0f64);
- assert(powf64(2.0f64, -1080.0f64) == 0.0f64);
+ assert(powf64(2f64, -1020f64) > 0f64);
+ assert(powf64(2f64, -1080f64) == 0f64);
// Positive fractional powers
- assert(eqwithin(powf64(2.0f64, 1.5f64),
+ assert(eqwithin(powf64(2f64, 1.5f64),
2.8284271247461900976033774f64,
1e-10f64));
- assert(eqwithin(powf64(2.0f64, 5.5f64),
+ assert(eqwithin(powf64(2f64, 5.5f64),
45.254833995939041561654039f64,
1e-10f64));
// Negative fractional powers
- assert(eqwithin(powf64(2.0f64, -1.5f64),
+ assert(eqwithin(powf64(2f64, -1.5f64),
0.3535533905932737622004221f64,
1e-10f64));
- assert(eqwithin(powf64(2.0f64, -5.5f64),
+ assert(eqwithin(powf64(2f64, -5.5f64),
0.0220970869120796101375263f64,
1e-10f64));
// Special cases
// pow(x, ±0) = 1 for any x
- assert(powf64(123.0f64, 0.0f64) == 1.0f64);
+ assert(powf64(123f64, 0f64) == 1f64);
// pow(1, y) = 1 for any y
- assert(powf64(1.0f64, 123.0f64) == 1.0f64);
+ assert(powf64(1f64, 123f64) == 1f64);
// pow(x, 1) = x for any x
- assert(powf64(123.0f64, 1.0f64) == 123.0f64);
+ assert(powf64(123f64, 1f64) == 123f64);
// pow(NaN, y) = NaN
- assert(isnan(powf64(NAN, 123.0f64)));
+ assert(isnan(powf64(NAN, 123f64)));
// pow(x, NaN) = NaN
- assert(isnan(powf64(123.0f64, NAN)));
+ assert(isnan(powf64(123f64, NAN)));
// pow(±0, y) = ±Inf for y an odd integer < 0
- assert(isposinf(powf64(0.0f64, -3.0f64)));
- assert(isneginf(powf64(-0.0f64, -3.0f64)));
+ assert(isposinf(powf64(0f64, -3f64)));
+ assert(isneginf(powf64(-0f64, -3f64)));
// pow(±0, -Inf) = +Inf
- assert(isposinf(powf64(0.0f64, -INF)));
- assert(isposinf(powf64(-0.0f64, -INF)));
+ assert(isposinf(powf64(0f64, -INF)));
+ assert(isposinf(powf64(-0f64, -INF)));
// pow(±0, +Inf) = +0
- assert(powf64(0.0f64, INF) == 0.0f64);
- assert(powf64(-0.0f64, INF) == 0.0f64);
+ assert(powf64(0f64, INF) == 0f64);
+ assert(powf64(-0f64, INF) == 0f64);
// pow(±0, y) = +Inf for finite y < 0 and not an odd integer
- assert(isposinf(powf64(0.0f64, -2.0f64)));
- assert(isposinf(powf64(-0.0f64, -2.0f64)));
+ assert(isposinf(powf64(0f64, -2f64)));
+ assert(isposinf(powf64(-0f64, -2f64)));
//pow(±0, y) = ±0 for y an odd integer > 0
- assert(powf64(0.0f64, 123.0f64) == 0.0f64);
- let neg_zero = powf64(-0.0f64, 123.0f64);
- assert(neg_zero == 0.0f64);
+ assert(powf64(0f64, 123f64) == 0f64);
+ const neg_zero = powf64(-0f64, 123f64);
+ assert(neg_zero == 0f64);
assert(isnegative(neg_zero));
// pow(±0, y) = +0 for finite y > 0 and not an odd integer
- assert(powf64(0.0f64, 8.0f64) == 0.0f64);
+ assert(powf64(0f64, 8f64) == 0f64);
// pow(-1, ±Inf) = 1
- assert(powf64(-1.0f64, INF) == 1.0f64);
- assert(powf64(-1.0f64, -INF) == 1.0f64);
+ assert(powf64(-1f64, INF) == 1f64);
+ assert(powf64(-1f64, -INF) == 1f64);
// pow(x, +Inf) = +Inf for |x| > 1
- assert(isposinf(powf64(123.0f64, INF)));
+ assert(isposinf(powf64(123f64, INF)));
// pow(x, -Inf) = +0 for |x| > 1
- assert(powf64(123.0f64, -INF) == 0.0f64);
+ assert(powf64(123f64, -INF) == 0f64);
// pow(x, +Inf) = +0 for |x| < 1
- assert(powf64(0.5f64, INF) == 0.0f64);
- assert(powf64(-0.5f64, INF) == 0.0f64);
+ assert(powf64(0.5f64, INF) == 0f64);
+ assert(powf64(-0.5f64, INF) == 0f64);
// pow(x, -Inf) = +Inf for |x| < 1
assert(isposinf(powf64(0.5f64, -INF)));
assert(isposinf(powf64(-0.5f64, -INF)));
// pow(+Inf, y) = +Inf for y > 0
- assert(isposinf(powf64(INF, 123.0f64)));
+ assert(isposinf(powf64(INF, 123f64)));
// pow(+Inf, y) = +0 for y < 0
- assert(powf64(INF, -1.0f64) == 0.0f64);
+ assert(powf64(INF, -1f64) == 0f64);
// pow(-Inf, y) = pow(-0, -y)
- assert(powf64(-INF, 123.0f64) == powf64(-0.0f64, -123.0f64));
+ assert(powf64(-INF, 123f64) == powf64(-0f64, -123f64));
// pow(x, y) = NaN for finite x < 0 and finite non-integer y
- assert(isnan(powf64(-2.0f64, 1.23f64)));
+ assert(isnan(powf64(-2f64, 1.23f64)));
// sqrt
- assert(powf64(4.0f64, 0.5f64) == sqrtf64(4.0f64));
- assert(powf64(4.0f64, 0.5f64) == 2.0f64);
- assert(powf64(4.0f64, -0.5f64) == (1.0f64 / sqrtf64(4.0f64)));
- assert(powf64(4.0f64, -0.5f64) == (1.0f64 / 2.0f64));
+ assert(powf64(4f64, 0.5f64) == sqrtf64(4f64));
+ assert(powf64(4f64, 0.5f64) == 2f64);
+ assert(powf64(4f64, -0.5f64) == (1f64 / sqrtf64(4f64)));
+ assert(powf64(4f64, -0.5f64) == (1f64 / 2f64));
+};
+
+// Returns the greatest integer value less than or equal to x.
+export fn floorf64(x: f64) f64 = {
+ if (x == 0f64 || isnan(x) || isinf(x)) {
+ return x;
+ };
+ if (x < 0f64) {
+ const modfrac_res = modfracf64(-x);
+ let int_part = modfrac_res.0;
+ const frac_part = modfrac_res.1;
+ if (frac_part != 0f64) {
+ int_part += 1;
+ };
+ return -(int_part: f64);
+ };
+ const modfrac_res = modfracf64(x);
+ return (modfrac_res.0: f64);
+};
+
+@test fn floor() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ floorf64(TEST_INPUTS[idx]),
+ TEST_FLOOR[idx]));
+ };
+ assert(floorf64(-INF) == -INF);
+ assert(floorf64(-0f64) == -0f64);
+ assert(floorf64(0f64) == 0f64);
+ assert(floorf64(INF) == INF);
+ assert(isnan(floorf64(NAN)));
+};
+
+// Returns the least integer value greater than or equal to x.
+export fn ceilf64(x: f64) f64 = -floorf64(-x);
+
+@test fn ceil() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ ceilf64(TEST_INPUTS[idx]),
+ TEST_CEIL[idx]));
+ };
+ assert(ceilf64(-INF) == -INF);
+ assert(ceilf64(-0f64) == -0f64);
+ assert(ceilf64(0f64) == 0f64);
+ assert(ceilf64(INF) == INF);
+ assert(isnan(ceilf64(NAN)));
+};
+
+// Returns the integer value of x.
+export fn truncf64(x: f64) f64 = {
+ if (x == 0f64 || isnan(x) || isinf(x)) {
+ return x;
+ };
+ const modfrac_res = modfracf64(x);
+ return (modfrac_res.0: f64);
+};
+
+@test fn trunc() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ truncf64(TEST_INPUTS[idx]),
+ TEST_TRUNC[idx],
+ 1e-6f64));
+ };
+ assert(truncf64(-INF) == -INF);
+ assert(truncf64(-0f64) == -0f64);
+ assert(truncf64(0f64) == 0f64);
+ assert(truncf64(INF) == INF);
+ assert(isnan(truncf64(NAN)));
+};
+
+// Returns the nearest integer, rounding half away from zero.
+export fn roundf64(x: f64) f64 = {
+ let bits = f64bits(x);
+ let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
+ if (e < F64_EXPONENT_BIAS) {
+ // Round abs(x) < 1 including denormals.
+ bits &= F64_SIGN_MASK; // +-0
+ if (e == F64_EXPONENT_BIAS - 1) {
+ bits |= F64_ONE; // +-1
+ };
+ } else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) {
+ // Round any abs(x) >= 1 containing a fractional component
+ // [0,1).
+ // Numbers with larger exponents are returned unchanged since
+ // they must be either an integer, infinity, or NaN.
+ const half = 1 << (F64_MANTISSA_BITS - 1);
+ e -= F64_EXPONENT_BIAS;
+ bits += half >> e;
+ bits = bits & ~(F64_MANTISSA_MASK >> e);
+ };
+ return f64frombits(bits);
+};
+
+@test fn round() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ roundf64(TEST_INPUTS[idx]),
+ TEST_ROUND[idx]));
+ };
+ assert(roundf64(-INF) == -INF);
+ assert(roundf64(-0f64) == -0f64);
+ assert(roundf64(0f64) == 0f64);
+ assert(roundf64(INF) == INF);
+ assert(isnan(roundf64(NAN)));
+};
+
+// Returns the floating-point remainder of x / y. The magnitude of the result
+// is less than y and its sign agrees with that of x.
+export fn modf64(x: f64, y: f64) f64 = {
+ if (y == 0f64) {
+ return NAN;
+ };
+ if (isinf(x) || isnan(x) || isnan(y)) {
+ return NAN;
+ };
+
+ y = absf64(y);
+
+ const y_frexp = frexpf64(y);
+ const y_frac = y_frexp.0;
+ const y_exp = y_frexp.1;
+ let r = x;
+ if (x < 0f64) {
+ r = -x;
+ };
+
+ for (r >= y) {
+ const r_frexp = frexpf64(r);
+ const r_frac = r_frexp.0;
+ let r_exp = r_frexp.1;
+ if (r_frac < y_frac) {
+ r_exp -= 1i64;
+ };
+ r = r - ldexpf64(y, r_exp - y_exp);
+ };
+ if (x < 0f64) {
+ r = -r;
+ };
+ return r;
+};
+
+@test fn modf64() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ modf64(10f64, TEST_INPUTS[idx]),
+ TEST_MODF[idx]));
+ };
+
+ assert(isnan(modf64(-INF, -INF)));
+ assert(isnan(modf64(-INF, -PI)));
+ assert(isnan(modf64(-INF, 0f64)));
+ assert(isnan(modf64(-INF, PI)));
+ assert(isnan(modf64(-INF, INF)));
+ assert(isnan(modf64(-INF, NAN)));
+ assert(modf64(-PI, -INF) == -PI);
+ assert(isnan(modf64(-PI, 0f64)));
+ assert(modf64(-PI, INF) == -PI);
+ assert(isnan(modf64(-PI, NAN)));
+ assert(modf64(-0f64, -INF) == -0f64);
+ assert(isnan(modf64(-0f64, 0f64)));
+ assert(modf64(-0f64, INF) == -0f64);
+ assert(isnan(modf64(-0f64, NAN)));
+ assert(modf64(0f64, -INF) == 0f64);
+ assert(isnan(modf64(0f64, 0f64)));
+ assert(modf64(0f64, INF) == 0f64);
+ assert(isnan(modf64(0f64, NAN)));
+ assert(modf64(PI, -INF) == PI);
+ assert(isnan(modf64(PI, 0f64)));
+ assert(modf64(PI, INF) == PI);
+ assert(isnan(modf64(PI, NAN)));
+ assert(isnan(modf64(INF, -INF)));
+ assert(isnan(modf64(INF, -PI)));
+ assert(isnan(modf64(INF, 0f64)));
+ assert(isnan(modf64(INF, PI)));
+ assert(isnan(modf64(INF, INF)));
+ assert(isnan(modf64(INF, NAN)));
+ assert(isnan(modf64(NAN, -INF)));
+ assert(isnan(modf64(NAN, -PI)));
+ assert(isnan(modf64(NAN, 0f64)));
+ assert(isnan(modf64(NAN, PI)));
+ assert(isnan(modf64(NAN, INF)));
+ assert(isnan(modf64(NAN, NAN)));
+ assert(modf64(5.9790119248836734e+200, 1.1258465975523544) ==
+ 0.6447968302508578);
};
diff --git a/math/testdata.ha b/math/testdata.ha
@@ -0,0 +1,440 @@
+// The test data below is based on Go's implementation, and came with the
+// following note and copyright notice:
+//
+// The expected results below were computed by the high precision calculators
+// at https://keisan.casio.com/. More exact input values (array vf[], above)
+// were obtained by printing them with "%.26f". The answers were calculated
+// to 26 digits (by using the "Digit number" drop-down control of each
+// calculator).
+//
+// The Go copyright notice:
+// ====================================================
+// Copyright (c) 2009 The Go Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ====================================================
+
+export const TEST_INPUTS: [_]f64 = [
+ 4.9790119248836735e+00,
+ 7.7388724745781045e+00,
+ -2.7688005719200159e-01,
+ -5.0106036182710749e+00,
+ 9.6362937071984173e+00,
+ 2.9263772392439646e+00,
+ 5.2290834314593066e+00,
+ 2.7279399104360102e+00,
+ 1.8253080916808550e+00,
+ -8.6859247685756013e+00,
+];
+
+export const TEST_ACOS: [_]f64 = [
+ 1.0496193546107222142571536e+00,
+ 6.8584012813664425171660692e-01,
+ 1.5984878714577160325521819e+00,
+ 2.0956199361475859327461799e+00,
+ 2.7053008467824138592616927e-01,
+ 1.2738121680361776018155625e+00,
+ 1.0205369421140629186287407e+00,
+ 1.2945003481781246062157835e+00,
+ 1.3872364345374451433846657e+00,
+ 2.6231510803970463967294145e+00,
+];
+export const TEST_ACOSH: [_]f64 = [
+ 2.4743347004159012494457618e+00,
+ 2.8576385344292769649802701e+00,
+ 7.2796961502981066190593175e-01,
+ 2.4796794418831451156471977e+00,
+ 3.0552020742306061857212962e+00,
+ 2.044238592688586588942468e+00,
+ 2.5158701513104513595766636e+00,
+ 1.99050839282411638174299e+00,
+ 1.6988625798424034227205445e+00,
+ 2.9611454842470387925531875e+00,
+];
+export const TEST_ASIN: [_]f64 = [
+ 5.2117697218417440497416805e-01,
+ 8.8495619865825236751471477e-01,
+ -02.769154466281941332086016e-02,
+ -5.2482360935268931351485822e-01,
+ 1.3002662421166552333051524e+00,
+ 2.9698415875871901741575922e-01,
+ 5.5025938468083370060258102e-01,
+ 2.7629597861677201301553823e-01,
+ 1.83559892257451475846656e-01,
+ -1.0523547536021497774980928e+00,
+];
+export const TEST_ASINH: [_]f64 = [
+ 2.3083139124923523427628243e+00,
+ 2.743551594301593620039021e+00,
+ -2.7345908534880091229413487e-01,
+ -2.3145157644718338650499085e+00,
+ 2.9613652154015058521951083e+00,
+ 1.7949041616585821933067568e+00,
+ 2.3564032905983506405561554e+00,
+ 1.7287118790768438878045346e+00,
+ 1.3626658083714826013073193e+00,
+ -2.8581483626513914445234004e+00,
+];
+export const TEST_ATAN: [_]f64 = [
+ 1.372590262129621651920085e+00,
+ 1.442290609645298083020664e+00,
+ -2.7011324359471758245192595e-01,
+ -1.3738077684543379452781531e+00,
+ 1.4673921193587666049154681e+00,
+ 1.2415173565870168649117764e+00,
+ 1.3818396865615168979966498e+00,
+ 1.2194305844639670701091426e+00,
+ 1.0696031952318783760193244e+00,
+ -1.4561721938838084990898679e+00,
+];
+export const TEST_ATANH: [_]f64 = [
+ 5.4651163712251938116878204e-01,
+ 1.0299474112843111224914709e+00,
+ -2.7695084420740135145234906e-02,
+ -5.5072096119207195480202529e-01,
+ 1.9943940993171843235906642e+00,
+ 3.01448604578089708203017e-01,
+ 5.8033427206942188834370595e-01,
+ 2.7987997499441511013958297e-01,
+ 1.8459947964298794318714228e-01,
+ -1.3273186910532645867272502e+00,
+];
+export const TEST_CEIL: [_]f64 = [
+ 5.0000000000000000e+00,
+ 8.0000000000000000e+00,
+ -0.0000000000000000e+00,
+ -5.0000000000000000e+00,
+ 1.0000000000000000e+01,
+ 3.0000000000000000e+00,
+ 6.0000000000000000e+00,
+ 3.0000000000000000e+00,
+ 2.0000000000000000e+00,
+ -8.0000000000000000e+00,
+];
+export const TEST_COS: [_]f64 = [
+ 2.634752140995199110787593e-01,
+ 1.148551260848219865642039e-01,
+ 9.6191297325640768154550453e-01,
+ 2.938141150061714816890637e-01,
+ -9.777138189897924126294461e-01,
+ -9.7693041344303219127199518e-01,
+ 4.940088096948647263961162e-01,
+ -9.1565869021018925545016502e-01,
+ -2.517729313893103197176091e-01,
+ -7.39241351595676573201918e-01,
+];
+// Results for 100000 * PI + TEST_INPUTS[i]
+export const TEST_COSLARGE: [_]f64 = [
+ 2.634752141185559426744e-01,
+ 1.14855126055543100712e-01,
+ 9.61912973266488928113e-01,
+ 2.9381411499556122552e-01,
+ -9.777138189880161924641e-01,
+ -9.76930413445147608049e-01,
+ 4.940088097314976789841e-01,
+ -9.15658690217517835002e-01,
+ -2.51772931436786954751e-01,
+ -7.3924135157173099849e-01,
+];
+export const TEST_COSH: [_]f64 = [
+ 7.2668796942212842775517446e+01,
+ 1.1479413465659254502011135e+03,
+ 1.0385767908766418550935495e+00,
+ 7.5000957789658051428857788e+01,
+ 7.655246669605357888468613e+03,
+ 9.3567491758321272072888257e+00,
+ 9.331351599270605471131735e+01,
+ 7.6833430994624643209296404e+00,
+ 3.1829371625150718153881164e+00,
+ 2.9595059261916188501640911e+03,
+];
+export const TEST_EXP: [_]f64 = [
+ 1.4533071302642137507696589e+02,
+ 2.2958822575694449002537581e+03,
+ 7.5814542574851666582042306e-01,
+ 6.6668778421791005061482264e-03,
+ 1.5310493273896033740861206e+04,
+ 1.8659907517999328638667732e+01,
+ 1.8662167355098714543942057e+02,
+ 1.5301332413189378961665788e+01,
+ 6.2047063430646876349125085e+00,
+ 1.6894712385826521111610438e-04,
+];
+export const TEST_EXP2: [_]f64 = [
+ 3.1537839463286288034313104e+01,
+ 2.1361549283756232296144849e+02,
+ 8.2537402562185562902577219e-01,
+ 3.1021158628740294833424229e-02,
+ 7.9581744110252191462569661e+02,
+ 7.6019905892596359262696423e+00,
+ 3.7506882048388096973183084e+01,
+ 6.6250893439173561733216375e+00,
+ 3.5438267900243941544605339e+00,
+ 2.4281533133513300984289196e-03,
+];
+export const TEST_ABSF: [_]f64 = [
+ 4.9790119248836735e+00,
+ 7.7388724745781045e+00,
+ 2.7688005719200159e-01,
+ 5.0106036182710749e+00,
+ 9.6362937071984173e+00,
+ 2.9263772392439646e+00,
+ 5.2290834314593066e+00,
+ 2.7279399104360102e+00,
+ 1.8253080916808550e+00,
+ 8.6859247685756013e+00,
+];
+export const TEST_FLOOR: [_]f64 = [
+ 4.0000000000000000e+00,
+ 7.0000000000000000e+00,
+ -1.0000000000000000e+00,
+ -6.0000000000000000e+00,
+ 9.0000000000000000e+00,
+ 2.0000000000000000e+00,
+ 5.0000000000000000e+00,
+ 2.0000000000000000e+00,
+ 1.0000000000000000e+00,
+ -9.0000000000000000e+00,
+];
+export const TEST_MODF: [_]f64 = [
+ 4.197615023265299782906368e-02,
+ 2.261127525421895434476482e+00,
+ 3.231794108794261433104108e-02,
+ 4.989396381728925078391512e+00,
+ 3.637062928015826201999516e-01,
+ 1.220868282268106064236690e+00,
+ 4.770916568540693347699744e+00,
+ 1.816180268691969246219742e+00,
+ 8.734595415957246977711748e-01,
+ 1.314075231424398637614104e+00,
+];
+export const TEST_FREXP: [_](f64, i64) = [
+ (6.2237649061045918750e-01, 3),
+ (9.6735905932226306250e-01, 3),
+ (-5.5376011438400318000e-01, -1),
+ (-6.2632545228388436250e-01, 3),
+ (6.02268356699901081250e-01, 4),
+ (7.3159430981099115000e-01, 2),
+ (6.5363542893241332500e-01, 3),
+ (6.8198497760900255000e-01, 2),
+ (9.1265404584042750000e-01, 1),
+ (-5.4287029803597508250e-01, 4),
+];
+export const TEST_LOG: [_]f64 = [
+ 1.605231462693062999102599e+00,
+ 2.0462560018708770653153909e+00,
+ -1.2841708730962657801275038e+00,
+ 1.6115563905281545116286206e+00,
+ 2.2655365644872016636317461e+00,
+ 1.0737652208918379856272735e+00,
+ 1.6542360106073546632707956e+00,
+ 1.0035467127723465801264487e+00,
+ 6.0174879014578057187016475e-01,
+ 2.161703872847352815363655e+00,
+];
+export const TEST_LOG10: [_]f64 = [
+ 6.9714316642508290997617083e-01,
+ 8.886776901739320576279124e-01,
+ -5.5770832400658929815908236e-01,
+ 6.998900476822994346229723e-01,
+ 9.8391002850684232013281033e-01,
+ 4.6633031029295153334285302e-01,
+ 7.1842557117242328821552533e-01,
+ 4.3583479968917773161304553e-01,
+ 2.6133617905227038228626834e-01,
+ 9.3881606348649405716214241e-01,
+];
+export const TEST_LOG2: [_]f64 = [
+ 2.3158594707062190618898251e+00,
+ 2.9521233862883917703341018e+00,
+ -1.8526669502700329984917062e+00,
+ 2.3249844127278861543568029e+00,
+ 3.268478366538305087466309e+00,
+ 1.5491157592596970278166492e+00,
+ 2.3865580889631732407886495e+00,
+ 1.447811865817085365540347e+00,
+ 8.6813999540425116282815557e-01,
+ 3.118679457227342224364709e+00,
+];
+export const TEST_LOG1P: []f64 = [
+ 4.8590257759797794104158205e-02,
+ 7.4540265965225865330849141e-02,
+ -2.7726407903942672823234024e-03,
+ -5.1404917651627649094953380e-02,
+ 9.1998280672258624681335010e-02,
+ 2.8843762576593352865894824e-02,
+ 5.0969534581863707268992645e-02,
+ 2.6913947602193238458458594e-02,
+ 1.8088493239630770262045333e-02,
+ -9.0865245631588989681559268e-02,
+];
+export const TEST_MODFRAC: [_](i64, f64) = [
+ (4i64, 9.7901192488367350108546816e-01),
+ (7i64, 7.3887247457810456552351752e-01),
+ (-0i64, -2.7688005719200159404635997e-01),
+ (-5i64, -1.060361827107492160848778e-02),
+ (9i64, 6.3629370719841737980004837e-01),
+ (2i64, 9.2637723924396464525443662e-01),
+ (5i64, 2.2908343145930665230025625e-01),
+ (2i64, 7.2793991043601025126008608e-01),
+ (1i64, 8.2530809168085506044576505e-01),
+ (-8i64, -6.8592476857560136238589621e-01),
+];
+export const TEST_POW: [_]f64 = [
+ 9.5282232631648411840742957e+04,
+ 5.4811599352999901232411871e+07,
+ 5.2859121715894396531132279e-01,
+ 9.7587991957286474464259698e-06,
+ 4.328064329346044846740467e+09,
+ 8.4406761805034547437659092e+02,
+ 1.6946633276191194947742146e+05,
+ 5.3449040147551939075312879e+02,
+ 6.688182138451414936380374e+01,
+ 2.0609869004248742886827439e-09,
+];
+export const TEST_ROUND: [_]f64 = [
+ 5.0f64,
+ 8.0f64,
+ -0.0f64,
+ -5.0f64,
+ 10.0f64,
+ 3.0f64,
+ 5.0f64,
+ 3.0f64,
+ 2.0f64,
+ -9.0f64,
+];
+export const TEST_SIGNF: [_]i8 = [
+ 1,
+ 1,
+ -1,
+ -1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ -1,
+];
+export const TEST_SIN: [_]f64 = [
+ -9.6466616586009283766724726e-01,
+ 9.9338225271646545763467022e-01,
+ -2.7335587039794393342449301e-01,
+ 9.5586257685042792878173752e-01,
+ -2.099421066779969164496634e-01,
+ 2.135578780799860532750616e-01,
+ -8.694568971167362743327708e-01,
+ 4.019566681155577786649878e-01,
+ 9.6778633541687993721617774e-01,
+ -6.734405869050344734943028e-01,
+];
+// Results for 100000 * PI + TEST_INPUTS[i]
+export const TEST_SINLARGE: [_]f64 = [
+ -9.646661658548936063912e-01,
+ 9.933822527198506903752e-01,
+ -2.7335587036246899796e-01,
+ 9.55862576853689321268e-01,
+ -2.099421066862688873691e-01,
+ 2.13557878070308981163e-01,
+ -8.694568970959221300497e-01,
+ 4.01956668098863248917e-01,
+ 9.67786335404528727927e-01,
+ -6.7344058693131973066e-01,
+];
+export const TEST_SINH: [_]f64 = [
+ 7.2661916084208532301448439e+01,
+ 1.1479409110035194500526446e+03,
+ -2.8043136512812518927312641e-01,
+ -7.499429091181587232835164e+01,
+ 7.6552466042906758523925934e+03,
+ 9.3031583421672014313789064e+00,
+ 9.330815755828109072810322e+01,
+ 7.6179893137269146407361477e+00,
+ 3.021769180549615819524392e+00,
+ -2.95950575724449499189888e+03,
+];
+export const TEST_SQRT: [_]f64 = [
+ 2.2313699659365484748756904e+00,
+ 2.7818829009464263511285458e+00,
+ 5.2619393496314796848143251e-01,
+ 2.2384377628763938724244104e+00,
+ 3.1042380236055381099288487e+00,
+ 1.7106657298385224403917771e+00,
+ 2.286718922705479046148059e+00,
+ 1.6516476350711159636222979e+00,
+ 1.3510396336454586262419247e+00,
+ 2.9471892997524949215723329e+00,
+];
+export const TEST_TAN: [_]f64 = [
+ -3.661316565040227801781974e+00,
+ 8.64900232648597589369854e+00,
+ -2.8417941955033612725238097e-01,
+ 3.253290185974728640827156e+00,
+ 2.147275640380293804770778e-01,
+ -2.18600910711067004921551e-01,
+ -1.760002817872367935518928e+00,
+ -4.389808914752818126249079e-01,
+ -3.843885560201130679995041e+00,
+ 9.10988793377685105753416e-01,
+];
+// Results for 100000 * PI + TEST_INPUTS[i]
+export const TEST_TANLARGE: [_]f64 = [
+ -3.66131656475596512705e+00,
+ 8.6490023287202547927e+00,
+ -2.841794195104782406e-01,
+ 3.2532901861033120983e+00,
+ 2.14727564046880001365e-01,
+ -2.18600910700688062874e-01,
+ -1.760002817699722747043e+00,
+ -4.38980891453536115952e-01,
+ -3.84388555942723509071e+00,
+ 9.1098879344275101051e-01,
+];
+export const TEST_TANH: [_]f64 = [
+ 9.9990531206936338549262119e-01,
+ 9.9999962057085294197613294e-01,
+ -2.7001505097318677233756845e-01,
+ -9.9991110943061718603541401e-01,
+ 9.9999999146798465745022007e-01,
+ 9.9427249436125236705001048e-01,
+ 9.9994257600983138572705076e-01,
+ 9.9149409509772875982054701e-01,
+ 9.4936501296239685514466577e-01,
+ -9.9999994291374030946055701e-01,
+];
+export const TEST_TRUNC: [_]f64 = [
+ 4.0000000000000000e+00,
+ 7.0000000000000000e+00,
+ -0.0000000000000000e+00,
+ -5.0000000000000000e+00,
+ 9.0000000000000000e+00,
+ 2.0000000000000000e+00,
+ 5.0000000000000000e+00,
+ 2.0000000000000000e+00,
+ 1.0000000000000000e+00,
+ -8.0000000000000000e+00,
+];
diff --git a/math/trig.ha b/math/trig.ha
@@ -0,0 +1,1019 @@
+// Sections of the code below are based on Go's implementation, which is, in
+// turn, based on:
+// * the Cephes Mathematical Library (cephes/cmath/{sin,..}), available from
+// http://www.netlib.org/cephes/cmath.tgz.
+// * FreeBSD's /usr/src/lib/msun/src/{s_asinh.c,...}
+// The original C code, as well as the respective comments and constants are
+// from these libraries.
+//
+// The Cephes copyright notice:
+// ====================================================
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+// Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+// The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+// Stephen L. Moshier
+// moshier@na-net.ornl.gov
+// ====================================================
+//
+// The FreeBSD copyright notice:
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// The Go copyright notice:
+// ====================================================
+// Copyright (c) 2009 The Go Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ====================================================
+
+// sin coefficients
+const SIN_CF: [_]f64 = [
+ 1.58962301576546568060e-10, // 0x3de5d8fd1fd19ccd
+ -2.50507477628578072866e-8, // 0xbe5ae5e5a9291f5d
+ 2.75573136213857245213e-6, // 0x3ec71de3567d48a1
+ -1.98412698295895385996e-4, // 0xbf2a01a019bfdf03
+ 8.33333333332211858878e-3, // 0x3f8111111110f7d0
+ -1.66666666666666307295e-1, // 0xbfc5555555555548
+];
+
+// cos coefficients
+const COS_CF: [_]f64 = [
+ -1.13585365213876817300e-11, // 0xbda8fa49a0861a9b
+ 2.08757008419747316778e-9, // 0x3e21ee9d7b4e3f05
+ -2.75573141792967388112e-7, // 0xbe927e4f7eac4bc6
+ 2.48015872888517045348e-5, // 0x3efa01a019c844f5
+ -1.38888888888730564116e-3, // 0xbf56c16c16c14f91
+ 4.16666666666665929218e-2, // 0x3fa555555555554b
+];
+
+// PI / 4 split into three parts
+def PI4A: f64 = 7.85398125648498535156e-1; // 0x3fe921fb40000000
+def PI4B: f64 = 3.77489470793079817668e-8; // 0x3e64442d00000000
+def PI4C: f64 = 2.69515142907905952645e-15; // 0x3ce8469898cc5170
+
+// reduce_threshold is the maximum value of x where the reduction using PI/4
+// in 3 float64 parts still gives accurate results. This threshold
+// is set by y*C being representable as a float64 without error
+// where y is given by y = floor(x * (4 / PI)) and C is the leading partial
+// terms of 4/PI. Since the leading terms (PI4A and PI4B in sin.go) have 30
+// and 32 trailing zero bits, y should have less than 30 significant bits.
+// y < 1<<30 -> floor(x*4/PI) < 1<<30 -> x < (1<<30 - 1) * PI/4
+// So, conservatively we can take x < 1<<29.
+// Above this threshold Payne-Hanek range reduction must be used.
+def REDUCE_THRESHOLD: f64 = ((1u64 << 29): f64);
+
+// MPI4 is the binary digits of 4/pi as a uint64 array,
+// that is, 4/pi = Sum MPI4[i]*2^(-64*i)
+// 19 64-bit digits and the leading one bit give 1217 bits
+// of precision to handle the largest possible float64 exponent.
+const MPI4: [_]u64 = [
+ 0x0000000000000001,
+ 0x45f306dc9c882a53,
+ 0xf84eafa3ea69bb81,
+ 0xb6c52b3278872083,
+ 0xfca2c757bd778ac3,
+ 0x6e48dc74849ba5c0,
+ 0x0c925dd413a32439,
+ 0xfc3bd63962534e7d,
+ 0xd1046bea5d768909,
+ 0xd338e04d68befc82,
+ 0x7323ac7306a673e9,
+ 0x3908bf177bf25076,
+ 0x3ff12fffbc0b301f,
+ 0xde5e2316b414da3e,
+ 0xda6cfd9e4f96136e,
+ 0x9e8c7ecd3cbfd45a,
+ 0xea4f758fd7cbe2f6,
+ 0x7a0e73ef14a525d4,
+ 0xd7f6bf623f1aba10,
+ 0xac06608df8f6d757,
+];
+
+// trig_reduce implements Payne-Hanek range reduction by PI/4 for x > 0. It
+// returns the integer part mod 8 (j) and the fractional part (z) of x / (PI/4).
+fn trig_reduce(x: f64) (u64, f64) = {
+ // The implementation is based on: "ARGUMENT REDUCTION FOR HUGE
+ // ARGUMENTS: Good to the Last Bit" K. C. Ng et al, March 24, 1992
+ // The simulated multi-precision calculation of x * B uses 64-bit
+ // integer arithmetic.
+ const PI4 = PI / 4f64;
+ if (x < PI4) {
+ return (0u64, x);
+ };
+ // Extract out the integer and exponent such that x = ix * 2 ** exp
+ let ix = f64bits(x);
+ const exp =
+ ((ix >> F64_MANTISSA_BITS &
+ F64_EXPONENT_MASK): i64) -
+ (F64_EXPONENT_BIAS: i64) -
+ (F64_MANTISSA_BITS: i64);
+ ix = ix & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
+ ix |= 1 << F64_MANTISSA_BITS;
+ // Use the exponent to extract the 3 appropriate uint64 digits from
+ // MPI4, B ~ (z0, z1, z2), such that the product leading digit has the
+ // exponent -61. Note, exp >= -53 since x >= PI4 and exp < 971 for
+ // maximum float64.
+ const digit = ((exp + 61): u64) / 64;
+ const bitshift = ((exp + 61): u64) % 64;
+ const z0 = (MPI4[digit] << bitshift) |
+ (MPI4[digit + 1] >> (64 - bitshift));
+ const z1 = (MPI4[digit + 1] << bitshift) |
+ (MPI4[digit + 2] >> (64 - bitshift));
+ const z2 = (MPI4[digit + 2] << bitshift) |
+ (MPI4[digit + 3] >> (64 - bitshift));
+ // Multiply mantissa by the digits and extract the upper two digits
+ // (hi, lo).
+ const z2 = mulu64(z2, ix);
+ const z2hi = z2.0;
+ const z1 = mulu64(z1, ix);
+ const z1hi = z1.0;
+ const z1lo = z1.1;
+ const z0lo = z0 * ix;
+ const add1 = addu64(z1lo, z2hi, 0);
+ const lo = add1.0;
+ const c = add1.1;
+ const add2 = addu64(z0lo, z1hi, c);
+ let hi = add2.0;
+ // The top 3 bits are j.
+ let j = hi >> 61;
+ // Extract the fraction and find its magnitude.
+ hi = hi << 3 | lo >> 61;
+ const lz = ((leading_zeros_u64(hi)): uint);
+ const e = ((F64_EXPONENT_BIAS - (lz + 1)): u64);
+ // Clear implicit mantissa bit and shift into place.
+ hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)));
+ hi >>= 64 - F64_MANTISSA_BITS;
+ // Include the exponent and convert to a float.
+ hi |= e << F64_MANTISSA_BITS;
+ let z = f64frombits(hi);
+ // Map zeros to origin.
+ if (j & 1 == 1) {
+ j += 1;
+ j &= 7;
+ z -= 1f64;
+ };
+ // Multiply the fractional part by pi/4.
+ return (j, z * PI4);
+};
+
+@test fn trig_reduce() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ const reduced = trig_reduce(TEST_INPUTS[idx]);
+ const j = reduced.0;
+ const z = reduced.1;
+ const xred = (j: i64: f64) * (PI / 4f64) + z;
+ assert(eqwithin(
+ sinf64(TEST_INPUTS[idx]),
+ sinf64(xred),
+ 1e-6f64));
+ };
+};
+
+// cos.c
+//
+// Circular cosine
+//
+// SYNOPSIS:
+//
+// double x, y, cos();
+// y = cos( x );
+//
+// DESCRIPTION:
+//
+// Range reduction is into intervals of pi/4. The reduction error is nearly
+// eliminated by contriving an extended precision modular arithmetic.
+//
+// Two polynomial approximating functions are employed.
+// Between 0 and pi/4 the cosine is approximated by
+// 1 - x**2 Q(x**2).
+// Between pi/4 and pi/2 the sine is represented as
+// x + x**3 P(x**2).
+//
+// ACCURACY:
+//
+// Relative error:
+// arithmetic domain # trials peak rms
+// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17
+// DEC 0,+1.07e9 17000 3.0e-17 7.2e-18
+
+// Returns the cosine of x, in radians.
+export fn cosf64(x: f64) f64 = {
+ if (isnan(x) || isinf(x)) {
+ return NAN;
+ };
+
+ // Make argument positive
+ let is_negative = false;
+ x = absf(x);
+
+ let j = 0u64;
+ let y = 0f64;
+ let z = 0f64;
+ if (x >= REDUCE_THRESHOLD) {
+ const reduce_res = trig_reduce(x);
+ j = reduce_res.0;
+ z = reduce_res.1;
+ } else {
+ // Integer part of x/(PI/4), as integer for tests on the phase
+ // angle
+ j = ((x * (4f64 / PI)): i64: u64);
+ // Integer part of x/(PI/4), as float
+ y = (j: i64: f64);
+
+ // Map zeros to origin
+ if (j & 1 == 1) {
+ j += 1;
+ y += 1f64;
+ };
+ // Octant modulo 2PI radians (360 degrees)
+ j &= 7;
+ // Extended precision modular arithmetic
+ z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C);
+ };
+
+ if (j > 3) {
+ j -= 4;
+ is_negative = !is_negative;
+ };
+ if (j > 1) {
+ is_negative = !is_negative;
+ };
+
+ const zz = z * z;
+ if (j == 1 || j == 2) {
+ y = z + z * zz * ((((((SIN_CF[0] * zz) +
+ SIN_CF[1]) * zz +
+ SIN_CF[2]) * zz +
+ SIN_CF[3]) * zz +
+ SIN_CF[4]) * zz +
+ SIN_CF[5]);
+ } else {
+ y = 1.0 - 0.5 * zz + zz * zz * ((((((COS_CF[0] * zz) +
+ COS_CF[1]) * zz +
+ COS_CF[2]) * zz +
+ COS_CF[3]) * zz +
+ COS_CF[4]) * zz +
+ COS_CF[5]);
+ };
+ if (is_negative) {
+ y = -y;
+ };
+ return y;
+};
+
+@test fn cos() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ cosf64(TEST_INPUTS[idx]), TEST_COS[idx]));
+ };
+ assert(isnan(cosf64(-INF)));
+ assert(isnan(cosf64(INF)));
+ assert(isnan(cosf64(NAN)));
+};
+
+// sin.c
+//
+// Circular sine
+//
+// SYNOPSIS:
+//
+// double x, y, sin();
+// y = sin( x );
+//
+// DESCRIPTION:
+//
+// Range reduction is into intervals of pi/4. The reduction error is nearly
+// eliminated by contriving an extended precision modular arithmetic.
+//
+// Two polynomial approximating functions are employed.
+// Between 0 and pi/4 the sine is approximated by
+// x + x**3 P(x**2).
+// Between pi/4 and pi/2 the cosine is represented as
+// 1 - x**2 Q(x**2).
+//
+// ACCURACY:
+//
+// Relative error:
+// arithmetic domain # trials peak rms
+// DEC 0, 10 150000 3.0e-17 7.8e-18
+// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17
+//
+// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss
+// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may
+// be meaningless for x > 2**49 = 5.6e14.
+
+// Returns the sine of x, in radians.
+export fn sinf64(x: f64) f64 = {
+ if (x == 0f64 || isnan(x)) {
+ return x;
+ } else if (isinf(x)) {
+ return NAN;
+ };
+
+ // Make argument positive but save the sign
+ let is_negative = false;
+ if (x < 0f64) {
+ x = -x;
+ is_negative = true;
+ };
+
+ let j = 0u64;
+ let y = 0f64;
+ let z = 0f64;
+ if (x >= REDUCE_THRESHOLD) {
+ const reduce_res = trig_reduce(x);
+ j = reduce_res.0;
+ z = reduce_res.1;
+ } else {
+ // Integer part of x/(PI/4), as integer for tests on the phase
+ // angle
+ j = ((x * (4f64 / PI)): i64: u64);
+ // Integer part of x/(PI/4), as float
+ y = (j: i64: f64);
+
+ // Map zeros to origin
+ if (j & 1 == 1) {
+ j += 1;
+ y += 1f64;
+ };
+ // Octant modulo 2PI radians (360 degrees)
+ j &= 7;
+ // Extended precision modular arithmetic
+ z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C);
+ };
+
+ // Reflect in x axis
+ if (j > 3) {
+ j -= 4;
+ is_negative = !is_negative;
+ };
+
+ const zz = z * z;
+ if (j == 1 || j == 2) {
+ y = 1.0 - 0.5 * zz + zz * zz *
+ ((((((COS_CF[0] * zz) +
+ COS_CF[1]) * zz +
+ COS_CF[2]) * zz +
+ COS_CF[3]) * zz +
+ COS_CF[4]) * zz +
+ COS_CF[5]);
+ } else {
+ y = z + z * zz *
+ ((((((SIN_CF[0] * zz) +
+ SIN_CF[1]) * zz +
+ SIN_CF[2]) * zz +
+ SIN_CF[3]) * zz +
+ SIN_CF[4]) * zz +
+ SIN_CF[5]);
+ };
+ if (is_negative) {
+ y = -y;
+ };
+ return y;
+};
+
+@test fn sin() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ sinf64(TEST_INPUTS[idx]), TEST_SIN[idx]));
+ };
+ assert(isnan(sinf64(-INF)));
+ assert(sinf64(-0f64) == -0f64);
+ assert(sinf64(0f64) == 0f64);
+ assert(isnan(sinf64(INF)));
+ assert(isnan(sinf64(NAN)));
+};
+
+// tan.c
+//
+// Circular tangent
+//
+// SYNOPSIS:
+//
+// double x, y, tan();
+// y = tan( x );
+//
+// DESCRIPTION:
+//
+// Returns the circular tangent of the radian argument x.
+//
+// Range reduction is modulo pi/4. A rational function
+// x + x**3 P(x**2)/Q(x**2)
+// is employed in the basic interval [0, pi/4].
+//
+// ACCURACY:
+// Relative error:
+// arithmetic domain # trials peak rms
+// DEC +-1.07e9 44000 4.1e-17 1.0e-17
+// IEEE +-1.07e9 30000 2.9e-16 8.1e-17
+//
+// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss
+// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may
+// be meaningless for x > 2**49 = 5.6e14.
+// [Accuracy loss statement from sin.go comments.]
+
+// tan coefficients
+const TAN_P: [_]f64 = [
+ -1.30936939181383777646e4, // 0xc0c992d8d24f3f38
+ 1.15351664838587416140e6, // 0x413199eca5fc9ddd
+ -1.79565251976484877988e7, // 0xc1711fead3299176
+];
+const TAN_Q: [_]f64 = [
+ 1.00000000000000000000e0,
+ 1.36812963470692954678e4, // 0x40cab8a5eeb36572
+ -1.32089234440210967447e6, // 0xc13427bc582abc96
+ 2.50083801823357915839e7, // 0x4177d98fc2ead8ef
+ -5.38695755929454629881e7, // 0xc189afe03cbe5a31
+];
+
+// Returns the tangent of x, in radians.
+export fn tanf64(x: f64) f64 = {
+ if (x == 0f64 || isnan(x)) {
+ return x;
+ } else if (isinf(x)) {
+ return NAN;
+ };
+
+ // Make argument positive but save the sign
+ let is_negative = false;
+ if (x < 0f64) {
+ x = -x;
+ is_negative = true;
+ };
+ let j = 0u64;
+ let y = 0f64;
+ let z = 0f64;
+ if (x >= REDUCE_THRESHOLD) {
+ const reduce_res = trig_reduce(x);
+ j = reduce_res.0;
+ z = reduce_res.1;
+ } else {
+ // Integer part of x/(PI/4), as integer for tests on the phase
+ // angle
+ j = ((x * (4f64 / PI)): i64: u64);
+ // Integer part of x/(PI/4), as float
+ y = (j: i64: f64);
+
+ // Map zeros and singularities to origin
+ if (j & 1 == 1) {
+ j += 1;
+ y += 1f64;
+ };
+
+ z = ((x - (y * PI4A)) - (y * PI4B)) - (y * PI4C);
+ };
+ const zz = z * z;
+
+ if (zz > 1e-14) {
+ y = z + z * (zz *
+ (((TAN_P[0] * zz) + TAN_P[1]) * zz + TAN_P[2]) /
+ ((((zz + TAN_Q[1]) * zz +
+ TAN_Q[2]) * zz +
+ TAN_Q[3]) * zz +
+ TAN_Q[4]));
+ } else {
+ y = z;
+ };
+ if (j & 2 == 2) {
+ y = -1f64 / y;
+ };
+ if (is_negative) {
+ y = -y;
+ };
+ return y;
+};
+
+@test fn tan() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ tanf64(TEST_INPUTS[idx]), TEST_TAN[idx]));
+ };
+ assert(isnan(sinf64(-INF)));
+ assert(sinf64(-0f64) == -0f64);
+ assert(sinf64(0f64) == 0f64);
+ assert(isnan(sinf64(INF)));
+ assert(isnan(sinf64(NAN)));
+};
+
+// Evaluates a series valid in the range [0, 0.66].
+fn xatan(x: f64) f64 = {
+ const P0 = -8.750608600031904122785e-01;
+ const P1 = -1.615753718733365076637e+01;
+ const P2 = -7.500855792314704667340e+01;
+ const P3 = -1.228866684490136173410e+02;
+ const P4 = -6.485021904942025371773e+01;
+ const Q0 = +2.485846490142306297962e+01;
+ const Q1 = +1.650270098316988542046e+02;
+ const Q2 = +4.328810604912902668951e+02;
+ const Q3 = +4.853903996359136964868e+02;
+ const Q4 = +1.945506571482613964425e+02;
+ let z = x * x;
+ z = z * ((((P0 * z + P1) * z + P2) * z + P3) * z + P4) /
+ (((((z + Q0) * z + Q1) * z + Q2) * z + Q3) * z + Q4);
+ z = (x * z) + x;
+ return z;
+};
+
+// Reduces argument (known to be positive) to the range [0, 0.66] and calls
+// xatan.
+fn satan(x: f64) f64 = {
+ // pi / 2 = PIO2 + morebits
+ const morebits = 6.123233995736765886130e-17;
+ // tan(3 * pi / 8)
+ const tan3pio8 = 2.41421356237309504880;
+ if (x <= 0.66) {
+ return xatan(x);
+ };
+ if (x > tan3pio8) {
+ return (PI / 2f64) - xatan(1f64 / x) + morebits;
+ };
+ return (PI / 4f64) +
+ xatan((x - 1f64) / (x + 1f64)) +
+ (0.5f64 * morebits);
+};
+
+// Returns the arcsine, in radians, of x.
+export fn asinf64(x: f64) f64 = {
+ if (x == 0f64) {
+ return x;
+ };
+ let is_negative = false;
+ if (x < 0.064) {
+ x = -x;
+ is_negative = true;
+ };
+ if (x > 1f64) {
+ return NAN;
+ };
+ let temp = sqrtf64(1f64 - x * x);
+ if (x > 0.7f64) {
+ temp = PI / 2f64 - satan(temp / x);
+ } else {
+ temp = satan(x / temp);
+ };
+
+ if (is_negative) {
+ temp = -temp;
+ };
+ return temp;
+};
+
+@test fn asin() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ asinf64(TEST_INPUTS[idx] / 10f64),
+ TEST_ASIN[idx],
+ 1e-6f64));
+ };
+ assert(isnan(asinf64(-PI)));
+ assert(asinf64(-0f64) == -0f64);
+ assert(asinf64(0f64) == 0f64);
+ assert(isnan(asinf64(PI)));
+ assert(isnan(asinf64(NAN)));
+};
+
+// Returns the arccosine, in radians, of x.
+export fn acosf64(x: f64) f64 = {
+ return PI / 2f64 - asinf64(x);
+};
+
+@test fn acos() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ acosf64(TEST_INPUTS[idx] / 10f64),
+ TEST_ACOS[idx],
+ 1e-6f64));
+ };
+ assert(isnan(acosf64(-PI)));
+ assert(acosf64(1f64) == 0f64);
+ assert(isnan(acosf64(PI)));
+ assert(isnan(acosf64(NAN)));
+};
+
+// atan.c
+// Inverse circular tangent (arctangent)
+//
+// SYNOPSIS:
+// double x, y, atan();
+// y = atan( x );
+//
+// DESCRIPTION:
+// Returns radian angle between -pi/2 and +pi/2 whose tangent is x.
+//
+// Range reduction is from three intervals into the interval from zero to 0.66.
+// The approximant uses a rational function of degree 4/5 of the form
+// x + x**3 P(x)/Q(x).
+//
+// ACCURACY:
+// Relative error:
+// arithmetic domain # trials peak rms
+// DEC -10, 10 50000 2.4e-17 8.3e-18
+// IEEE -10, 10 10^6 1.8e-16 5.0e-17
+
+// Returns the arctangent, in radians, of x.
+export fn atanf64(x: f64) f64 = {
+ if (x == 0f64) {
+ return x;
+ };
+ if (x > 0f64) {
+ return satan(x);
+ };
+ return -satan(-x);
+};
+
+@test fn atan() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ atanf64(TEST_INPUTS[idx]),
+ TEST_ATAN[idx],
+ 1e-6f64));
+ };
+ assert(atanf64(-INF) == -PI / 2f64);
+ assert(atanf64(-0f64) == -0f64);
+ assert(atanf64(0f64) == 0f64);
+ assert(atanf64(INF) == PI / 2f64);
+ assert(isnan(atanf64(NAN)));
+};
+
+// Floating-point hyperbolic sine and cosine.
+// The exponential func is called for arguments greater in magnitude than 0.5.
+// A series is used for arguments smaller in magnitude than 0.5.
+// Cosh(x) is computed from the exponential func for all arguments.
+
+// Returns the hyperbolic sine of x.
+export fn sinhf64(x: f64) f64 = {
+ // The coefficients are #2029 from Hart & Cheney. (20.36D)
+ const P0 = -0.6307673640497716991184787251e+6;
+ const P1 = -0.8991272022039509355398013511e+5;
+ const P2 = -0.2894211355989563807284660366e+4;
+ const P3 = -0.2630563213397497062819489e+2;
+ const Q0 = -0.6307673640497716991212077277e+6;
+ const Q1 = 0.1521517378790019070696485176e+5;
+ const Q2 = -0.173678953558233699533450911e+3;
+
+ let is_negative = false;
+ if (x < 0f64) {
+ x = -x;
+ is_negative = true;
+ };
+
+ let temp = 0f64;
+ if (x > 21f64) {
+ temp = expf64(x) * 0.5f64;
+ } else if (x > 0.5f64) {
+ const ex = expf64(x);
+ temp = (ex - (1f64 / ex)) * 0.5f64;
+ } else {
+ const sq = x * x;
+ temp = (((P3 * sq + P2) * sq + P1) * sq + P0) * x;
+ temp = temp / (((sq + Q2) * sq + Q1) * sq + Q0);
+ };
+
+ if (is_negative) {
+ temp = -temp;
+ };
+
+ return temp;
+};
+
+@test fn sinh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ sinhf64(TEST_INPUTS[idx]),
+ TEST_SINH[idx],
+ 1e-6f64));
+ };
+ assert(sinhf64(-INF) == -INF);
+ assert(sinhf64(-0f64) == -0f64);
+ assert(sinhf64(0f64) == 0f64);
+ assert(sinhf64(INF) == INF);
+ assert(isnan(sinhf64(NAN)));
+};
+
+// Returns the hyperbolic cosine of x.
+export fn coshf64(x: f64) f64 = {
+ x = absf64(x);
+ if (x > 21f64) {
+ return expf64(x) * 0.5f64;
+ };
+ const ex = expf64(x);
+ return (ex + 1f64 / ex) * 0.5f64;
+};
+
+@test fn cosh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ coshf64(TEST_INPUTS[idx]),
+ TEST_COSH[idx],
+ 1e-6f64));
+ };
+ assert(coshf64(-INF) == INF);
+ assert(coshf64(-0f64) == 1f64);
+ assert(coshf64(0f64) == 1f64);
+ assert(coshf64(INF) == INF);
+ assert(isnan(coshf64(NAN)));
+};
+
+// tanh.c
+//
+// Hyperbolic tangent
+//
+// SYNOPSIS:
+//
+// double x, y, tanh();
+//
+// y = tanh( x );
+//
+// DESCRIPTION:
+//
+// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG.
+// MAXLOG = 8.8029691931113054295988e+01 = log(2**127)
+// MINLOG = -8.872283911167299960540e+01 = log(2**-128)
+//
+// A rational function is used for |x| < 0.625. The form
+// x + x**3 P(x)/Q(x) of Cody & Waite is employed.
+// Otherwise,
+// tanh(x) = sinh(x)/cosh(x) = 1 - 2/(exp(2x) + 1).
+//
+// ACCURACY:
+//
+// Relative error:
+// arithmetic domain # trials peak rms
+// IEEE -2,2 30000 2.5e-16 5.8e-17
+
+// tanh coefficients
+const TANH_P: [_]f64 = [
+ -9.64399179425052238628e-1,
+ -9.92877231001918586564e1,
+ -1.61468768441708447952e3,
+];
+const TANH_Q: [_]f64 = [
+ 1.12811678491632931402e2,
+ 2.23548839060100448583e3,
+ 4.84406305325125486048e3,
+];
+
+// Returns the hyperbolic tangent of x.
+export fn tanhf64(x: f64) f64 = {
+ const MAXLOG = 8.8029691931113054295988e+01; // log(2**127)
+ let z = absf64(x);
+ if (z > 0.5f64 * MAXLOG) {
+ if (x < 0f64) {
+ return -1f64;
+ };
+ return 1f64;
+ } else if (z >= 0.625f64) {
+ const s = expf64(2f64 * z);
+ z = 1f64 - 2f64 / (s + 1f64);
+ if (x < 0f64) {
+ z = -z;
+ };
+ } else {
+ if (x == 0f64) {
+ return x;
+ };
+ const s = x * x;
+ z = x + x * s * ((TANH_P[0] * s + TANH_P[1]) * s + TANH_P[2]) /
+ (((s + TANH_Q[0]) * s + TANH_Q[1]) * s + TANH_Q[2]);
+ };
+ return z;
+};
+
+@test fn tanh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ tanhf64(TEST_INPUTS[idx]),
+ TEST_TANH[idx],
+ 1e-6f64));
+ };
+ assert(tanhf64(-INF) == -1f64);
+ assert(tanhf64(-0f64) == -0f64);
+ assert(tanhf64(0f64) == 0f64);
+ assert(tanhf64(INF) == 1f64);
+ assert(isnan(tanhf64(NAN)));
+};
+
+// asinh(x)
+// Method :
+// Based on
+// asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
+// we have
+// asinh(x) := x if 1+x*x=1,
+// := sign(x)*(log(x)+ln2)) for large |x|, else
+// := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
+// := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2)))
+//
+
+// Returns the inverse hyperbolic sine of x.
+export fn asinhf64(x: f64) f64 = {
+ const NEAR_ZERO = 1f64 / ((1i64 << 28): f64);
+ const LARGE = ((1i64 << 28): f64);
+
+ if (isnan(x) || isinf(x)) {
+ return x;
+ };
+
+ let is_negative = false;
+ if (x < 0f64) {
+ x = -x;
+ is_negative = true;
+ };
+
+ let temp = 0f64;
+
+ if (x > LARGE) {
+ // |x| > 2**28
+ temp = logf64(x) + LN_2;
+ } else if (x > 2f64) {
+ // 2**28 > |x| > 2.0
+ temp = logf64(2f64 * x +
+ 1f64 / (sqrtf64(x * x + 1f64) + x));
+ } else if (x < NEAR_ZERO) {
+ // |x| < 2**-28
+ temp = x;
+ } else {
+ // 2.0 > |x| > 2**-28
+ temp = log1pf64(x + x * x /
+ (1f64 + sqrtf64(1f64 + x * x)));
+ };
+ if (is_negative) {
+ temp = -temp;
+ };
+ return temp;
+};
+
+@test fn asinh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ asinhf64(TEST_INPUTS[idx]),
+ TEST_ASINH[idx],
+ 1e-6f64));
+ };
+ assert(asinhf64(-INF) == -INF);
+ assert(asinhf64(-0f64) == -0f64);
+ assert(asinhf64(0f64) == 0f64);
+ assert(asinhf64(INF) == INF);
+ assert(isnan(asinhf64(NAN)));
+};
+
+// __ieee754_acosh(x)
+// Method :
+// Based on
+// acosh(x) = log [ x + sqrt(x*x-1) ]
+// we have
+// acosh(x) := log(x)+ln2, if x is large; else
+// acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
+// acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
+//
+// Special cases:
+// acosh(x) is NaN with signal if x<1.
+// acosh(NaN) is NaN without signal.
+//
+
+// Returns the inverse hyperbolic cosine of x.
+export fn acoshf64(x: f64) f64 = {
+ const LARGE = ((1i64 << 28): f64);
+
+ if (x < 1f64 || isnan(x)) {
+ return NAN;
+ } else if (x == 1f64) {
+ return 0f64;
+ } else if (x >= LARGE) {
+ // x > 2**28
+ return logf64(x) + LN_2;
+ } else if (x > 2f64) {
+ // 2**28 > x > 2
+ return logf64(2f64 * x - 1f64 /
+ (x + sqrtf64(x * x - 1f64)));
+ };
+ const t = x - 1f64;
+ // 2 >= x > 1
+ return log1pf64(t + sqrtf64(2f64 * t + t * t));
+};
+
+@test fn acosh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(eqwithin(
+ acoshf64(1f64 + absf64(TEST_INPUTS[idx])),
+ TEST_ACOSH[idx],
+ 1e-6f64));
+ };
+ assert(isnan(acoshf64(-INF)));
+ assert(isnan(acoshf64(0.5f64)));
+ assert(acoshf64(1f64) == 0f64);
+ assert(acoshf64(INF) == INF);
+ assert(isnan(acoshf64(NAN)));
+};
+
+// __ieee754_atanh(x)
+// Method :
+// 1. Reduce x to positive by atanh(-x) = -atanh(x)
+// 2. For x>=0.5
+// 1 2x x
+// atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
+// 2 1 - x 1 - x
+//
+// For x<0.5
+// atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
+//
+// Special cases:
+// atanh(x) is NaN if |x| > 1 with signal;
+// atanh(NaN) is that NaN with no signal;
+// atanh(+-1) is +-INF with signal.
+//
+
+// Returns the inverse hyperbolic tangent of x.
+export fn atanhf64(x: f64) f64 = {
+ const NEAR_ZERO = 1f64 / ((1i64 << 28): f64);
+
+ if (x < -1f64 || x > 1.064) {
+ return NAN;
+ } else if (isnan(x)) {
+ return NAN;
+ } else if (x == 1f64) {
+ return INF;
+ } else if (x == -1f64) {
+ return -INF;
+ };
+
+ let is_negative = false;
+
+ if (x < 0f64) {
+ x = -x;
+ is_negative = true;
+ };
+
+ let temp = 0f64;
+
+ if (x < NEAR_ZERO) {
+ temp = x;
+ } else if (x < 0.5f64) {
+ temp = x + x;
+ temp = 0.5f64 * log1pf64(temp + temp * x / (1f64 - x));
+ } else {
+ temp = 0.5f64 * log1pf64((x + x) / (1f64 - x));
+ };
+ if (is_negative) {
+ temp = -temp;
+ };
+ return temp;
+};
+
+@test fn atanh() void = {
+ for (let idx = 0z; idx < 10; idx += 1) {
+ assert(isclose(
+ atanhf64(TEST_INPUTS[idx] / 10f64),
+ TEST_ATANH[idx]));
+ };
+ assert(isnan(atanhf64(-INF)));
+ assert(isnan(atanhf64(-PI)));
+ assert(atanhf64(-1f64) == -INF);
+ assert(atanhf64(-0f64) == -0f64);
+ assert(atanhf64(0f64) == 0f64);
+ assert(atanhf64(1f64) == INF);
+ assert(isnan(atanhf64(PI)));
+ assert(isnan(atanhf64(INF)));
+ assert(isnan(atanhf64(NAN)));
+};
diff --git a/math/uints.ha b/math/uints.ha
@@ -0,0 +1,543 @@
+// Sections of the code below are based on Go's implementation, in particular
+// https://raw.githubusercontent.com/golang/go/master/src/math/bits/bits.go.
+//
+// The Go copyright notice:
+// ====================================================
+// Copyright (c) 2009 The Go Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ====================================================
+
+// The number of bits required to represent the first 256 numbers.
+const LEN8TAB: [256]u8 = [
+ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08,
+];
+
+// The size of an unsigned int: 32 or 64
+def UINT_SIZE: u8 = (size(uint): u8) * 8;
+
+// Returns the minimum number of bits required to represent x.
+export fn bit_size_u8(x: u8) u8 = {
+ return LEN8TAB[x];
+};
+
+// Returns the minimum number of bits required to represent x.
+export fn bit_size_u16(x: u16) u8 = {
+ let res = 0u8;
+ if (x >= 1u16 << 8) {
+ x >>= 8;
+ res += 8;
+ };
+ return res + LEN8TAB[x];
+};
+
+// Returns the minimum number of bits required to represent x.
+export fn bit_size_u32(x: u32) u8 = {
+ let res = 0u8;
+ if (x >= 1u32 << 16) {
+ x >>= 16;
+ res += 16;
+ };
+ if (x >= 1u32 << 8) {
+ x >>= 8;
+ res += 8;
+ };
+ return res + LEN8TAB[x];
+};
+
+// Returns the minimum number of bits required to represent x.
+export fn bit_size_u64(x: u64) u8 = {
+ let res = 0u8;
+ if (x >= 1u64 << 32) {
+ x >>= 32;
+ res += 32;
+ };
+ if (x >= 1u64 << 16) {
+ x >>= 16;
+ res += 16;
+ };
+ if (x >= 1u64 << 8) {
+ x >>= 8;
+ res += 8;
+ };
+ return res + LEN8TAB[x];
+};
+
+// Returns the minimum number of bits required to represent x.
+export fn bit_size_u(x: uint) u8 = {
+ if (UINT_SIZE == 32) {
+ return bit_size_u32(x: u32);
+ };
+ return bit_size_u64(x: u64);
+};
+
+@test fn bit_size_u() void = {
+ assert(bit_size_u8(0) == 0);
+ assert(bit_size_u8(2) == 2);
+ assert(bit_size_u8(5) == 3);
+ assert(bit_size_u16(5) == 3);
+ assert(bit_size_u32(5) == 3);
+ assert(bit_size_u64(5) == 3);
+ assert(bit_size_u16(31111) == 15);
+ assert(bit_size_u32(536870911) == 29);
+ assert(bit_size_u64(8589934591) == 33);
+ assert(bit_size_u(0) == 0);
+ assert(bit_size_u(1) == 1);
+};
+
+
+// Returns the number of leading zero bits in x
+// The result is UINT_SIZE for x == 0.
+export fn leading_zeros_u(x: uint) u8 = UINT_SIZE - bit_size_u(x);
+
+// Returns the number of leading zero bits in x
+// The result is 8 for x == 0.
+export fn leading_zeros_u8(x: u8) u8 = 8 - bit_size_u8(x);
+
+// Returns the number of leading zero bits in x
+// The result is 16 for x == 0.
+export fn leading_zeros_u16(x: u16) u8 = 16 - bit_size_u16(x);
+
+// Returns the number of leading zero bits in x
+// The result is 32 for x == 0.
+export fn leading_zeros_u32(x: u32) u8 = 32 - bit_size_u32(x);
+
+// Returns the number of leading zero bits in x
+// The result is 64 for x == 0.
+export fn leading_zeros_u64(x: u64) u8 = 64 - bit_size_u64(x);
+
+@test fn leading_zeros_u() void = {
+ assert(leading_zeros_u(0) == UINT_SIZE);
+ assert(leading_zeros_u(1) == UINT_SIZE - 1);
+ assert(leading_zeros_u8(0) == 8);
+ assert(leading_zeros_u8(1) == 8 - 1);
+ assert(leading_zeros_u16(0) == 16);
+ assert(leading_zeros_u16(1) == 16 - 1);
+ assert(leading_zeros_u32(0) == 32);
+ assert(leading_zeros_u32(1) == 32 - 1);
+ assert(leading_zeros_u64(0) == 64);
+ assert(leading_zeros_u64(1) == 64 - 1);
+};
+
+// Returns the sum with carry of x, y and carry: sum = x + y + carry.
+// The carry input must be 0 or 1, otherwise the behavior is undefined.
+// The carry_out output is guaranteed to be 0 or 1.
+export fn addu32(x: u32, y: u32, carry: u32) (u32, u32) = {
+ const sum64 = (x: u64) + (y: u64) + (carry: u64);
+ const sum = (sum64: u32);
+ const carry_out = ((sum64 >> 32): u32);
+ return (sum, carry_out);
+};
+
+// Returns the sum with carry of x, y and carry: sum = x + y + carry.
+// The carry input must be 0 or 1, otherwise the behavior is undefined.
+// The carry_out output is guaranteed to be 0 or 1.
+export fn addu64(x: u64, y: u64, carry: u64) (u64, u64) = {
+ const sum = x + y + carry;
+ // The sum will overflow if both top bits are set (x & y) or if one of
+ // them is (x | y), and a carry from the lower place happened. If such a
+ // carry happens, the top bit will be 1 + 0 + 1 = 0 (& ~sum).
+ const carry_out = ((x & y) | ((x | y) & ~sum)) >> 63;
+ return (sum, carry_out);
+};
+
+// Calls either addu32() or addu64() depending on UINT_SIZE.
+export fn addu(x: uint, y: uint, carry: uint) (uint, uint) = {
+ if (UINT_SIZE == 32) {
+ const res = addu32((x: u32), (y: u32), (carry: u32));
+ return ((res.0: uint), (res.1: uint));
+ };
+ const res = addu64((x: u64), (y: u64), (carry: u64));
+ return ((res.0: uint), (res.1: uint));
+};
+
+@test fn addu() void = {
+ // 32
+ let res = addu32(2u32, 2u32, 0u32);
+ assert(res.0 == 4u32);
+ assert(res.1 == 0u32);
+ let res = addu32(2u32, 2u32, 1u32);
+ assert(res.0 == 5u32);
+ assert(res.1 == 0u32);
+ let res = addu32(~0u32, 0u32, 0u32);
+ assert(res.0 == ~0u32);
+ assert(res.1 == 0u32);
+ let res = addu32(~0u32, 1u32, 0u32);
+ assert(res.0 == 0u32);
+ assert(res.1 == 1u32);
+
+ // 64
+ let res = addu64(2u64, 2u64, 0u64);
+ assert(res.0 == 4u64);
+ assert(res.1 == 0u64);
+ let res = addu64(2u64, 2u64, 1u64);
+ assert(res.0 == 5u64);
+ assert(res.1 == 0u64);
+ let res = addu64(~0u64, 0u64, 0u64);
+ assert(res.0 == ~0u64);
+ assert(res.1 == 0u64);
+ let res = addu64(~0u64, 1u64, 0u64);
+ assert(res.0 == 0u64);
+ assert(res.1 == 1u64);
+
+ // addu()
+ let res = addu(2u, 2u, 0u);
+ assert(res.0 == 4u);
+ assert(res.1 == 0u);
+ let res = addu(2u, 2u, 1u);
+ assert(res.0 == 5u);
+ assert(res.1 == 0u);
+};
+
+// Returns the difference of x, y and borrow, diff = x - y - borrow.
+// The borrow input must be 0 or 1, otherwise the behavior is undefined.
+// The borrow_out output is guaranteed to be 0 or 1.
+export fn subu32(x: u32, y: u32, borrow: u32) (u32, u32) = {
+ const diff = x - y - borrow;
+ // The difference will underflow if the top bit of x is not set and the
+ // top bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a
+ // borrow from the lower place happens. If that borrow happens, the
+ // result will be 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff).
+ const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 31;
+ return (diff, borrow_out);
+};
+
+// Returns the difference of x, y and borrow, diff = x - y - borrow.
+// The borrow input must be 0 or 1, otherwise the behavior is undefined.
+// The borrow_out output is guaranteed to be 0 or 1.
+export fn subu64(x: u64, y: u64, borrow: u64) (u64, u64) = {
+ const diff = x - y - borrow;
+ // See subu32 for the bit logic.
+ const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 63;
+ return (diff, borrow_out);
+};
+
+// Calls either mulu32() or mulu64() depending on UINT_SIZE.
+export fn subu(x: uint, y: uint, carry: uint) (uint, uint) = {
+ if (UINT_SIZE == 32) {
+ const res = subu32((x: u32), (y: u32), (carry: u32));
+ return ((res.0: uint), (res.1: uint));
+ };
+ const res = subu64((x: u64), (y: u64), (carry: u64));
+ return ((res.0: uint), (res.1: uint));
+};
+
+@test fn subu() void = {
+ // 32
+ let res = subu32(4u32, 2u32, 0u32);
+ assert(res.0 == 2u32);
+ assert(res.1 == 0u32);
+ let res = subu32(4u32, 2u32, 1u32);
+ assert(res.0 == 1u32);
+ assert(res.1 == 0u32);
+ let res = subu32(0u32, 0u32, 0u32);
+ assert(res.0 == 0u32);
+ assert(res.1 == 0u32);
+ let res = subu32(0u32, 1u32, 0u32);
+ assert(res.0 == ~0u32);
+ assert(res.1 == 1u32);
+
+ // 64
+ let res = subu64(4u64, 2u64, 0u64);
+ assert(res.0 == 2u64);
+ assert(res.1 == 0u64);
+ let res = subu64(4u64, 2u64, 1u64);
+ assert(res.0 == 1u64);
+ assert(res.1 == 0u64);
+ let res = subu64(0u64, 0u64, 0u64);
+ assert(res.0 == 0u64);
+ assert(res.1 == 0u64);
+ let res = subu64(0u64, 1u64, 0u64);
+ assert(res.0 == ~0u64);
+ assert(res.1 == 1u64);
+
+ // subu()
+ let res = subu(4u, 2u, 0u);
+ assert(res.0 == 2u);
+ assert(res.1 == 0u);
+ let res = subu(4u, 2u, 1u);
+ assert(res.0 == 1u);
+ assert(res.1 == 0u);
+};
+
+// Returns the 64-bit product of x and y: (hi, lo) = x * y
+// with the product bits' upper half returned in hi and the lower
+// half returned in lo.
+export fn mulu32(x: u32, y: u32) (u32, u32) = {
+ const product = (x: u64) * (y: u64);
+ const hi = ((product >> 32): u32);
+ const lo = (product: u32);
+ return (hi, lo);
+};
+
+// Returns the 128-bit product of x and y: (hi, lo) = x * y
+// with the product bits' upper half returned in hi and the lower
+// half returned in lo.
+export fn mulu64(x: u64, y: u64) (u64, u64) = {
+ const mask32 = (1u64 << 32) - 1;
+ const x0 = x & mask32;
+ const x1 = x >> 32;
+ const y0 = y & mask32;
+ const y1 = y >> 32;
+ const w0 = x0 * y0;
+ const t = (x1 * y0) + (w0 >> 32);
+ let w1 = t & mask32;
+ const w2 = t >> 32;
+ w1 += x0 * y1;
+ const hi = (x1 * y1) + w2 + (w1 >> 32);
+ const lo = x * y;
+ return (hi, lo);
+};
+
+// Calls either mulu32() or mulu64() depending on UINT_SIZE.
+export fn mulu(x: uint, y: uint) (uint, uint) = {
+ if (UINT_SIZE == 32) {
+ const res = mulu32((x: u32), (y: u32));
+ return ((res.0: uint), (res.1: uint));
+ };
+ const res = mulu64((x: u64), (y: u64));
+ return ((res.0: uint), (res.1: uint));
+};
+
+@test fn mulu() void = {
+ // 32
+ let res = mulu32(2u32, 3u32);
+ assert(res.0 == 0u32);
+ assert(res.1 == 6u32);
+ let res = mulu32(~0u32, 2u32);
+ assert(res.0 == 1u32);
+ assert(res.1 == ~0u32 - 1);
+
+ // 64
+ let res = mulu64(2u64, 3u64);
+ assert(res.0 == 0u64);
+ assert(res.1 == 6u64);
+ let res = mulu64(~0u64, 2u64);
+ assert(res.0 == 1u64);
+ assert(res.1 == ~0u64 - 1);
+
+ // mulu()
+ let res = mulu(2u, 3u);
+ assert(res.0 == 0u);
+ assert(res.1 == 6u);
+ let res = mulu(~0u, 2u);
+ assert(res.0 == 1u);
+ assert(res.1 == ~0u - 1);
+};
+
+// Returns the quotient and remainder of (hi, lo) divided by y:
+// quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
+// half in parameter hi and the lower half in parameter lo.
+// Panics for y == 0 (division by zero) or y <= hi (quotient overflow).
+export fn divu32(hi: u32, lo: u32, y: u32) (u32, u32) = {
+ assert(y != 0);
+ assert(y > hi);
+ const z = (hi: u64) << 32 | (lo: u64);
+ const quo = ((z / (y: u64)): u32);
+ const rem = ((z % (y: u64)): u32);
+ return (quo, rem);
+};
+
+// Returns the quotient and remainder of (hi, lo) divided by y:
+// quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
+// half in parameter hi and the lower half in parameter lo.
+// Panics for y == 0 (division by zero) or y <= hi (quotient overflow).
+export fn divu64(hi: u64, lo: u64, y: u64) (u64, u64) = {
+ const two32 = 1u64 << 32;
+ const mask32 = two32 - 1;
+ assert(y != 0);
+ assert(y > hi);
+
+ const s = leading_zeros_u64(y);
+ y <<= s;
+
+ const yn1 = y >> 32;
+ const yn0 = y & mask32;
+ const un32 = (hi << s) | (lo >> (64 - s));
+ const un10 = lo << s;
+ const un1 = un10 >> 32;
+ const un0 = un10 & mask32;
+ let q1 = un32 / yn1;
+ let rhat = un32 - (q1 * yn1);
+
+ for (q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1)) {
+ q1 -= 1;
+ rhat += yn1;
+ if (rhat >= two32) {
+ break;
+ };
+ };
+
+ const un21 = (un32 * two32) + un1 - (q1 * y);
+ let q0 = un21 / yn1;
+ rhat = un21 - (q0 * yn1);
+
+ for (q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0)) {
+ q0 -= 1;
+ rhat += yn1;
+ if (rhat >= two32) {
+ break;
+ };
+ };
+
+ const quo = (q1 * two32) + q0;
+ const rem = ((un21 * two32) + un0 - (q0 * y)) >> s;
+ return (quo, rem);
+};
+
+// Calls either divu32() or divu64() depending on UINT_SIZE.
+export fn divu(hi: uint, lo: uint, y: uint) (uint, uint) = {
+ if (UINT_SIZE == 32) {
+ const res = divu32((hi: u32), (lo: u32), (y: u32));
+ return ((res.0: uint), (res.1: uint));
+ };
+ const res = divu64((hi: u64), (lo: u64), (y: u64));
+ return ((res.0: uint), (res.1: uint));
+};
+
+@test fn divu() void = {
+ // 32
+ let res = divu32(0u32, 4u32, 2u32);
+ assert(res.0 == 2u32);
+ assert(res.1 == 0u32);
+ let res = divu32(0u32, 5u32, 2u32);
+ assert(res.0 == 2u32);
+ assert(res.1 == 1u32);
+ let res = divu32(1u32, 0u32, 2u32);
+ assert(res.0 == (1u32 << 31));
+ assert(res.1 == 0u32);
+ // These should panic.
+ // let res = divu32(1u32, 1u32, 0u32);
+ // let res = divu32(1u32, 0u32, 1u32);
+
+ // 64
+ let res = divu64(0u64, 4u64, 2u64);
+ assert(res.0 == 2u64);
+ assert(res.1 == 0u64);
+ let res = divu64(0u64, 5u64, 2u64);
+ assert(res.0 == 2u64);
+ assert(res.1 == 1u64);
+ let res = divu64(1u64, 0u64, 2u64);
+ assert(res.0 == (1u64 << 63));
+ assert(res.1 == 0u64);
+ // These should panic.
+ // let res = divu64(1u64, 1u64, 0u64);
+ // let res = divu64(1u64, 0u64, 1u64);
+
+ // divu()
+ let res = divu(0u, 4u, 2u);
+ assert(res.0 == 2u);
+ assert(res.1 == 0u);
+ let res = divu(0u, 5u, 2u);
+ assert(res.0 == 2u);
+ assert(res.1 == 1u);
+ let res = divu(1u, 0u, 2u);
+ assert(res.0 == (1u << 31));
+ assert(res.1 == 0u);
+ // These should panic.
+ // divu(1u, 1u, 0u);
+ // divu(1u, 0u, 1u);
+};
+
+// Returns the remainder of (hi, lo) divided by y.
+// Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on
+// a quotient overflow.
+export fn remu32(hi: u32, lo: u32, y: u32) u32 = {
+ assert(y != 0);
+ const res = ((hi: u64) << 32 | (lo: u64)) % (y: u64);
+ return (res: u32);
+};
+
+// Returns the remainder of (hi, lo) divided by y.
+// Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on
+// a quotient overflow.
+export fn remu64(hi: u64, lo: u64, y: u64) u64 = {
+ assert(y != 0);
+ // We scale down hi so that hi < y, then use divu() to compute the
+ // rem with the guarantee that it won't panic on quotient overflow.
+ // Given that
+ // hi ≡ hi%y (mod y)
+ // we have
+ // hi<<64 + lo ≡ (hi%y)<<64 + lo (mod y)
+ const res = divu64(hi % y, lo, y);
+ return res.1;
+};
+
+// Calls either remu32() or remu64() depending on UINT_SIZE.
+export fn remu(hi: uint, lo: uint, y: uint) uint = {
+ if (UINT_SIZE == 32) {
+ return (remu32((hi: u32), (lo: u32), (y: u32)): uint);
+ };
+ return (remu64((hi: u64), (lo: u64), (y: u64)): uint);
+};
+
+@test fn remu() void = {
+ // 32
+ assert(remu32(0u32, 4u32, 2u32) == 0u32);
+ assert(remu32(0u32, 5u32, 2u32) == 1u32);
+ assert(remu32(0u32, 5u32, 3u32) == 2u32);
+ assert(remu32(1u32, 1u32, 2u32) == 1u32);
+ // These should panic.
+ // remu32(0u32, 4u32, 0u32);
+
+ // 64
+ assert(remu64(0u64, 4u64, 2u64) == 0u64);
+ assert(remu64(0u64, 5u64, 2u64) == 1u64);
+ assert(remu64(0u64, 5u64, 3u64) == 2u64);
+ assert(remu64(1u32, 1u32, 2u32) == 1u32);
+ // These should panic.
+ // remu64(0u64, 4u64, 0u64);
+
+ // remu()
+ assert(remu(0u, 4u, 2u) == 0u);
+ assert(remu(0u, 5u, 2u) == 1u);
+ assert(remu(0u, 5u, 3u) == 2u);
+ assert(remu(1u, 1u, 2u) == 1u);
+ // These should panic.
+ // remu(0u, 4u, 0u);
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -569,7 +569,7 @@ linux_vdso() {
math() {
printf '# math\n'
gen_srcs math \
- math.ha floats.ha ints.ha
+ testdata.ha math.ha floats.ha ints.ha uints.ha trig.ha
gen_ssa math types
}
diff --git a/stdlib.mk b/stdlib.mk
@@ -852,9 +852,12 @@ $(HARECACHE)/linux/vdso/linux_vdso.ssa: $(stdlib_linux_vdso_srcs) $(stdlib_rt) $
# math
# math
stdlib_math_srcs= \
+ $(STDLIB)/math/testdata.ha \
$(STDLIB)/math/math.ha \
$(STDLIB)/math/floats.ha \
- $(STDLIB)/math/ints.ha
+ $(STDLIB)/math/ints.ha \
+ $(STDLIB)/math/uints.ha \
+ $(STDLIB)/math/trig.ha
$(HARECACHE)/math/math.ssa: $(stdlib_math_srcs) $(stdlib_rt) $(stdlib_types)
@printf 'HAREC \t$@\n'
@@ -2080,9 +2083,12 @@ $(TESTCACHE)/linux/vdso/linux_vdso.ssa: $(testlib_linux_vdso_srcs) $(testlib_rt)
# math
# math
testlib_math_srcs= \
+ $(STDLIB)/math/testdata.ha \
$(STDLIB)/math/math.ha \
$(STDLIB)/math/floats.ha \
- $(STDLIB)/math/ints.ha
+ $(STDLIB)/math/ints.ha \
+ $(STDLIB)/math/uints.ha \
+ $(STDLIB)/math/trig.ha
$(TESTCACHE)/math/math.ssa: $(testlib_math_srcs) $(testlib_rt) $(testlib_types)
@printf 'HAREC \t$@\n'
diff --git a/strconv/ftos.ha b/strconv/ftos.ha
@@ -368,7 +368,7 @@ fn f64todecf64(mantissa: u64, exponent: u32) decf64 = {
// round to even
last_removed_digit = 4;
};
- output = vr + ibool((vr == vm &&
+ output = vr + ibool((vr == vm &&
(!accept_bounds || !vm_trailing_zeros)) ||
last_removed_digit >= 5);
} else {
@@ -451,7 +451,7 @@ fn f32todecf32(mantissa: u32, exponent: u32) decf32 = {
vp = mulpow5_divpow2(mp, i, j);
vm = mulpow5_divpow2(mm, i, j);
if (q != 0 && (vp - 1) / 10 <= vm / 10) {
- j = (q: i32) - 1 - (pow5bits(i + 1): i32 -
+ j = (q: i32) - 1 - (pow5bits(i + 1): i32 -
F32_POW5_BITCOUNT: i32);
last_removed_digit = (mulpow5_divpow2(mv,
(i + 1), j) % 10): u8;
@@ -650,7 +650,7 @@ export fn f32tos(n: f32) const str = {
// The biggest string produced by a f32 number in base 10 would have the
// negative sign, followed by a digit and decimal point, and then seven
// more decimal digits, followed by 'e' and another negative sign and
- // the maximum of two digits for exponent.
+ // the maximum of two digits for exponent.
// (1 + 1 + 1 + 7 + 1 + 1 + 2) = 14
static let buf: [16]u8 = [0...];
const bits = math::f32bits(n);
diff --git a/strconv/stof.ha b/strconv/stof.ha
@@ -146,7 +146,7 @@ fn decimal_shift(d: *decimal, k: int) void = {
fn should_round_up(d: *decimal, nd: uint) bool = if (nd < d.num_digits) {
if (d.digits[nd] == 5 && nd + 1 == d.num_digits) {
return d.truncated ||
- (nd > 0 && d.digits[nd - 1] & 1 != 0);
+ (nd > 0 && d.digits[nd - 1] & 1 != 0);
} else return d.digits[nd] >= 5;
} else false;
@@ -436,7 +436,7 @@ fn floatbits(d: *decimal, f: *math::floatinfo) (u64 | overflow) = {
};
for (d.decimal_point <= 0) {
const n: int = if (d.decimal_point == 0) {
- if (d.digits[0] >= 5) break;
+ if (d.digits[0] >= 5) break;
if (d.digits[0] < 2) 2 else 1;
} else if (-d.decimal_point >= len(powtab): i32)
maxshift: int