commit cdcb4e915b5f2b0c02221b1c59a6439f368c58db
parent 834cae484694eb24f095ade777fd50996471da8d
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date: Wed, 14 Jul 2021 18:48:24 +0200
Add a variety of float functions to math/floats.ha
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
Diffstat:
M | math/floats.ha | | | 444 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 437 insertions(+), 7 deletions(-)
diff --git a/math/floats.ha b/math/floats.ha
@@ -1,3 +1,5 @@
+use types;
+
// The floating point value representing Not a Number, i.e. an undefined or
// unrepresentable value. You cannot test if a number is NaN by comparing to
// this value; see [[isnan]] instead.
@@ -7,8 +9,19 @@ export def NAN: f32 = 0.0 / 0.0;
// negative infinity.
export def INF: f32 = 1.0 / 0.0;
+// Reports whether the given f64 is NaN, using the fact that NaN compares
+// unequal to itself.
+export fn isnanf64(n: f64) bool = {
+	return n != n;
+};
+
+// Reports whether the given f32 is NaN, using the fact that NaN compares
+// unequal to itself.
+export fn isnanf32(n: f32) bool = {
+	return n != n;
+};
+
// Returns true if the given floating-point number is NaN.
-export fn isnan(n: f64) bool = n != n;
+export fn isnan(n: types::floating) bool = {
+	// Forward to the check that matches the float type n holds.
+	return match (n) {
+		single: f32 => isnanf32(single),
+		double: f64 => isnanf64(double),
+	};
+};
@test fn isnan() void = {
assert(isnan(NAN));
@@ -35,7 +48,7 @@ export fn isinf(n: f64) bool = {
};
// Returns true if the given floating-point number is normal.
-export fn isnormal(n: (f32 | f64)) bool = {
+export fn isnormal(n: types::floating) bool = {
return match (n) {
n: f32 => isnormalf32(n),
n: f64 => isnormalf64(n),
@@ -59,7 +72,7 @@ export fn isnormalf64(n: f64) bool = {
};
// Returns true if the given floating-point number is subnormal.
-export fn issubnormal(n: (f32 | f64)) bool = {
+export fn issubnormal(n: types::floating) bool = {
return match (n) {
n: f32 => issubnormalf32(n),
n: f64 => issubnormalf64(n),
@@ -171,11 +184,17 @@ export def F32_EXPONENT_BITS: u64 = 8;
// from the exponent in the binary representation to get the actual exponent.
export def F32_EXPONENT_BIAS: u16 = 127;
-def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
-def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
+// Mask with each bit of an f64's mantissa set, i.e. the low F64_MANTISSA_BITS
+// bits of the binary representation.
+export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
-def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;
-def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;
+// Mask as wide as an f64's exponent field. The mask is not shifted into place:
+// apply it after shifting the binary representation right by
+// F64_MANTISSA_BITS.
+export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
+
+// Mask with each bit of an f32's mantissa set, i.e. the low F32_MANTISSA_BITS
+// bits of the binary representation.
+export def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;
+
+// Mask as wide as an f32's exponent field. The mask is not shifted into place:
+// apply it after shifting the binary representation right by
+// F32_MANTISSA_BITS.
+export def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;
// The largest representable f64 value which is less than Infinity.
export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308;
@@ -195,6 +214,29 @@ export def F32_MIN_NORMAL: f32 = 1.1754944e-38;
// The smallest (subnormal) f32 value greater than zero.
export def F32_MIN: f32 = 1.0e-45;
+// The mask that gets an f64's sign.
+def F64_SIGN_MASK: u64 = 1u64 << 63;
+
+// The mask that sets all exponent bits to 0 while keeping the sign and
+// mantissa bits.
+// NOTE: Replace with the following expression once the lexer supports it
+// ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
+def F64_EXP_REMOVAL_MASK: u64 =
+ 0b1000000000001111111111111111111111111111111111111111111111111111;
+
+// The bit pattern of an f64 whose exponent field holds F64_EXPONENT_BIAS - 1
+// (an unbiased exponent of -1) and whose sign and mantissa bits are zero.
+// OR-ing it with sign and mantissa bits gives a value whose magnitude is in
+// [0.5, 1), which is the mantissa range frexpf64 produces.
+def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS;
+
+// The mask that gets an f32's sign.
+def F32_SIGN_MASK: u32 = 1u32 << 31;
+
+// The mask that sets all exponent bits to 0 while keeping the sign and
+// mantissa bits.
+// NOTE: Replace with the following expression once the lexer supports it
+// ~((F32_EXPONENT_MASK << F32_MANTISSA_BITS): u32);
+def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111;
+
+// The bit pattern of an f32 whose exponent field holds F32_EXPONENT_BIAS - 1
+// (an unbiased exponent of -1) and whose sign and mantissa bits are zero.
+// OR-ing it with sign and mantissa bits gives a value whose magnitude is in
+// [0.5, 1), which is the mantissa range frexpf32 produces.
+def F32_EXP_ZERO: u32 = ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32);
+
// Contains information about the structure of a specific floating point number
// type.
export type floatinfo = struct {
@@ -228,3 +270,391 @@ export const f32info: floatinfo = floatinfo {
expmask = (1 << 8) - 1,
};
+// Computes the absolute value of n by clearing the sign bit of its binary
+// representation.
+export fn absf64(n: f64) f64 = {
+	const magnitude_bits = f64bits(n) & ~F64_SIGN_MASK;
+	return f64frombits(magnitude_bits);
+};
+
+// Computes the absolute value of n by clearing the sign bit of its binary
+// representation.
+export fn absf32(n: f32) f32 = {
+	const magnitude_bits = f32bits(n) & ~F32_SIGN_MASK;
+	return f32frombits(magnitude_bits);
+};
+
+// Computes the absolute value of n and returns it as an f64. An f32 input has
+// its sign cleared first and is then converted to f64.
+export fn abs(n: types::floating) f64 = {
+	return match (n) {
+		single: f32 => (absf32(single): f64),
+		double: f64 => absf64(double),
+	};
+};
+
+@test fn test_abs() void = {
+	// Type-specific entry points.
+	assert(absf32(2.0f32) == 2.0f32);
+	assert(absf64(2.0f64) == 2.0f64);
+
+	// Generic entry point: positive inputs are returned as they are.
+	assert(abs(2.0f32) == 2.0f64);
+	assert(abs(2.0f64) == 2.0f64);
+
+	// Generic entry point: negative inputs lose their sign.
+	assert(abs(-2.0f32) == 2.0f32);
+	assert(abs(-2.0f64) == 2.0f64);
+
+	// Zero stays zero.
+	assert(abs(0f64) == 0f64);
+	assert(abs(0f32) == 0f32);
+};
+
+// Returns 1 when the sign bit of x is clear and -1 when it is set. Zero is
+// signed as well, so 0.0 gives 1 and -0.0 gives -1.
+export fn signf64(x: f64) i64 = {
+	const sign_bit = f64bits(x) & F64_SIGN_MASK;
+	return if (sign_bit == 0) 1i64 else -1i64;
+};
+
+// Returns 1 when the sign bit of x is clear and -1 when it is set. Zero is
+// signed as well, so 0.0 gives 1 and -0.0 gives -1.
+export fn signf32(x: f32) i64 = {
+	const sign_bit = f32bits(x) & F32_SIGN_MASK;
+	return if (sign_bit == 0) 1i64 else -1i64;
+};
+
+// Returns 1 when x is positive and -1 when x is negative, forwarding to the
+// variant that matches the float type x holds. Zero is signed as well.
+export fn sign(x: types::floating) i64 = {
+	return match (x) {
+		single: f32 => signf32(single),
+		double: f64 => signf64(double),
+	};
+};
+
+@test fn test_sign() void = {
+	// Inputs with a clear sign bit, including positive zero.
+	assert(sign(0.0f32) == 1i64);
+	assert(sign(0.0f64) == 1i64);
+	assert(sign(1.5f64) == 1i64);
+
+	// Inputs with the sign bit set, including negative zero.
+	assert(sign(-0.0f32) == -1i64);
+	assert(sign(-0.0f64) == -1i64);
+	assert(sign(-1.5f64) == -1i64);
+};
+
+// Returns a value with the magnitude of x and the sign of y, built by combining
+// x's non-sign bits with y's sign bit.
+export fn copysignf64(x: f64, y: f64) f64 = {
+	const magnitude_bits = f64bits(x) & ~F64_SIGN_MASK;
+	const sign_bit = f64bits(y) & F64_SIGN_MASK;
+	return f64frombits(magnitude_bits | sign_bit);
+};
+
+// Returns a value with the magnitude of x and the sign of y, built by combining
+// x's non-sign bits with y's sign bit.
+export fn copysignf32(x: f32, y: f32) f32 = {
+	const magnitude_bits = f32bits(x) & ~F32_SIGN_MASK;
+	const sign_bit = f32bits(y) & F32_SIGN_MASK;
+	return f32frombits(magnitude_bits | sign_bit);
+};
+
+// Returns x, but with the sign of y, as an f64. x and y do not have to hold the
+// same float type: each f32 argument is converted to f64 before the sign is
+// copied, instead of assuming that y holds the same type as x.
+export fn copysign(x: types::floating, y: types::floating) f64 = {
+	const magnitude_source: f64 = match (x) {
+		n: f64 => n,
+		n: f32 => (n: f64),
+	};
+	const sign_source: f64 = match (y) {
+		n: f64 => n,
+		n: f32 => (n: f64),
+	};
+	return copysignf64(magnitude_source, sign_source);
+};
+
+@test fn test_copysign() void = {
+	// Sign taken from a non-zero f64.
+	assert(copysign(100.0f64, 1.0f64) == 100.0f64);
+	assert(copysign(100.0f64, -1.0f64) == -100.0f64);
+
+	// Sign taken from a non-zero f32.
+	assert(copysign(100.0f32, 1.0f32) == 100.0f32);
+	assert(copysign(100.0f32, -1.0f32) == -100.0f32);
+
+	// Sign taken from a signed zero.
+	assert(copysign(100.0f64, 0.0f64) == 100.0f64);
+	assert(copysign(100.0f64, -0.0f64) == -100.0f64);
+
+	// Sign applied to a zero: the value still compares equal to zero, so
+	// the sign bit is checked separately.
+	assert(copysign(0.0f64, 100.0f64) == 0.0f64);
+	assert(sign(copysign(0.0f64, 100.0f64)) > 0);
+	assert(copysign(0.0f64, -100.0f64) == 0.0f64);
+	assert(sign(copysign(0.0f64, -100.0f64)) < 0);
+};
+
+// Scales a subnormal f64 n into the normal range. Returns a pair
+// (normal_float, exp) such that n == normal_float * 2^{exp}. An input that is
+// not subnormal is returned unchanged with an exponent of 0.
+export fn normalizef64(n: f64) (f64, i64) = {
+	if (!issubnormalf64(n)) {
+		return (n, 0);
+	};
+	// Multiply by 2^F64_MANTISSA_BITS and report the inverse exponent.
+	const shift = (F64_MANTISSA_BITS: i64);
+	const scale = 1i64 << shift;
+	return (n * (scale: f64), -shift);
+};
+
+// Takes a potentially subnormal f32 n and returns a normal float normal_float
+// and an exponent exp such that n == normal_float * 2^{exp}. An input that is
+// not subnormal is returned unchanged with an exponent of 0.
+// NOTE(review): the signature returns normal_float as an f64 rather than an
+// f32, while frexpf32 and ldexpf32 pass it to f32bits; confirm which type is
+// intended.
+export fn normalizef32(n: f32) (f64, i64) = {
+ if (issubnormalf32(n)) {
+ // Multiply by 2^F32_MANTISSA_BITS and report the inverse exponent.
+ const factor = 1i32 << (F32_MANTISSA_BITS: i32);
+ const normal_float = ((n * (factor: f64)): f64);
+ return (normal_float, -(F32_MANTISSA_BITS: i64));
+ };
+ return (n, 0);
+};
+
+@test fn test_normalize() void = {
+	// A subnormal input is scaled up and gets a negative exponent.
+	const scaled = normalizef64(5.0e-320);
+	assert(scaled.0 > F64_MIN_NORMAL);
+	assert(scaled.1 < 0i64);
+
+	// A normal input comes back untouched.
+	const untouched = normalizef64(5.0e-300);
+	assert(untouched.0 == 5.0e-300);
+	assert(untouched.1 == 0i64);
+};
+
+// Splits an f64 into a mantissa and an exponent such that
+// n == mantissa * 2^{exp}. The magnitude of the mantissa is between 0.5 and 1.
+// NaN, infinities and zeroes are returned unchanged with an exponent of 0.
+export fn frexpf64(n: f64) (f64, i64) = {
+	if (n == 0f64 || isinf(n) || isnan(n)) {
+		return (n, 0);
+	};
+	// Subnormal inputs are scaled up first; the scaling is undone through
+	// the exponent returned by normalizef64.
+	const norm = normalizef64(n);
+	const repr = f64bits(norm.0);
+	const biased_exp: u64 = (repr >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
+	// The extra 1 compensates for the mantissa being stored with an
+	// exponent of -1 below.
+	const exp: i64 = norm.1 +
+		(biased_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1;
+	// Keep the sign and mantissa bits and replace the exponent field.
+	const mantissa_bits = (repr & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO;
+	const mantissa: f64 = f64frombits(mantissa_bits);
+	return (mantissa, exp);
+};
+
+// Breaks a f32 down into its mantissa and exponent, such that
+// n == mantissa * 2^{exp}. The magnitude of the mantissa will be between 0.5
+// and 1. NaN, infinities and zeroes are returned unchanged with an exponent
+// of 0.
+// NOTE(review): the mantissa is computed as an f32 but returned in an f64
+// slot, and normal_float comes from normalizef32 as an f64 before being passed
+// to f32bits; confirm the intended types.
+export fn frexpf32(n: f32) (f64, i64) = {
+ if (isnan(n) || isinf(n) || n == 0f32) {
+ return (n, 0);
+ };
+ // Subnormal inputs are scaled up first; the scaling is undone through
+ // normalization_exp below.
+ const normalized = normalizef32(n);
+ const normal_float = normalized.0;
+ const normalization_exp = normalized.1;
+ const bits = f32bits(normal_float);
+ // Biased exponent field of the normalized value.
+ const raw_exp: u64 = ((bits >> (F32_MANTISSA_BITS: u32)) &
+ (F32_EXPONENT_MASK: u32));
+ // The extra 1 compensates for the mantissa being stored with an exponent
+ // of -1 below.
+ const exp: i64 = normalization_exp +
+ (raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1;
+ // Keep the sign and mantissa bits and replace the exponent field.
+ const mantissa: f32 = f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO);
+ return (mantissa, exp);
+};
+
+// Splits a float into its mantissa and exponent, forwarding to the variant
+// that matches the float type n holds. The magnitude of the mantissa is
+// between 0.5 and 1.
+export fn frexp(n: types::floating) (f64, i64) = {
+	return match (n) {
+		single: f32 => frexpf32(single),
+		double: f64 => frexpf64(double),
+	};
+};
+
+@test fn test_frexp() void = {
+	// Ordinary values.
+	const three = frexp(3.0f64);
+	assert(three.0 == 0.75f64);
+	assert(three.1 == 2i64);
+	const fractional = frexp(2.42f64);
+	assert(fractional.0 == 0.605f64);
+	assert(fractional.1 == 2i64);
+
+	// Special values report an exponent of 0.
+	const nan_parts = frexp(NAN);
+	assert(nan_parts.1 == 0);
+	const inf_parts = frexp(INF);
+	assert(inf_parts.1 == 0);
+};
+
+// Creates an f64 from a mantissa and an exponent, i.e. computes
+// mantissa * 2^{exp}. NaN, infinities and zeroes are returned unchanged. A
+// result too small to represent becomes a zero with the sign of mantissa, and
+// a result too large becomes an infinity with the sign of mantissa.
+export fn ldexpf64(mantissa: f64, exp: i64) f64 = {
+	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) {
+		return mantissa;
+	};
+	const normalized = normalizef64(mantissa);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f64bits(normal_float);
+	// Unbiased exponent stored in the normalized mantissa.
+	const mantissa_exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
+		(F64_EXPONENT_BIAS: i64);
+	let res_exp = exp + normalization_exp + mantissa_exp;
+	// Underflow
+	if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) {
+		return copysign(0.0f64, mantissa);
+	};
+	// Overflow
+	if (res_exp > (F64_EXPONENT_BIAS: i64)) {
+		if (mantissa < 0.0f64) {
+			return -INF;
+		} else {
+			return INF;
+		};
+	};
+	// Subnormal: the result cannot be encoded directly, so it is built with
+	// a larger exponent and scaled back down by a multiplication, which
+	// rounds it into the subnormal range. The shift has to be
+	// F64_MANTISSA_BITS + 1: the smallest res_exp that passes the underflow
+	// check is -(F64_EXPONENT_BIAS + F64_MANTISSA_BITS), and any smaller
+	// shift leaves its biased exponent at zero or below.
+	let subnormal_factor = 1.0f64;
+	if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) {
+		res_exp += (F64_MANTISSA_BITS: i64) + 1;
+		subnormal_factor = 1.0f64 /
+			((1i64 << ((F64_MANTISSA_BITS: i64) + 1)): f64);
+	};
+	// Keep the sign and mantissa bits and store the new biased exponent.
+	const res: u64 = (bits & F64_EXP_REMOVAL_MASK) |
+		(
+			((res_exp: u64) + F64_EXPONENT_BIAS)
+			<< F64_MANTISSA_BITS
+		);
+	return subnormal_factor * f64frombits(res);
+};
+
+// Creates an f32 from a mantissa and an exponent, i.e. computes
+// mantissa * 2^{exp}. NaN, infinities and zeroes are returned unchanged. A
+// result too small to represent becomes a zero with the sign of mantissa, and
+// a result too large becomes an infinity with the sign of mantissa.
+export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
+	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) {
+		return mantissa;
+	};
+	const normalized = normalizef32(mantissa);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f32bits(normal_float);
+	// Unbiased exponent stored in the normalized mantissa.
+	const mantissa_exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
+		(F32_EXPONENT_BIAS: i32);
+	let res_exp = exp + normalization_exp + mantissa_exp;
+	// Underflow
+	if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) {
+		return copysign(0.0f32, mantissa);
+	};
+	// Overflow
+	if (res_exp > (F32_EXPONENT_BIAS: i32)) {
+		if (mantissa < 0.0f32) {
+			return -INF;
+		} else {
+			return INF;
+		};
+	};
+	// Subnormal: the result cannot be encoded directly, so it is built with
+	// a larger exponent and scaled back down by a multiplication, which
+	// rounds it into the subnormal range. The shift has to be
+	// F32_MANTISSA_BITS + 1: the smallest res_exp that passes the underflow
+	// check is -(F32_EXPONENT_BIAS + F32_MANTISSA_BITS), and any smaller
+	// shift leaves its biased exponent at zero or below.
+	let subnormal_factor = 1.0f32;
+	if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) {
+		res_exp += (F32_MANTISSA_BITS: i32) + 1;
+		subnormal_factor = 1.0f32 /
+			((1i32 << ((F32_MANTISSA_BITS: i32) + 1)): f32);
+	};
+	// Keep the sign and mantissa bits and store the new biased exponent.
+	const res: u32 = (bits & F32_EXP_REMOVAL_MASK) |
+		(
+			((res_exp: u32) + F32_EXPONENT_BIAS)
+			<< (F32_MANTISSA_BITS: u32)
+		);
+	return subnormal_factor * f32frombits(res);
+};
+
+@test fn test_frexp_ldexp() void = {
+	// f64: splitting a value and recombining it gives the value back.
+	const inputs64: [_]f64 = [INF, -INF,
+		0.0, 1.0, -1.0, 2.42, 123456789.0,
+		F64_MIN_NORMAL, F64_MAX_NORMAL,
+		3.0e-310f64];
+	for (let k = 0z; k < len(inputs64); k += 1) {
+		const split = frexpf64(inputs64[k]);
+		const rebuilt = ldexpf64(split.0, split.1);
+		assert(rebuilt == inputs64[k]);
+	};
+	// f64: underflow gives a signed zero, overflow gives a signed infinity.
+	assert(ldexpf64(1.0f64, -1076i64) == 0.0f64);
+	assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64);
+	assert(sign(ldexpf64(-1.0f64, -1076i64)) < 0);
+	assert(ldexpf64(2.0f64, 1024i64) == INF);
+	assert(ldexpf64(-2.0f64, 1024i64) == -INF);
+
+	// f32: splitting a value and recombining it gives the value back.
+	const inputs32: [_]f32 = [INF, -INF,
+		0.0, 1.0, -1.0, 2.42, 123456789.0,
+		F32_MIN_NORMAL, F32_MAX_NORMAL,
+		3.0e-39f32];
+	for (let k = 0z; k < len(inputs32); k += 1) {
+		const split = frexpf32(inputs32[k]);
+		const rebuilt = ldexpf32(split.0, split.1);
+		assert(rebuilt == inputs32[k]);
+	};
+	// f32: underflow gives a signed zero, overflow gives a signed infinity.
+	assert(ldexpf32(1.0f32, -1076i32) == 0.0f32);
+	assert(ldexpf32(-1.0f32, -1076i32) == -0.0f32);
+	assert(sign(ldexpf32(-1.0f32, -1076i32)) < 0);
+	assert(ldexpf32(2.0f32, 1024i32) == INF);
+	assert(ldexpf32(-2.0f32, 1024i32) == -INF);
+};
+
+// Splits an f64 into its integer part and its fractional part, returned in
+// that order. A negative input is handled through its negation, so both parts
+// come back negated; a zero input is returned as both parts, sign included.
+export fn modf64(n: f64) (f64, f64) = {
+	if (n < 0.0f64) {
+		const mirrored = modf64(-n);
+		return (-mirrored.0, -mirrored.1);
+	};
+	if (n == 0.0f64) {
+		return (n, n);
+	};
+	if (n < 1.0f64) {
+		// Nothing but a fraction.
+		return (0.0f64, n);
+	};
+	let int_bits = f64bits(n);
+	const exp = (((int_bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
+		(F64_EXPONENT_BIAS: i64);
+	// Number of bits below the sign and exponent fields. With exponent exp,
+	// the top exp of those bits hold the integer part.
+	const low_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64;
+	if (exp < (low_bits: i64)) {
+		// Clear the remaining bits, which encode the fraction.
+		const frac_width = (((low_bits: i64) - exp): u64);
+		int_bits = int_bits & ~((1u64 << frac_width) - 1);
+	};
+	const int_part = f64frombits(int_bits);
+	return (int_part, n - int_part);
+};
+
+// Splits an f32 into its integer part and its fractional part, returned in
+// that order. A negative input is handled through its negation, so both parts
+// come back negated; a zero input is returned as both parts, sign included.
+export fn modf32(n: f32) (f32, f32) = {
+	if (n < 0.0f32) {
+		const mirrored = modf32(-n);
+		return (-mirrored.0, -mirrored.1);
+	};
+	if (n == 0.0f32) {
+		return (n, n);
+	};
+	if (n < 1.0f32) {
+		// Nothing but a fraction.
+		return (0.0f32, n);
+	};
+	let int_bits = f32bits(n);
+	const exp = (((int_bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
+		(F32_EXPONENT_BIAS: i32);
+	// Number of bits below the sign and exponent fields. With exponent exp,
+	// the top exp of those bits hold the integer part.
+	const low_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32;
+	if (exp < (low_bits: i32)) {
+		// Clear the remaining bits, which encode the fraction.
+		const frac_width = (((low_bits: i32) - exp): u32);
+		int_bits = int_bits & ~((1u32 << frac_width) - 1);
+	};
+	const int_part = f32frombits(int_bits);
+	return (int_part, n - int_part);
+};
+
+// Checks modf64 and modf32 on a value above one, a pure fraction, a negative
+// fraction and both signed zeroes. Zero results compare equal regardless of
+// sign, so the sign of the fractional part is checked separately.
+@test fn test_modf() void = {
+	// 64
+	let res = modf64(1.75f64);
+	assert(res.0 == 1.0f64);
+	assert(res.1 == 0.75f64);
+	res = modf64(0.75f64);
+	assert(res.0 == 0.0f64);
+	assert(res.1 == 0.75f64);
+	res = modf64(-0.75f64);
+	assert(res.0 == -0.0f64);
+	assert(res.1 == -0.75f64);
+	res = modf64(0.0f64);
+	assert(res.0 == 0.0f64);
+	assert(res.1 == 0.0f64);
+	assert(sign(res.1) > 0);
+	res = modf64(-0.0f64);
+	assert(res.0 == -0.0f64);
+	assert(res.1 == -0.0f64);
+	assert(sign(res.1) < 0);
+
+	// 32
+	let res = modf32(1.75f32);
+	assert(res.0 == 1.0f32);
+	assert(res.1 == 0.75f32);
+	res = modf32(0.75f32);
+	assert(res.0 == 0.0f32);
+	assert(res.1 == 0.75f32);
+	res = modf32(-0.75f32);
+	assert(res.0 == -0.0f32);
+	assert(res.1 == -0.75f32);
+	res = modf32(0.0f32);
+	assert(res.0 == 0.0f32);
+	assert(res.1 == 0.0f32);
+	assert(sign(res.1) > 0);
+	res = modf32(-0.0f32);
+	assert(res.0 == -0.0f32);
+	// Check the fractional part too, as the f64 section does.
+	assert(res.1 == -0.0f32);
+	assert(sign(res.1) < 0);
+};