hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit cdcb4e915b5f2b0c02221b1c59a6439f368c58db
parent 834cae484694eb24f095ade777fd50996471da8d
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Wed, 14 Jul 2021 18:48:24 +0200

Add a variety of float functions to math/floats.ha

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
Mmath/floats.ha | 444+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 437 insertions(+), 7 deletions(-)

diff --git a/math/floats.ha b/math/floats.ha @@ -1,3 +1,5 @@ +use types; + // The floating point value representing Not a Number, i.e. an undefined or // unrepresentable value. You cannot test if a number is NaN by comparing to // this value; see [[isnan]] instead. @@ -7,8 +9,19 @@ export def NAN: f32 = 0.0 / 0.0; // negative infinity. export def INF: f32 = 1.0 / 0.0; +// Returns true if the given f64 is NaN. +export fn isnanf64(n: f64) bool = n != n; + +// Returns true if the given f32 is NaN. +export fn isnanf32(n: f32) bool = n != n; + // Returns true if the given floating-point number is NaN. -export fn isnan(n: f64) bool = n != n; +export fn isnan(n: types::floating) bool = { + return match (n) { + f: f64 => isnanf64(f), + f: f32 => isnanf32(f), + }; +}; @test fn isnan() void = { assert(isnan(NAN)); @@ -35,7 +48,7 @@ export fn isinf(n: f64) bool = { }; // Returns true if the given floating-point number is normal. -export fn isnormal(n: (f32 | f64)) bool = { +export fn isnormal(n: types::floating) bool = { return match (n) { n: f32 => isnormalf32(n), n: f64 => isnormalf64(n), @@ -59,7 +72,7 @@ export fn isnormalf64(n: f64) bool = { }; // Returns true if the given floating-point number is subnormal. -export fn issubnormal(n: (f32 | f64)) bool = { +export fn issubnormal(n: types::floating) bool = { return match (n) { n: f32 => issubnormalf32(n), n: f64 => issubnormalf64(n), @@ -171,11 +184,17 @@ export def F32_EXPONENT_BITS: u64 = 8; // from the exponent in the binary representation to get the actual exponent. export def F32_EXPONENT_BIAS: u16 = 127; -def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1; -def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1; +// Mask with each bit of an f64's mantissa set. +export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1; -def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1; -def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1; +// Mask with each bit of an f64's exponent set. +export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1; + +// Mask with each bit of an f32's mantissa set. +export def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1; + +// Mask with each bit of an f32's exponent set. +export def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1; // The largest representable f64 value which is less than Infinity. export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308; @@ -195,6 +214,29 @@ export def F32_MIN_NORMAL: f32 = 1.1754944e-38; // The smallest (subnormal) f32 value greater than zero. export def F32_MIN: f32 = 1.0e-45; +// The mask that gets an f64's sign. +def F64_SIGN_MASK: u64 = 1u64 << 63; + +// The mask that sets all exponent bits to 0. +// NOTE: Replace with the following expression once the lexer supports it +// 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); +def F64_EXP_REMOVAL_MASK: u64 = + 0b1000000000001111111111111111111111111111111111111111111111111111; + +// The f64 that contains only an exponent that evaluates to zero. +def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS; + +// The mask that gets an f32's sign. +def F32_SIGN_MASK: u32 = 1u32 << 31; + +// The mask that sets all exponent bits to 0. +// NOTE: Replace with the following expression once the lexer supports it +// 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS); +def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111; + +// The f32 that contains only an exponent that evaluates to zero. +def F32_EXP_ZERO: u32 = ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32); + // Contains information about the structure of a specific floating point number // type. export type floatinfo = struct { @@ -228,3 +270,391 @@ export const f32info: floatinfo = floatinfo { expmask = (1 << 8) - 1, }; +// Returns the absolute value of n. +export fn absf64(n: f64) f64 = { + return f64frombits(f64bits(n) & ~F64_SIGN_MASK); +}; + +// Returns the absolute value of n. +export fn absf32(n: f32) f32 = { + return f32frombits(f32bits(n) & ~F32_SIGN_MASK); +}; + +// Returns the absolute value of n. +export fn abs(n: types::floating) f64 = { + return match (n) { + n: f64 => absf64(n), + n: f32 => (absf32(n): f64), + }; +}; + +@test fn test_abs() void = { + assert(absf64(2.0f64) == 2.0f64); + assert(absf32(2.0f32) == 2.0f32); + assert(abs(2.0f64) == 2.0f64); + assert(abs(2.0f32) == 2.0f64); + assert(abs(-2.0f64) == 2.0f64); + assert(abs(-2.0f32) == 2.0f32); + assert(abs(0f32) == 0f32); + assert(abs(0f64) == 0f64); +}; + +// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed. +export fn signf64(x: f64) i64 = { + if (f64bits(x) & F64_SIGN_MASK == 0) { + return 1i64; + } else { + return -1i64; + }; +}; + +// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed. +export fn signf32(x: f32) i64 = { + if (f32bits(x) & F32_SIGN_MASK == 0) { + return 1i64; + } else { + return -1i64; + }; +}; + +// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed. +export fn sign(x: types::floating) i64 = { + return match (x) { + n: f64 => signf64(n), + n: f32 => signf32(n), + }; +}; + +@test fn test_sign() void = { + assert(sign(0.0f64) == 1i64); + assert(sign(-0.0f64) == -1i64); + assert(sign(0.0f32) == 1i64); + assert(sign(-0.0f32) == -1i64); + assert(sign(1.5f64) == 1i64); + assert(sign(-1.5f64) == -1i64); +}; + +// Returns x, but with the sign of y. +export fn copysignf64(x: f64, y: f64) f64 = { + return f64frombits((f64bits(x) & ~F64_SIGN_MASK) | + (f64bits(y) & F64_SIGN_MASK)); +}; + +// Returns x, but with the sign of y. +export fn copysignf32(x: f32, y: f32) f32 = { + return f32frombits((f32bits(x) & ~F32_SIGN_MASK) | + (f32bits(y) & F32_SIGN_MASK)); +}; + +// Returns x, but with the sign of y. +export fn copysign(x: types::floating, y: types::floating) f64 = { + return match (x) { + n: f64 => copysignf64(n, (y: f64)), + n: f32 => (copysignf32(n, (y: f32)): f64), + }; +}; + +@test fn test_copysign() void = { + assert(copysign(100.0f64, 1.0f64) == 100.0f64); + assert(copysign(100.0f64, -1.0f64) == -100.0f64); + assert(copysign(100.0f32, 1.0f32) == 100.0f32); + assert(copysign(100.0f32, -1.0f32) == -100.0f32); + assert(copysign(100.0f64, 0.0f64) == 100.0f64); + assert(copysign(100.0f64, -0.0f64) == -100.0f64); + assert(copysign(0.0f64, 100.0f64) == 0.0f64); + assert(sign(copysign(0.0f64, 100.0f64)) > 0); + assert(copysign(0.0f64, -100.0f64) == 0.0f64); + assert(sign(copysign(0.0f64, -100.0f64)) < 0); +}; + +// Takes a potentially subnormal f64 n and returns a normal f64 normal_float +// and an exponent exp such that n == normal_float * 2^{exp}. +export fn normalizef64(n: f64) (f64, i64) = { + if (issubnormalf64(n)) { + const factor = 1i64 << (F64_MANTISSA_BITS: i64); + const normal_float = (n * (factor: f64)); + return (normal_float, -(F64_MANTISSA_BITS: i64)); + }; + return (n, 0); +}; + +// Takes a potentially subnormal f32 n and returns a normal f32 normal_float +// and an exponent exp such that n == normal_float * 2^{exp}. +export fn normalizef32(n: f32) (f64, i64) = { + if (issubnormalf32(n)) { + const factor = 1i32 << (F32_MANTISSA_BITS: i32); + const normal_float = ((n * (factor: f64)): f64); + return (normal_float, -(F32_MANTISSA_BITS: i64)); + }; + return (n, 0); +}; + +@test fn test_normalize() void = { + let res = normalizef64(5.0e-320); + assert(res.0 > F64_MIN_NORMAL); + assert(res.1 < 0i64); + res = normalizef64(5.0e-300); + assert(res.0 == 5.0e-300); + assert(res.1 == 0i64); +}; + +// Breaks a f64 down into its mantissa and exponent. The mantissa will be between 0.5 and +// 1. +export fn frexpf64(n: f64) (f64, i64) = { + if (isnan(n) || isinf(n) || n == 0f64) { + return (n, 0); + }; + const normalized = normalizef64(n); + const normal_float = normalized.0; + const normalization_exp = normalized.1; + const bits = f64bits(normal_float); + const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; + const exp: i64 = normalization_exp + + (raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1; + const mantissa: f64 = f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO); + return (mantissa, exp); +}; + +// Breaks a f32 down into its mantissa and exponent. The mantissa will be between 0.5 and +// 1. +export fn frexpf32(n: f32) (f64, i64) = { + if (isnan(n) || isinf(n) || n == 0f32) { + return (n, 0); + }; + const normalized = normalizef32(n); + const normal_float = normalized.0; + const normalization_exp = normalized.1; + const bits = f32bits(normal_float); + const raw_exp: u64 = ((bits >> (F32_MANTISSA_BITS: u32)) & + (F32_EXPONENT_MASK: u32)); + const exp: i64 = normalization_exp + + (raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1; + const mantissa: f32 = f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO); + return (mantissa, exp); +}; + +// Breaks a float down into its mantissa and exponent. The mantissa will be between 0.5 +// and 1. +export fn frexp(n: types::floating) (f64, i64) = { + return match (n) { + n: f64 => frexpf64(n), + n: f32 => frexpf32(n), + }; +}; + +@test fn test_frexp() void = { + let res = frexp(3.0f64); + assert(res.0 == 0.75f64); + assert(res.1 == 2i64); + res = frexp(2.42f64); + assert(res.0 == 0.605f64); + assert(res.1 == 2i64); + res = frexp(NAN); + assert(res.1 == 0); + res = frexp(INF); + assert(res.1 == 0); +}; + +// Creates an f64 from a mantissa and an exponent. +export fn ldexpf64(mantissa: f64, exp: i64) f64 = { + if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) { + return mantissa; + }; + const normalized = normalizef64(mantissa); + const normal_float = normalized.0; + const normalization_exp = normalized.1; + const bits = f64bits(normal_float); + const mantissa_exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - + (F64_EXPONENT_BIAS: i64); + let res_exp = exp + normalization_exp + mantissa_exp; + // Underflow + if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) { + return copysign(0.0f64, mantissa); + }; + // Overflow + if (res_exp > (F64_EXPONENT_BIAS: i64)) { + if (mantissa < 0.0f64) { + return -INF; + } else { + return INF; + }; + }; + // Subnormal + let subnormal_factor = 1.0f64; + if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) { + res_exp += (F64_MANTISSA_BITS: i64) - 1; + subnormal_factor = 1.0f64 / + ((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64); + }; + const res: u64 = (bits & F64_EXP_REMOVAL_MASK) | + ( + ((res_exp: u64) + F64_EXPONENT_BIAS) + << F64_MANTISSA_BITS + ); + return subnormal_factor * f64frombits(res); +}; + +// Creates an f32 from a mantissa and an exponent. +export fn ldexpf32(mantissa: f32, exp: i64) f32 = { + if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) { + return mantissa; + }; + const normalized = normalizef32(mantissa); + const normal_float = normalized.0; + const normalization_exp = normalized.1; + const bits = f32bits(normal_float); + const mantissa_exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - + (F32_EXPONENT_BIAS: i32); + let res_exp = exp + normalization_exp + mantissa_exp; + // Underflow + if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) { + return copysign(0.0f32, mantissa); + }; + // Overflow + if (res_exp > (F32_EXPONENT_BIAS: i32)) { + if (mantissa < 0.0f32) { + return -INF; + } else { + return INF; + }; + }; + // Subnormal + let subnormal_factor = 1.0f32; + if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) { + res_exp += (F32_MANTISSA_BITS: i32) - 1; + subnormal_factor = 1.0f32 / + ((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32); + }; + const res: u32 = (bits & F32_EXP_REMOVAL_MASK) | + ( + ((res_exp: u32) + F32_EXPONENT_BIAS) + << (F32_MANTISSA_BITS: u32) + ); + return subnormal_factor * f32frombits(res); +}; + +@test fn test_frexp_ldexp() void = { + const tests64: [_]f64 = [INF, -INF, + 0.0, 1.0, -1.0, 2.42, 123456789.0, + F64_MIN_NORMAL, F64_MAX_NORMAL, + 3.0e-310f64]; + for (let i = 0z; i < len(tests64); i += 1) { + const parts = frexpf64(tests64[i]); + const res64 = ldexpf64(parts.0, parts.1); + assert(res64 == tests64[i]); + }; + assert(ldexpf64(1.0f64, -1076i64) == 0.0f64); + assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64); + assert(sign(ldexpf64(-1.0f64, -1076i64)) < 0); + assert(ldexpf64(2.0f64, 1024i64) == INF); + assert(ldexpf64(-2.0f64, 1024i64) == -INF); + + const tests32: [_]f32 = [INF, -INF, + 0.0, 1.0, -1.0, 2.42, 123456789.0, + F32_MIN_NORMAL, F32_MAX_NORMAL, + 3.0e-39f32]; + for (let i = 0z; i < len(tests32); i += 1) { + const parts = frexpf32(tests32[i]); + const res = ldexpf32(parts.0, parts.1); + assert(res == tests32[i]); + }; + assert(ldexpf32(1.0f32, -1076i32) == 0.0f32); + assert(ldexpf32(-1.0f32, -1076i32) == -0.0f32); + assert(sign(ldexpf32(-1.0f32, -1076i32)) < 0); + assert(ldexpf32(2.0f32, 1024i32) == INF); + assert(ldexpf32(-2.0f32, 1024i32) == -INF); +}; + +// Returns the integer and fractional parts of an f64. +export fn modf64(n: f64) (f64, f64) = { + if (n < 1.0f64) { + if (n < 0.0f64) { + let positive_parts = modf64(-n); + return (-positive_parts.0, -positive_parts.1); + }; + if (n == 0.0f64) { + return (n, n); + }; + return (0.0f64, n); + }; + let bits = f64bits(n); + const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - + (F64_EXPONENT_BIAS: i64); + // For exponent exp, all integers can be represented with the top exp + // bits of the mantissa + const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64; + if (exp < (sign_and_exp_bits: i64)) { + const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64); + bits = bits & ~((1u64 << bits_to_shift) - 1); + }; + const int_part = f64frombits(bits); + const frac_part = n - int_part; + return (int_part, frac_part); +}; + +// Returns the integer and fractional parts of an f32. +export fn modf32(n: f32) (f32, f32) = { + if (n < 1.0f32) { + if (n < 0.0f32) { + let positive_parts = modf32(-n); + return (-positive_parts.0, -positive_parts.1); + }; + if (n == 0.0f32) { + return (n, n); + }; + return (0.0f32, n); + }; + let bits = f32bits(n); + const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - + (F32_EXPONENT_BIAS: i32); + // For exponent exp, all integers can be represented with the top exp + // bits of the mantissa + const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32; + if (exp < (sign_and_exp_bits: i32)) { + const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32); + bits = bits & ~((1u32 << bits_to_shift) - 1); + }; + const int_part = f32frombits(bits); + const frac_part = n - int_part; + return (int_part, frac_part); +}; + +@test fn test_modf() void = { + // 64 + let res = modf64(1.75f64); + assert(res.0 == 1.0f64); + assert(res.1 == 0.75f64); + res = modf64(0.75f64); + assert(res.0 == 0.0f64); + assert(res.1 == 0.75f64); + res = modf64(-0.75f64); + assert(res.0 == -0.0f64); + assert(res.1 == -0.75f64); + res = modf64(0.0f64); + assert(res.0 == 0.0f64); + assert(res.1 == 0.0f64); + assert(sign(res.1) > 0); + res = modf64(-0.0f64); + assert(res.0 == -0.0f64); + assert(res.1 == -0.0f64); + assert(sign(res.1) < 0); + + // 32 + let res = modf32(1.75f32); + assert(res.0 == 1.0f32); + assert(res.1 == 0.75f32); + res = modf32(0.75f32); + assert(res.0 == 0.0f32); + assert(res.1 == 0.75f32); + res = modf32(-0.75f32); + assert(res.0 == -0.0f32); + assert(res.1 == -0.75f32); + res = modf32(0.0f32); + assert(res.0 == 0.0f32); + assert(res.1 == 0.0f32); + assert(sign(res.1) > 0); + res = modf32(-0.0f32); + assert(res.0 == -0.0f32); + assert(res.0 == -0.0f32); + assert(sign(res.1) < 0); +};