Add a variety of float functions to math/floats.ha - hare

commit cdcb4e915b5f2b0c02221b1c59a6439f368c58db
parent 834cae484694eb24f095ade777fd50996471da8d
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Wed, 14 Jul 2021 18:48:24 +0200

Add a variety of float functions to math/floats.ha

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
M math/floats.ha  | 444 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--

1 file changed, 437 insertions(+), 7 deletions(-)
diff --git a/math/floats.ha b/math/floats.ha
@@ -1,3 +1,5 @@
+use types;
+
 // The floating point value representing Not a Number, i.e. an undefined or
 // unrepresentable value. You cannot test if a number is NaN by comparing to
 // this value; see [[isnan]] instead.
@@ -7,8 +9,19 @@ export def NAN: f32 = 0.0 / 0.0;
 // negative infinity.
 export def INF: f32 = 1.0 / 0.0;
 
+// Returns true if the given f64 is NaN.
+export fn isnanf64(n: f64) bool = n != n;
+
+// Returns true if the given f32 is NaN.
+export fn isnanf32(n: f32) bool = n != n;
+
 // Returns true if the given floating-point number is NaN.
-export fn isnan(n: f64) bool = n != n;
+export fn isnan(n: types::floating) bool = {
+	return match (n) {
+		f: f64 => isnanf64(f),
+		f: f32 => isnanf32(f),
+	};
+};
 
 @test fn isnan() void = {
 	assert(isnan(NAN));
@@ -35,7 +48,7 @@ export fn isinf(n: f64) bool = {
 };
 
 // Returns true if the given floating-point number is normal.
-export fn isnormal(n: (f32 | f64)) bool = {
+export fn isnormal(n: types::floating) bool = {
 	return match (n) {
 		n: f32 => isnormalf32(n),
 		n: f64 => isnormalf64(n),
@@ -59,7 +72,7 @@ export fn isnormalf64(n: f64) bool = {
 };
 
 // Returns true if the given floating-point number is subnormal.
-export fn issubnormal(n: (f32 | f64)) bool = {
+export fn issubnormal(n: types::floating) bool = {
 	return match (n) {
 		n: f32 => issubnormalf32(n),
 		n: f64 => issubnormalf64(n),
@@ -171,11 +184,17 @@ export def F32_EXPONENT_BITS: u64 = 8;
 // from the exponent in the binary representation to get the actual exponent.
 export def F32_EXPONENT_BIAS: u16 = 127;
 
-def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
-def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
+// Mask with each bit of an f64's mantissa set.
+export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
 
-def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;
-def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;
+// Mask with each bit of an f64's exponent set.
+export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
+
+// Mask with each bit of an f32's mantissa set.
+export def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;
+
+// Mask with each bit of an f32's exponent set.
+export def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;
 
 // The largest representable f64 value which is less than Infinity.
 export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308;
@@ -195,6 +214,29 @@ export def F32_MIN_NORMAL: f32 = 1.1754944e-38;
 // The smallest (subnormal) f32 value greater than zero.
 export def F32_MIN: f32 = 1.0e-45;
 
+// The mask that gets an f64's sign.
+def F64_SIGN_MASK: u64 = 1u64 << 63;
+
+// The mask that sets all exponent bits to 0.
+// NOTE: Replace with the following expression once the lexer supports it
+// 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
+def F64_EXP_REMOVAL_MASK: u64 =
+	0b1000000000001111111111111111111111111111111111111111111111111111;
+
+// The f64 that contains only an exponent that evaluates to zero.
+def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS;
+
+// The mask that gets an f32's sign.
+def F32_SIGN_MASK: u32 = 1u32 << 31;
+
+// The mask that sets all exponent bits to 0.
+// NOTE: Replace with the following expression once the lexer supports it
+// 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS);
+def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111;
+
+// The f32 that contains only an exponent that evaluates to zero.
+def F32_EXP_ZERO: u32 = ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32);
+
 // Contains information about the structure of a specific floating point number
 // type.
 export type floatinfo = struct {
@@ -228,3 +270,391 @@ export const f32info: floatinfo = floatinfo {
 	expmask = (1 << 8) - 1,
 };
 
+// Returns the absolute value of n.
+export fn absf64(n: f64) f64 = {
+	return f64frombits(f64bits(n) & ~F64_SIGN_MASK);
+};
+
+// Returns the absolute value of n.
+export fn absf32(n: f32) f32 = {
+	return f32frombits(f32bits(n) & ~F32_SIGN_MASK);
+};
+
+// Returns the absolute value of n.
+export fn abs(n: types::floating) f64 = {
+	return match (n) {
+		n: f64 => absf64(n),
+		n: f32 => (absf32(n): f64),
+	};
+};
+
+@test fn test_abs() void = {
+	assert(absf64(2.0f64) == 2.0f64);
+	assert(absf32(2.0f32) == 2.0f32);
+	assert(abs(2.0f64) == 2.0f64);
+	assert(abs(2.0f32) == 2.0f64);
+	assert(abs(-2.0f64) == 2.0f64);
+	assert(abs(-2.0f32) == 2.0f32);
+	assert(abs(0f32) == 0f32);
+	assert(abs(0f64) == 0f64);
+};
+
+// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
+export fn signf64(x: f64) i64 = {
+	if (f64bits(x) & F64_SIGN_MASK == 0) {
+		return 1i64;
+	} else {
+		return -1i64;
+	};
+};
+
+// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
+export fn signf32(x: f32) i64 = {
+	if (f32bits(x) & F32_SIGN_MASK == 0) {
+		return 1i64;
+	} else {
+		return -1i64;
+	};
+};
+
+// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
+export fn sign(x: types::floating) i64 = {
+	return match (x) {
+		n: f64 => signf64(n),
+		n: f32 => signf32(n),
+	};
+};
+
+@test fn test_sign() void = {
+	assert(sign(0.0f64) == 1i64);
+	assert(sign(-0.0f64) == -1i64);
+	assert(sign(0.0f32) == 1i64);
+	assert(sign(-0.0f32) == -1i64);
+	assert(sign(1.5f64) == 1i64);
+	assert(sign(-1.5f64) == -1i64);
+};
+
+// Returns x, but with the sign of y.
+export fn copysignf64(x: f64, y: f64) f64 = {
+	return f64frombits((f64bits(x) & ~F64_SIGN_MASK) |
+		(f64bits(y) & F64_SIGN_MASK));
+};
+
+// Returns x, but with the sign of y.
+export fn copysignf32(x: f32, y: f32) f32 = {
+	return f32frombits((f32bits(x) & ~F32_SIGN_MASK) |
+		(f32bits(y) & F32_SIGN_MASK));
+};
+
+// Returns x, but with the sign of y.
+export fn copysign(x: types::floating, y: types::floating) f64 = {
+	return match (x) {
+		n: f64 => copysignf64(n, (y: f64)),
+		n: f32 => (copysignf32(n, (y: f32)): f64),
+	};
+};
+
+@test fn test_copysign() void = {
+	assert(copysign(100.0f64, 1.0f64) == 100.0f64);
+	assert(copysign(100.0f64, -1.0f64) == -100.0f64);
+	assert(copysign(100.0f32, 1.0f32) == 100.0f32);
+	assert(copysign(100.0f32, -1.0f32) == -100.0f32);
+	assert(copysign(100.0f64, 0.0f64) == 100.0f64);
+	assert(copysign(100.0f64, -0.0f64) == -100.0f64);
+	assert(copysign(0.0f64, 100.0f64) == 0.0f64);
+	assert(sign(copysign(0.0f64, 100.0f64)) > 0);
+	assert(copysign(0.0f64, -100.0f64) == 0.0f64);
+	assert(sign(copysign(0.0f64, -100.0f64)) < 0);
+};
+
+// Takes a potentially subnormal f64 n and returns a normal f64 normal_float
+// and an exponent exp such that n == normal_float * 2^{exp}.
+export fn normalizef64(n: f64) (f64, i64) = {
+	if (issubnormalf64(n)) {
+		const factor = 1i64 << (F64_MANTISSA_BITS: i64);
+		const normal_float = (n * (factor: f64));
+		return (normal_float, -(F64_MANTISSA_BITS: i64));
+	};
+	return (n, 0);
+};
+
+// Takes a potentially subnormal f32 n and returns a normal f32 normal_float
+// and an exponent exp such that n == normal_float * 2^{exp}.
+export fn normalizef32(n: f32) (f64, i64) = {
+	if (issubnormalf32(n)) {
+		const factor = 1i32 << (F32_MANTISSA_BITS: i32);
+		const normal_float = ((n * (factor: f64)): f64);
+		return (normal_float, -(F32_MANTISSA_BITS: i64));
+	};
+	return (n, 0);
+};
+
+@test fn test_normalize() void = {
+	let res = normalizef64(5.0e-320);
+	assert(res.0 > F64_MIN_NORMAL);
+	assert(res.1 < 0i64);
+	res = normalizef64(5.0e-300);
+	assert(res.0 == 5.0e-300);
+	assert(res.1 == 0i64);
+};
+
+// Breaks a f64 down into its mantissa and exponent. The mantissa will be between 0.5 and
+// 1.
+export fn frexpf64(n: f64) (f64, i64) = {
+	if (isnan(n) || isinf(n) || n == 0f64) {
+		return (n, 0);
+	};
+	const normalized = normalizef64(n);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f64bits(normal_float);
+	const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
+	const exp: i64 = normalization_exp +
+		(raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1;
+	const mantissa: f64 = f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO);
+	return (mantissa, exp);
+};
+
+// Breaks a f32 down into its mantissa and exponent. The mantissa will be between 0.5 and
+// 1.
+export fn frexpf32(n: f32) (f64, i64) = {
+	if (isnan(n) || isinf(n) || n == 0f32) {
+		return (n, 0);
+	};
+	const normalized = normalizef32(n);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f32bits(normal_float);
+	const raw_exp: u64 = ((bits >> (F32_MANTISSA_BITS: u32)) &
+		(F32_EXPONENT_MASK: u32));
+	const exp: i64 = normalization_exp +
+		(raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1;
+	const mantissa: f32 = f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO);
+	return (mantissa, exp);
+};
+
+// Breaks a float down into its mantissa and exponent. The mantissa will be between 0.5
+// and 1.
+export fn frexp(n: types::floating) (f64, i64) = {
+	return match (n) {
+		n: f64 => frexpf64(n),
+		n: f32 => frexpf32(n),
+	};
+};
+
+@test fn test_frexp() void = {
+	let res = frexp(3.0f64);
+	assert(res.0 == 0.75f64);
+	assert(res.1 == 2i64);
+	res = frexp(2.42f64);
+	assert(res.0 == 0.605f64);
+	assert(res.1 == 2i64);
+	res = frexp(NAN);
+	assert(res.1 == 0);
+	res = frexp(INF);
+	assert(res.1 == 0);
+};
+
+// Creates an f64 from a mantissa and an exponent.
+export fn ldexpf64(mantissa: f64, exp: i64) f64 = {
+	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) {
+		return mantissa;
+	};
+	const normalized = normalizef64(mantissa);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f64bits(normal_float);
+	const mantissa_exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
+		(F64_EXPONENT_BIAS: i64);
+	let res_exp = exp + normalization_exp + mantissa_exp;
+	// Underflow
+	if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) {
+		return copysign(0.0f64, mantissa);
+	};
+	// Overflow
+	if (res_exp > (F64_EXPONENT_BIAS: i64)) {
+		if (mantissa < 0.0f64) {
+			return -INF;
+		} else {
+			return INF;
+		};
+	};
+	// Subnormal
+	let subnormal_factor = 1.0f64;
+	if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) {
+		res_exp += (F64_MANTISSA_BITS: i64) - 1;
+		subnormal_factor = 1.0f64 /
+			((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64);
+	};
+	const res: u64 = (bits & F64_EXP_REMOVAL_MASK) |
+		(
+			((res_exp: u64) + F64_EXPONENT_BIAS)
+			<< F64_MANTISSA_BITS
+		);
+	return subnormal_factor * f64frombits(res);
+};
+
+// Creates an f32 from a mantissa and an exponent.
+export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
+	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) {
+		return mantissa;
+	};
+	const normalized = normalizef32(mantissa);
+	const normal_float = normalized.0;
+	const normalization_exp = normalized.1;
+	const bits = f32bits(normal_float);
+	const mantissa_exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
+		(F32_EXPONENT_BIAS: i32);
+	let res_exp = exp + normalization_exp + mantissa_exp;
+	// Underflow
+	if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) {
+		return copysign(0.0f32, mantissa);
+	};
+	// Overflow
+	if (res_exp > (F32_EXPONENT_BIAS: i32)) {
+		if (mantissa < 0.0f32) {
+			return -INF;
+		} else {
+			return INF;
+		};
+	};
+	// Subnormal
+	let subnormal_factor = 1.0f32;
+	if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) {
+		res_exp += (F32_MANTISSA_BITS: i32) - 1;
+		subnormal_factor = 1.0f32 /
+			((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32);
+	};
+	const res: u32 = (bits & F32_EXP_REMOVAL_MASK) |
+		(
+			((res_exp: u32) + F32_EXPONENT_BIAS)
+			<< (F32_MANTISSA_BITS: u32)
+		);
+	return subnormal_factor * f32frombits(res);
+};
+
+@test fn test_frexp_ldexp() void = {
+	const tests64: [_]f64 = [INF, -INF,
+		0.0, 1.0, -1.0, 2.42, 123456789.0,
+		F64_MIN_NORMAL, F64_MAX_NORMAL,
+		3.0e-310f64];
+	for (let i = 0z; i < len(tests64); i += 1) {
+		const parts = frexpf64(tests64[i]);
+		const res64 = ldexpf64(parts.0, parts.1);
+		assert(res64 == tests64[i]);
+	};
+	assert(ldexpf64(1.0f64, -1076i64) == 0.0f64);
+	assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64);
+	assert(sign(ldexpf64(-1.0f64, -1076i64)) < 0);
+	assert(ldexpf64(2.0f64, 1024i64) == INF);
+	assert(ldexpf64(-2.0f64, 1024i64) == -INF);
+
+	const tests32: [_]f32 = [INF, -INF,
+		0.0, 1.0, -1.0, 2.42, 123456789.0,
+		F32_MIN_NORMAL, F32_MAX_NORMAL,
+		3.0e-39f32];
+	for (let i = 0z; i < len(tests32); i += 1) {
+		const parts = frexpf32(tests32[i]);
+		const res = ldexpf32(parts.0, parts.1);
+		assert(res == tests32[i]);
+	};
+	assert(ldexpf32(1.0f32, -1076i32) == 0.0f32);
+	assert(ldexpf32(-1.0f32, -1076i32) == -0.0f32);
+	assert(sign(ldexpf32(-1.0f32, -1076i32)) < 0);
+	assert(ldexpf32(2.0f32, 1024i32) == INF);
+	assert(ldexpf32(-2.0f32, 1024i32) == -INF);
+};
+
+// Returns the integer and fractional parts of an f64.
+export fn modf64(n: f64) (f64, f64) = {
+	if (n < 1.0f64) {
+		if (n < 0.0f64) {
+			let positive_parts = modf64(-n);
+			return (-positive_parts.0, -positive_parts.1);
+		};
+		if (n == 0.0f64) {
+			return (n, n);
+		};
+		return (0.0f64, n);
+	};
+	let bits = f64bits(n);
+	const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
+		(F64_EXPONENT_BIAS: i64);
+	// For exponent exp, all integers can be represented with the top exp
+	// bits of the mantissa
+	const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64;
+	if (exp < (sign_and_exp_bits: i64)) {
+		const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64);
+		bits = bits & ~((1u64 << bits_to_shift) - 1);
+	};
+	const int_part = f64frombits(bits);
+	const frac_part = n - int_part;
+	return (int_part, frac_part);
+};
+
+// Returns the integer and fractional parts of an f32.
+export fn modf32(n: f32) (f32, f32) = {
+	if (n < 1.0f32) {
+		if (n < 0.0f32) {
+			let positive_parts = modf32(-n);
+			return (-positive_parts.0, -positive_parts.1);
+		};
+		if (n == 0.0f32) {
+			return (n, n);
+		};
+		return (0.0f32, n);
+	};
+	let bits = f32bits(n);
+	const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
+		(F32_EXPONENT_BIAS: i32);
+	// For exponent exp, all integers can be represented with the top exp
+	// bits of the mantissa
+	const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32;
+	if (exp < (sign_and_exp_bits: i32)) {
+		const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32);
+		bits = bits & ~((1u32 << bits_to_shift) - 1);
+	};
+	const int_part = f32frombits(bits);
+	const frac_part = n - int_part;
+	return (int_part, frac_part);
+};
+
+@test fn test_modf() void = {
+	// 64
+	let res = modf64(1.75f64);
+	assert(res.0 == 1.0f64);
+	assert(res.1 == 0.75f64);
+	res = modf64(0.75f64);
+	assert(res.0 == 0.0f64);
+	assert(res.1 == 0.75f64);
+	res = modf64(-0.75f64);
+	assert(res.0 == -0.0f64);
+	assert(res.1 == -0.75f64);
+	res = modf64(0.0f64);
+	assert(res.0 == 0.0f64);
+	assert(res.1 == 0.0f64);
+	assert(sign(res.1) > 0);
+	res = modf64(-0.0f64);
+	assert(res.0 == -0.0f64);
+	assert(res.1 == -0.0f64);
+	assert(sign(res.1) < 0);
+
+	// 32
+	let res = modf32(1.75f32);
+	assert(res.0 == 1.0f32);
+	assert(res.1 == 0.75f32);
+	res = modf32(0.75f32);
+	assert(res.0 == 0.0f32);
+	assert(res.1 == 0.75f32);
+	res = modf32(-0.75f32);
+	assert(res.0 == -0.0f32);
+	assert(res.1 == -0.75f32);
+	res = modf32(0.0f32);
+	assert(res.0 == 0.0f32);
+	assert(res.1 == 0.0f32);
+	assert(sign(res.1) > 0);
+	res = modf32(-0.0f32);
+	assert(res.0 == -0.0f32);
+	assert(res.0 == -0.0f32);
+	assert(sign(res.1) < 0);
+};

	hare The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE