hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

floats.ha (16109B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Returns the binary representation of the given f64.
      5 export fn f64bits(n: f64) u64 = *(&n: *u64);
      6 
      7 // Returns the binary representation of the given f32.
      8 export fn f32bits(n: f32) u32 = *(&n: *u32);
      9 
     10 // Returns f64 with the given binary representation.
     11 export fn f64frombits(n: u64) f64 = *(&n: *f64);
     12 
     13 // Returns f32 with the given binary representation.
     14 export fn f32frombits(n: u32) f32 = *(&n: *f32);
     15 
     16 // The number of bits in the significand of the binary representation of f64.
     17 export def F64_MANTISSA_BITS = 52;
     18 
     19 // The number of bits in the exponent of the binary representation of f64.
     20 export def F64_EXPONENT_BITS = 11;
     21 
     22 // The bias of the exponent of the binary representation of f64. Subtract this
     23 // from the exponent in the binary representation to get the actual exponent.
     24 export def F64_EXPONENT_BIAS = 1023;
     25 
     26 // The number of bits in the significand of the binary representation of f32.
     27 export def F32_MANTISSA_BITS = 23;
     28 
     29 // The number of bits in the exponent of the binary representation of f32.
     30 export def F32_EXPONENT_BITS = 8;
     31 
     32 // The bias of the exponent of the binary representation of f32. Subtract this
     33 // from the exponent in the binary representation to get the actual exponent.
     34 export def F32_EXPONENT_BIAS = 127;
     35 
     36 // Mask with each bit of an f64's mantissa set.
     37 export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
     38 
     39 // Mask with each bit of an f64's exponent set.
     40 export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
     41 
     42 // Mask with each bit of an f32's mantissa set.
     43 export def F32_MANTISSA_MASK: u32 = (1 << F32_MANTISSA_BITS) - 1;
     44 
     45 // Mask with each bit of an f32's exponent set.
     46 export def F32_EXPONENT_MASK: u32 = (1 << F32_EXPONENT_BITS) - 1;
     47 
     48 // The largest representable f64 value which is less than Infinity.
     49 export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308;
     50 
     51 // The smallest representable normal f64 value.
     52 export def F64_MIN_NORMAL: f64 = 2.2250738585072014e-308;
     53 
     54 // The smallest (subnormal) f64 value greater than zero.
     55 export def F64_MIN_SUBNORMAL: f64 = 5.0e-324;
     56 
     57 // The difference between 1 and the smallest f64 representable value that is
     58 // greater than 1.
     59 export def F64_EPS: f64 = 2.22040000000000004884e-16;
     60 
     61 // The largest representable f32 value which is less than Infinity.
     62 export def F32_MAX_NORMAL: f32 = 3.4028234e+38;
     63 
     64 // The smallest representable normal f32 value.
     65 export def F32_MIN_NORMAL: f32 = 1.1754944e-38;
     66 
     67 // The smallest (subnormal) f32 value greater than zero.
     68 export def F32_MIN_SUBNORMAL: f32 = 1.0e-45;
     69 
     70 // The difference between 1 and the smallest f32 representable value that is
     71 // greater than 1.
     72 export def F32_EPS: f32 = 1.1920928955078125e-7;
     73 
     74 // The mask that gets an f64's sign.
     75 def F64_SIGN_MASK: u64 = 1u64 << 63;
     76 
     77 // The mask that sets all exponent bits to 0.
     78 // NOTE: Replace with the following expression once the lexer supports it
     79 // 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
     80 def F64_EXP_REMOVAL_MASK: u64 =
     81 	0b1000000000001111111111111111111111111111111111111111111111111111;
     82 
     83 // The f64 that contains only an exponent that evaluates to zero.
     84 def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS;
     85 
     86 // The mask that gets an f32's sign.
     87 def F32_SIGN_MASK: u32 = 1u32 << 31;
     88 
     89 // The mask that sets all exponent bits to 0.
     90 // NOTE: Replace with the following expression once the lexer supports it
     91 // 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS);
     92 def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111;
     93 
     94 // The f32 that contains only an exponent that evaluates to zero.
     95 def F32_EXP_ZERO: u32 =
     96 	((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32);
     97 
     98 // The bits that represent the number 1f64.
     99 def F64_ONE: u64 = 0x3FF0000000000000;
    100 
    101 // Contains information about the structure of a specific floating point number
    102 // type.
    103 export type floatinfo = struct {
    104 	// Bits in significand.
    105 	mantbits: u64,
    106 	// Bits in exponent.
    107 	expbits: u64,
    108 	// Bias of exponent.
    109 	expbias: int,
    110 	// Mask for mantissa.
    111 	mantmask: u64,
    112 	// Mask for exponent.
    113 	expmask: u64,
    114 };
    115 
    116 // A [[floatinfo]] structure defining the structure of the f64 type.
    117 export const f64info: floatinfo = floatinfo {
    118 	mantbits = 52,
    119 	expbits = 11,
    120 	expbias = 1023,
    121 	mantmask = (1 << 52) - 1,
    122 	expmask = (1 << 11) - 1,
    123 };
    124 
    125 // A [[floatinfo]] structure defining the structure of the f32 type.
    126 export const f32info: floatinfo = floatinfo {
    127 	mantbits = 23,
    128 	expbits = 8,
    129 	expbias = 127,
    130 	mantmask = (1 << 23) - 1,
    131 	expmask = (1 << 8) - 1,
    132 };
    133 
    134 // The floating point value representing Not a Number, i.e. an undefined or
    135 // unrepresentable value. You cannot test if a number is NaN by comparing to
    136 // this value; see [[isnan]] instead.
    137 export def NAN = 0.0 / 0.0;
    138 
    139 // The floating point value representing positive infinity. Use -[[INF]] for
    140 // negative infinity.
    141 export def INF = 1.0 / 0.0;
    142 
    143 // Returns true if the given floating-point number is NaN.
    144 export fn isnan(n: f64) bool = n != n;
    145 
    146 // Returns true if the given floating-point number is infinite.
    147 export fn isinf(n: f64) bool = {
    148 	const bits = f64bits(n);
    149 	const mant = bits & F64_MANTISSA_MASK;
    150 	const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK;
    151 	return exp == F64_EXPONENT_MASK && mant == 0;
    152 };
    153 
    154 @test fn isinf() void = {
    155 	assert(isinf(INF));
    156 	assert(isinf(-INF));
    157 	assert(!isinf(NAN));
    158 	assert(!isinf(1.23));
    159 	assert(!isinf(-1.23f32));
    160 };
    161 
    162 // Returns true if the given f64 is normal.
    163 export fn isnormalf64(n: f64) bool = {
    164 	const bits = f64bits(n);
    165 	const mant = bits & F64_MANTISSA_MASK;
    166 	const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK;
    167 	return exp != F64_EXPONENT_MASK && (exp > 0 || mant == 0);
    168 };
    169 
    170 // Returns true if the given f32 is normal.
    171 export fn isnormalf32(n: f32) bool = {
    172 	const bits = f32bits(n);
    173 	const mant = bits & F32_MANTISSA_MASK;
    174 	const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK;
    175 	return exp != F32_EXPONENT_MASK && (exp > 0 || mant == 0);
    176 };
    177 
    178 // Returns true if the given f64 is subnormal.
    179 export fn issubnormalf64(n: f64) bool = {
    180 	const bits = f64bits(n);
    181 	const mant = bits & F64_MANTISSA_MASK;
    182 	const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK;
    183 	return exp == 0 && mant != 0;
    184 };
    185 
    186 // Returns true if the given f32 is subnormal.
    187 export fn issubnormalf32(n: f32) bool = {
    188 	const bits = f32bits(n);
    189 	const mant = bits & F32_MANTISSA_MASK;
    190 	const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK;
    191 	return exp == 0 && mant != 0;
    192 };
    193 
    194 // Returns the absolute value of f64 n.
    195 export fn absf64(n: f64) f64 = {
    196 	if (isnan(n)) {
    197 		return n;
    198 	};
    199 	return f64frombits(f64bits(n) & ~F64_SIGN_MASK);
    200 };
    201 
    202 // Returns the absolute value of f32 n.
    203 export fn absf32(n: f32) f32 = {
    204 	if (isnan(n)) {
    205 		return n;
    206 	};
    207 	return f32frombits(f32bits(n) & ~F32_SIGN_MASK);
    208 };
    209 
    210 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also
    211 // signed.
    212 export fn signf64(x: f64) i64 = {
    213 	if (f64bits(x) & F64_SIGN_MASK == 0) {
    214 		return 1i64;
    215 	} else {
    216 		return -1i64;
    217 	};
    218 };
    219 
    220 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also
    221 // signed.
    222 export fn signf32(x: f32) i64 = {
    223 	if (f32bits(x) & F32_SIGN_MASK == 0) {
    224 		return 1i64;
    225 	} else {
    226 		return -1i64;
    227 	};
    228 };
    229 
    230 // Returns whether or not x is positive.
    231 export fn ispositivef64(x: f64) bool = signf64(x) == 1i64;
    232 
    233 // Returns whether or not x is positive.
    234 export fn ispositivef32(x: f32) bool = signf32(x) == 1i32;
    235 
    236 // Returns whether or not x is negative.
    237 export fn isnegativef64(x: f64) bool = signf64(x) == -1i64;
    238 
    239 // Returns whether or not x is negative.
    240 export fn isnegativef32(x: f32) bool = signf32(x) == -1i32;
    241 
    242 // Returns x, but with the sign of y.
    243 export fn copysignf64(x: f64, y: f64) f64 = {
    244 	return f64frombits((f64bits(x) & ~F64_SIGN_MASK) |
    245 		(f64bits(y) & F64_SIGN_MASK));
    246 };
    247 
    248 // Returns x, but with the sign of y.
    249 export fn copysignf32(x: f32, y: f32) f32 = {
    250 	return f32frombits((f32bits(x) & ~F32_SIGN_MASK) |
    251 		(f32bits(y) & F32_SIGN_MASK));
    252 };
    253 
    254 // Takes a potentially subnormal f64 n and returns a normal f64 normal_float
    255 // and an exponent exp such that n == normal_float * 2^{exp}.
    256 export fn normalizef64(n: f64) (f64, i64) = {
    257 	if (issubnormalf64(n)) {
    258 		const factor = 1i64 << (F64_MANTISSA_BITS: i64);
    259 		const normal_float = (n * (factor: f64));
    260 		return (normal_float, -(F64_MANTISSA_BITS: i64));
    261 	};
    262 	return (n, 0);
    263 };
    264 
    265 // Takes a potentially subnormal f32 n and returns a normal f32 normal_float
    266 // and an exponent exp such that n == normal_float * 2^{exp}.
    267 export fn normalizef32(n: f32) (f32, i64) = {
    268 	if (issubnormalf32(n)) {
    269 		const factor = 1i32 << (F32_MANTISSA_BITS: i32);
    270 		const normal_float = n * factor: f32;
    271 		return (normal_float, -(F32_MANTISSA_BITS: i64));
    272 	};
    273 	return (n, 0);
    274 };
    275 
    276 // Breaks a f64 down into its mantissa and exponent. The mantissa will be
    277 // between 0.5 and 1.
    278 export fn frexpf64(n: f64) (f64, i64) = {
    279 	if (isnan(n) || isinf(n) || n == 0f64) {
    280 		return (n, 0);
    281 	};
    282 	const normalized = normalizef64(n);
    283 	const normal_float = normalized.0;
    284 	const normalization_exp = normalized.1;
    285 	const bits = f64bits(normal_float);
    286 	const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
    287 	const exp: i64 = normalization_exp +
    288 		(raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1;
    289 	const mantissa: f64 =
    290 		f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO);
    291 	return (mantissa, exp);
    292 };
    293 
    294 // Breaks a f32 down into its mantissa and exponent. The mantissa will be
    295 // between 0.5 and 1.
    296 export fn frexpf32(n: f32) (f32, i64) = {
    297 	if (isnan(n) || isinf(n) || n == 0f32) {
    298 		return (n, 0);
    299 	};
    300 	const normalized = normalizef32(n);
    301 	const normal_float = normalized.0;
    302 	const normalization_exp = normalized.1;
    303 	const bits = f32bits(normal_float);
    304 	const raw_exp: u64 = (bits >> F32_MANTISSA_BITS)
    305 		& F32_EXPONENT_MASK: u32;
    306 	const exp: i64 = normalization_exp +
    307 		(raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1;
    308 	const mantissa: f32 =
    309 		f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO);
    310 	return (mantissa, exp);
    311 };
    312 
    313 // Creates an f64 from a mantissa and an exponent.
    314 export fn ldexpf64(mantissa: f64, exp: i64) f64 = {
    315 	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) {
    316 		return mantissa;
    317 	};
    318 	const normalized = normalizef64(mantissa);
    319 	const normal_float = normalized.0;
    320 	const normalization_exp = normalized.1;
    321 	const bits = f64bits(normal_float);
    322 	const mantissa_exp =
    323 		(((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
    324 		(F64_EXPONENT_BIAS: i64);
    325 	let res_exp = exp + normalization_exp + mantissa_exp;
    326 	// Underflow
    327 	if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) {
    328 		return copysignf64(0f64, mantissa);
    329 	};
    330 	// Overflow
    331 	if (res_exp > (F64_EXPONENT_BIAS: i64)) {
    332 		if (mantissa < 0f64) {
    333 			return -INF;
    334 		} else {
    335 			return INF;
    336 		};
    337 	};
    338 	// Subnormal
    339 	let subnormal_factor = 1f64;
    340 	if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) {
    341 		res_exp += (F64_MANTISSA_BITS: i64) - 1;
    342 		subnormal_factor = 1f64 /
    343 			((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64);
    344 	};
    345 	const res: u64 = (bits & F64_EXP_REMOVAL_MASK) |
    346 		(
    347 			((res_exp: u64) + F64_EXPONENT_BIAS)
    348 			<< F64_MANTISSA_BITS
    349 		);
    350 	return subnormal_factor * f64frombits(res);
    351 };
    352 
    353 // Creates an f32 from a mantissa and an exponent.
    354 export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
    355 	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) {
    356 		return mantissa;
    357 	};
    358 	const normalized = normalizef32(mantissa);
    359 	const normal_float = normalized.0;
    360 	const normalization_exp = normalized.1;
    361 	const bits = f32bits(normal_float);
    362 	const mantissa_exp =
    363 		(((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
    364 		(F32_EXPONENT_BIAS: i32);
    365 	let res_exp = exp + normalization_exp + mantissa_exp;
    366 	// Underflow
    367 	if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) {
    368 		return copysignf32(0.0f32, mantissa);
    369 	};
    370 	// Overflow
    371 	if (res_exp > (F32_EXPONENT_BIAS: i32)) {
    372 		if (mantissa < 0.0f32) {
    373 			return -INF;
    374 		} else {
    375 			return INF;
    376 		};
    377 	};
    378 	// Subnormal
    379 	let subnormal_factor = 1.0f32;
    380 	if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) {
    381 		res_exp += (F32_MANTISSA_BITS: i32) - 1;
    382 		subnormal_factor = 1.0f32 /
    383 			((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32);
    384 	};
    385 	const res: u32 = (bits & F32_EXP_REMOVAL_MASK) |
    386 		(
    387 			((res_exp: u32) + F32_EXPONENT_BIAS)
    388 			<< (F32_MANTISSA_BITS: u32)
    389 		);
    390 	return subnormal_factor * f32frombits(res);
    391 };
    392 
    393 // Returns the integer and fractional parts of an f64.
    394 export fn modfracf64(n: f64) (f64, f64) = {
    395 	if (n < 1f64) {
    396 		if (n < 0f64) {
    397 			let positive_parts = modfracf64(-n);
    398 			return (-positive_parts.0, -positive_parts.1);
    399 		};
    400 		if (n == 0f64) {
    401 			return (n, n);
    402 		};
    403 		return (0f64, n);
    404 	};
    405 	let bits = f64bits(n);
    406 	const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
    407 		(F64_EXPONENT_BIAS: i64);
    408 	// For exponent exp, all integers can be represented with the top exp
    409 	// bits of the mantissa
    410 	const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64;
    411 	if (exp < (sign_and_exp_bits: i64)) {
    412 		const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64);
    413 		bits = bits & ~((1u64 << bits_to_shift) - 1);
    414 	};
    415 	const int_part = f64frombits(bits);
    416 	const frac_part = n - int_part;
    417 	return (int_part, frac_part);
    418 };
    419 
    420 // Returns the integer and fractional parts of an f32.
    421 export fn modfracf32(n: f32) (f32, f32) = {
    422 	if (n < 1.0f32) {
    423 		if (n < 0.0f32) {
    424 			let positive_parts = modfracf32(-n);
    425 			return (-positive_parts.0, -positive_parts.1);
    426 		};
    427 		if (n == 0.0f32) {
    428 			return (n, n);
    429 		};
    430 		return (0f32, n);
    431 	};
    432 	let bits = f32bits(n);
    433 	const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
    434 		(F32_EXPONENT_BIAS: i32);
    435 	// For exponent exp, all integers can be represented with the top exp
    436 	// bits of the mantissa
    437 	const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32;
    438 	if (exp < (sign_and_exp_bits: i32)) {
    439 		const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32);
    440 		bits = bits & ~((1u32 << bits_to_shift) - 1);
    441 	};
    442 	const int_part = f32frombits(bits);
    443 	const frac_part = n - int_part;
    444 	return (int_part, frac_part);
    445 };
    446 
    447 // Returns the f32 that is closest to 'x' in direction of 'y'. Returns NaN
    448 // if either parameter is NaN. Returns 'x' if both parameters are same.
    449 export fn nextafterf32(x: f32, y: f32) f32 = {
    450 	if (isnan(x) || isnan(y)) {
    451 		return x + y;
    452 	};
    453 	let ux = f32bits(x);
    454 	let uy = f32bits(y);
    455 	if (ux == uy) {
    456 		return x;
    457 	};
    458 
    459 	let absx = ux & 0x7fffffff, absy = uy & 0x7fffffff;
    460 	if (absx == 0) {
    461 		if (absy == 0) {
    462 			return x;
    463 		};
    464 		ux = uy & 0x80000000 | 1;
    465 	} else if (absx > absy || (ux ^ uy) & 0x80000000 != 0) {
    466 		ux -= 1;
    467 	} else {
    468 		ux += 1;
    469 	};
    470 	// TODO handle over/underflow
    471 	return f32frombits(ux);
    472 };
    473 
    474 // Returns the f64 that is closest to 'x' in direction of 'y' Returns NaN
    475 // if either parameter is NaN. Returns 'x' if both parameters are same.
    476 export fn nextafterf64(x: f64, y: f64) f64 = {
    477 	if (isnan(x) || isnan(y)) {
    478 		return x + y;
    479 	};
    480 	let ux = f64bits(x);
    481 	let uy = f64bits(y);
    482 	if (ux == uy) {
    483 		return x;
    484 	};
    485 
    486 	let absx = ux & ~(1u64 << 63), absy = uy & ~(1u64 << 63);
    487 	if (absx == 0) {
    488 		if (absy == 0) {
    489 			return x;
    490 		};
    491 		ux = uy & (1u64 << 63) | 1u64;
    492 	} else if (absx > absy || (ux ^ uy) & (1u64 << 63) != 0) {
    493 		ux -= 1;
    494 	} else {
    495 		ux += 1;
    496 	};
    497 	// TODO handle over/underflow
    498 	return f64frombits(ux);
    499 };
    500 
    501 // Round a f32 to nearest integer value in floating point format
    502 fn nearbyintf32(x: f32) f32 = {
    503 	let n = f32bits(x);
    504 	let e = (n & F32_EXPONENT_MASK) >> F32_MANTISSA_BITS;
    505 	if (e >= F32_EXPONENT_BIAS + F32_MANTISSA_BITS) {
    506 		return x;
    507 	};
    508 	let s = n >> 31;
    509 	let y = if (s != 0)
    510 		x - 1.0 / F32_EPS + 1.0 / F32_EPS
    511 	else
    512 		x + 1.0 / F32_EPS - 1.0 / F32_EPS;
    513 
    514 	if (y == 0.0f32)
    515 		return if (s != 0) -0.0f32 else 0.0f32
    516 	else
    517 		return y;
    518 };
    519 
    520 // Round a f64 to nearest integer value in floating point format
    521 fn nearbyintf64(x: f64) f64 = {
    522 	let n = f64bits(x);
    523 	let e = (n & F64_EXPONENT_MASK) >> F64_MANTISSA_BITS;
    524 	if (e >= F64_EXPONENT_BIAS + F64_MANTISSA_BITS) {
    525 		return x;
    526 	};
    527 	let s = n >> 63;
    528 	let y = if (s != 0)
    529 		x - 1.0 / F64_EPS + 1.0 / F64_EPS
    530 	else
    531 		x + 1.0 / F64_EPS - 1.0 / F64_EPS;
    532 
    533 	if (y == 0.0f64)
    534 		return if (s != 0) -0.0f64 else 0.0f64
    535 	else
    536 		return y;
    537 };