floats.ha (16109B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Returns the binary representation of the given f64. 5 export fn f64bits(n: f64) u64 = *(&n: *u64); 6 7 // Returns the binary representation of the given f32. 8 export fn f32bits(n: f32) u32 = *(&n: *u32); 9 10 // Returns f64 with the given binary representation. 11 export fn f64frombits(n: u64) f64 = *(&n: *f64); 12 13 // Returns f32 with the given binary representation. 14 export fn f32frombits(n: u32) f32 = *(&n: *f32); 15 16 // The number of bits in the significand of the binary representation of f64. 17 export def F64_MANTISSA_BITS = 52; 18 19 // The number of bits in the exponent of the binary representation of f64. 20 export def F64_EXPONENT_BITS = 11; 21 22 // The bias of the exponent of the binary representation of f64. Subtract this 23 // from the exponent in the binary representation to get the actual exponent. 24 export def F64_EXPONENT_BIAS = 1023; 25 26 // The number of bits in the significand of the binary representation of f32. 27 export def F32_MANTISSA_BITS = 23; 28 29 // The number of bits in the exponent of the binary representation of f32. 30 export def F32_EXPONENT_BITS = 8; 31 32 // The bias of the exponent of the binary representation of f32. Subtract this 33 // from the exponent in the binary representation to get the actual exponent. 34 export def F32_EXPONENT_BIAS = 127; 35 36 // Mask with each bit of an f64's mantissa set. 37 export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1; 38 39 // Mask with each bit of an f64's exponent set. 40 export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1; 41 42 // Mask with each bit of an f32's mantissa set. 43 export def F32_MANTISSA_MASK: u32 = (1 << F32_MANTISSA_BITS) - 1; 44 45 // Mask with each bit of an f32's exponent set. 46 export def F32_EXPONENT_MASK: u32 = (1 << F32_EXPONENT_BITS) - 1; 47 48 // The largest representable f64 value which is less than Infinity. 49 export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308; 50 51 // The smallest representable normal f64 value. 52 export def F64_MIN_NORMAL: f64 = 2.2250738585072014e-308; 53 54 // The smallest (subnormal) f64 value greater than zero. 55 export def F64_MIN_SUBNORMAL: f64 = 5.0e-324; 56 57 // The difference between 1 and the smallest f64 representable value that is 58 // greater than 1. 59 export def F64_EPS: f64 = 2.22040000000000004884e-16; 60 61 // The largest representable f32 value which is less than Infinity. 62 export def F32_MAX_NORMAL: f32 = 3.4028234e+38; 63 64 // The smallest representable normal f32 value. 65 export def F32_MIN_NORMAL: f32 = 1.1754944e-38; 66 67 // The smallest (subnormal) f32 value greater than zero. 68 export def F32_MIN_SUBNORMAL: f32 = 1.0e-45; 69 70 // The difference between 1 and the smallest f32 representable value that is 71 // greater than 1. 72 export def F32_EPS: f32 = 1.1920928955078125e-7; 73 74 // The mask that gets an f64's sign. 75 def F64_SIGN_MASK: u64 = 1u64 << 63; 76 77 // The mask that sets all exponent bits to 0. 78 // NOTE: Replace with the following expression once the lexer supports it 79 // 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); 80 def F64_EXP_REMOVAL_MASK: u64 = 81 0b1000000000001111111111111111111111111111111111111111111111111111; 82 83 // The f64 that contains only an exponent that evaluates to zero. 84 def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS; 85 86 // The mask that gets an f32's sign. 87 def F32_SIGN_MASK: u32 = 1u32 << 31; 88 89 // The mask that sets all exponent bits to 0. 90 // NOTE: Replace with the following expression once the lexer supports it 91 // 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS); 92 def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111; 93 94 // The f32 that contains only an exponent that evaluates to zero. 95 def F32_EXP_ZERO: u32 = 96 ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32); 97 98 // The bits that represent the number 1f64. 99 def F64_ONE: u64 = 0x3FF0000000000000; 100 101 // Contains information about the structure of a specific floating point number 102 // type. 103 export type floatinfo = struct { 104 // Bits in significand. 105 mantbits: u64, 106 // Bits in exponent. 107 expbits: u64, 108 // Bias of exponent. 109 expbias: int, 110 // Mask for mantissa. 111 mantmask: u64, 112 // Mask for exponent. 113 expmask: u64, 114 }; 115 116 // A [[floatinfo]] structure defining the structure of the f64 type. 117 export const f64info: floatinfo = floatinfo { 118 mantbits = 52, 119 expbits = 11, 120 expbias = 1023, 121 mantmask = (1 << 52) - 1, 122 expmask = (1 << 11) - 1, 123 }; 124 125 // A [[floatinfo]] structure defining the structure of the f32 type. 126 export const f32info: floatinfo = floatinfo { 127 mantbits = 23, 128 expbits = 8, 129 expbias = 127, 130 mantmask = (1 << 23) - 1, 131 expmask = (1 << 8) - 1, 132 }; 133 134 // The floating point value representing Not a Number, i.e. an undefined or 135 // unrepresentable value. You cannot test if a number is NaN by comparing to 136 // this value; see [[isnan]] instead. 137 export def NAN = 0.0 / 0.0; 138 139 // The floating point value representing positive infinity. Use -[[INF]] for 140 // negative infinity. 141 export def INF = 1.0 / 0.0; 142 143 // Returns true if the given floating-point number is NaN. 144 export fn isnan(n: f64) bool = n != n; 145 146 // Returns true if the given floating-point number is infinite. 147 export fn isinf(n: f64) bool = { 148 const bits = f64bits(n); 149 const mant = bits & F64_MANTISSA_MASK; 150 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 151 return exp == F64_EXPONENT_MASK && mant == 0; 152 }; 153 154 @test fn isinf() void = { 155 assert(isinf(INF)); 156 assert(isinf(-INF)); 157 assert(!isinf(NAN)); 158 assert(!isinf(1.23)); 159 assert(!isinf(-1.23f32)); 160 }; 161 162 // Returns true if the given f64 is normal. 163 export fn isnormalf64(n: f64) bool = { 164 const bits = f64bits(n); 165 const mant = bits & F64_MANTISSA_MASK; 166 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 167 return exp != F64_EXPONENT_MASK && (exp > 0 || mant == 0); 168 }; 169 170 // Returns true if the given f32 is normal. 171 export fn isnormalf32(n: f32) bool = { 172 const bits = f32bits(n); 173 const mant = bits & F32_MANTISSA_MASK; 174 const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK; 175 return exp != F32_EXPONENT_MASK && (exp > 0 || mant == 0); 176 }; 177 178 // Returns true if the given f64 is subnormal. 179 export fn issubnormalf64(n: f64) bool = { 180 const bits = f64bits(n); 181 const mant = bits & F64_MANTISSA_MASK; 182 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 183 return exp == 0 && mant != 0; 184 }; 185 186 // Returns true if the given f32 is subnormal. 187 export fn issubnormalf32(n: f32) bool = { 188 const bits = f32bits(n); 189 const mant = bits & F32_MANTISSA_MASK; 190 const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK; 191 return exp == 0 && mant != 0; 192 }; 193 194 // Returns the absolute value of f64 n. 195 export fn absf64(n: f64) f64 = { 196 if (isnan(n)) { 197 return n; 198 }; 199 return f64frombits(f64bits(n) & ~F64_SIGN_MASK); 200 }; 201 202 // Returns the absolute value of f32 n. 203 export fn absf32(n: f32) f32 = { 204 if (isnan(n)) { 205 return n; 206 }; 207 return f32frombits(f32bits(n) & ~F32_SIGN_MASK); 208 }; 209 210 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also 211 // signed. 212 export fn signf64(x: f64) i64 = { 213 if (f64bits(x) & F64_SIGN_MASK == 0) { 214 return 1i64; 215 } else { 216 return -1i64; 217 }; 218 }; 219 220 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also 221 // signed. 222 export fn signf32(x: f32) i64 = { 223 if (f32bits(x) & F32_SIGN_MASK == 0) { 224 return 1i64; 225 } else { 226 return -1i64; 227 }; 228 }; 229 230 // Returns whether or not x is positive. 231 export fn ispositivef64(x: f64) bool = signf64(x) == 1i64; 232 233 // Returns whether or not x is positive. 234 export fn ispositivef32(x: f32) bool = signf32(x) == 1i32; 235 236 // Returns whether or not x is negative. 237 export fn isnegativef64(x: f64) bool = signf64(x) == -1i64; 238 239 // Returns whether or not x is negative. 240 export fn isnegativef32(x: f32) bool = signf32(x) == -1i32; 241 242 // Returns x, but with the sign of y. 243 export fn copysignf64(x: f64, y: f64) f64 = { 244 return f64frombits((f64bits(x) & ~F64_SIGN_MASK) | 245 (f64bits(y) & F64_SIGN_MASK)); 246 }; 247 248 // Returns x, but with the sign of y. 249 export fn copysignf32(x: f32, y: f32) f32 = { 250 return f32frombits((f32bits(x) & ~F32_SIGN_MASK) | 251 (f32bits(y) & F32_SIGN_MASK)); 252 }; 253 254 // Takes a potentially subnormal f64 n and returns a normal f64 normal_float 255 // and an exponent exp such that n == normal_float * 2^{exp}. 256 export fn normalizef64(n: f64) (f64, i64) = { 257 if (issubnormalf64(n)) { 258 const factor = 1i64 << (F64_MANTISSA_BITS: i64); 259 const normal_float = (n * (factor: f64)); 260 return (normal_float, -(F64_MANTISSA_BITS: i64)); 261 }; 262 return (n, 0); 263 }; 264 265 // Takes a potentially subnormal f32 n and returns a normal f32 normal_float 266 // and an exponent exp such that n == normal_float * 2^{exp}. 267 export fn normalizef32(n: f32) (f32, i64) = { 268 if (issubnormalf32(n)) { 269 const factor = 1i32 << (F32_MANTISSA_BITS: i32); 270 const normal_float = n * factor: f32; 271 return (normal_float, -(F32_MANTISSA_BITS: i64)); 272 }; 273 return (n, 0); 274 }; 275 276 // Breaks a f64 down into its mantissa and exponent. The mantissa will be 277 // between 0.5 and 1. 278 export fn frexpf64(n: f64) (f64, i64) = { 279 if (isnan(n) || isinf(n) || n == 0f64) { 280 return (n, 0); 281 }; 282 const normalized = normalizef64(n); 283 const normal_float = normalized.0; 284 const normalization_exp = normalized.1; 285 const bits = f64bits(normal_float); 286 const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; 287 const exp: i64 = normalization_exp + 288 (raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1; 289 const mantissa: f64 = 290 f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO); 291 return (mantissa, exp); 292 }; 293 294 // Breaks a f32 down into its mantissa and exponent. The mantissa will be 295 // between 0.5 and 1. 296 export fn frexpf32(n: f32) (f32, i64) = { 297 if (isnan(n) || isinf(n) || n == 0f32) { 298 return (n, 0); 299 }; 300 const normalized = normalizef32(n); 301 const normal_float = normalized.0; 302 const normalization_exp = normalized.1; 303 const bits = f32bits(normal_float); 304 const raw_exp: u64 = (bits >> F32_MANTISSA_BITS) 305 & F32_EXPONENT_MASK: u32; 306 const exp: i64 = normalization_exp + 307 (raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1; 308 const mantissa: f32 = 309 f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO); 310 return (mantissa, exp); 311 }; 312 313 // Creates an f64 from a mantissa and an exponent. 314 export fn ldexpf64(mantissa: f64, exp: i64) f64 = { 315 if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) { 316 return mantissa; 317 }; 318 const normalized = normalizef64(mantissa); 319 const normal_float = normalized.0; 320 const normalization_exp = normalized.1; 321 const bits = f64bits(normal_float); 322 const mantissa_exp = 323 (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - 324 (F64_EXPONENT_BIAS: i64); 325 let res_exp = exp + normalization_exp + mantissa_exp; 326 // Underflow 327 if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) { 328 return copysignf64(0f64, mantissa); 329 }; 330 // Overflow 331 if (res_exp > (F64_EXPONENT_BIAS: i64)) { 332 if (mantissa < 0f64) { 333 return -INF; 334 } else { 335 return INF; 336 }; 337 }; 338 // Subnormal 339 let subnormal_factor = 1f64; 340 if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) { 341 res_exp += (F64_MANTISSA_BITS: i64) - 1; 342 subnormal_factor = 1f64 / 343 ((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64); 344 }; 345 const res: u64 = (bits & F64_EXP_REMOVAL_MASK) | 346 ( 347 ((res_exp: u64) + F64_EXPONENT_BIAS) 348 << F64_MANTISSA_BITS 349 ); 350 return subnormal_factor * f64frombits(res); 351 }; 352 353 // Creates an f32 from a mantissa and an exponent. 354 export fn ldexpf32(mantissa: f32, exp: i64) f32 = { 355 if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) { 356 return mantissa; 357 }; 358 const normalized = normalizef32(mantissa); 359 const normal_float = normalized.0; 360 const normalization_exp = normalized.1; 361 const bits = f32bits(normal_float); 362 const mantissa_exp = 363 (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - 364 (F32_EXPONENT_BIAS: i32); 365 let res_exp = exp + normalization_exp + mantissa_exp; 366 // Underflow 367 if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) { 368 return copysignf32(0.0f32, mantissa); 369 }; 370 // Overflow 371 if (res_exp > (F32_EXPONENT_BIAS: i32)) { 372 if (mantissa < 0.0f32) { 373 return -INF; 374 } else { 375 return INF; 376 }; 377 }; 378 // Subnormal 379 let subnormal_factor = 1.0f32; 380 if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) { 381 res_exp += (F32_MANTISSA_BITS: i32) - 1; 382 subnormal_factor = 1.0f32 / 383 ((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32); 384 }; 385 const res: u32 = (bits & F32_EXP_REMOVAL_MASK) | 386 ( 387 ((res_exp: u32) + F32_EXPONENT_BIAS) 388 << (F32_MANTISSA_BITS: u32) 389 ); 390 return subnormal_factor * f32frombits(res); 391 }; 392 393 // Returns the integer and fractional parts of an f64. 394 export fn modfracf64(n: f64) (f64, f64) = { 395 if (n < 1f64) { 396 if (n < 0f64) { 397 let positive_parts = modfracf64(-n); 398 return (-positive_parts.0, -positive_parts.1); 399 }; 400 if (n == 0f64) { 401 return (n, n); 402 }; 403 return (0f64, n); 404 }; 405 let bits = f64bits(n); 406 const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - 407 (F64_EXPONENT_BIAS: i64); 408 // For exponent exp, all integers can be represented with the top exp 409 // bits of the mantissa 410 const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64; 411 if (exp < (sign_and_exp_bits: i64)) { 412 const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64); 413 bits = bits & ~((1u64 << bits_to_shift) - 1); 414 }; 415 const int_part = f64frombits(bits); 416 const frac_part = n - int_part; 417 return (int_part, frac_part); 418 }; 419 420 // Returns the integer and fractional parts of an f32. 421 export fn modfracf32(n: f32) (f32, f32) = { 422 if (n < 1.0f32) { 423 if (n < 0.0f32) { 424 let positive_parts = modfracf32(-n); 425 return (-positive_parts.0, -positive_parts.1); 426 }; 427 if (n == 0.0f32) { 428 return (n, n); 429 }; 430 return (0f32, n); 431 }; 432 let bits = f32bits(n); 433 const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - 434 (F32_EXPONENT_BIAS: i32); 435 // For exponent exp, all integers can be represented with the top exp 436 // bits of the mantissa 437 const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32; 438 if (exp < (sign_and_exp_bits: i32)) { 439 const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32); 440 bits = bits & ~((1u32 << bits_to_shift) - 1); 441 }; 442 const int_part = f32frombits(bits); 443 const frac_part = n - int_part; 444 return (int_part, frac_part); 445 }; 446 447 // Returns the f32 that is closest to 'x' in direction of 'y'. Returns NaN 448 // if either parameter is NaN. Returns 'x' if both parameters are same. 449 export fn nextafterf32(x: f32, y: f32) f32 = { 450 if (isnan(x) || isnan(y)) { 451 return x + y; 452 }; 453 let ux = f32bits(x); 454 let uy = f32bits(y); 455 if (ux == uy) { 456 return x; 457 }; 458 459 let absx = ux & 0x7fffffff, absy = uy & 0x7fffffff; 460 if (absx == 0) { 461 if (absy == 0) { 462 return x; 463 }; 464 ux = uy & 0x80000000 | 1; 465 } else if (absx > absy || (ux ^ uy) & 0x80000000 != 0) { 466 ux -= 1; 467 } else { 468 ux += 1; 469 }; 470 // TODO handle over/underflow 471 return f32frombits(ux); 472 }; 473 474 // Returns the f64 that is closest to 'x' in direction of 'y' Returns NaN 475 // if either parameter is NaN. Returns 'x' if both parameters are same. 476 export fn nextafterf64(x: f64, y: f64) f64 = { 477 if (isnan(x) || isnan(y)) { 478 return x + y; 479 }; 480 let ux = f64bits(x); 481 let uy = f64bits(y); 482 if (ux == uy) { 483 return x; 484 }; 485 486 let absx = ux & ~(1u64 << 63), absy = uy & ~(1u64 << 63); 487 if (absx == 0) { 488 if (absy == 0) { 489 return x; 490 }; 491 ux = uy & (1u64 << 63) | 1u64; 492 } else if (absx > absy || (ux ^ uy) & (1u64 << 63) != 0) { 493 ux -= 1; 494 } else { 495 ux += 1; 496 }; 497 // TODO handle over/underflow 498 return f64frombits(ux); 499 }; 500 501 // Round a f32 to nearest integer value in floating point format 502 fn nearbyintf32(x: f32) f32 = { 503 let n = f32bits(x); 504 let e = (n & F32_EXPONENT_MASK) >> F32_MANTISSA_BITS; 505 if (e >= F32_EXPONENT_BIAS + F32_MANTISSA_BITS) { 506 return x; 507 }; 508 let s = n >> 31; 509 let y = if (s != 0) 510 x - 1.0 / F32_EPS + 1.0 / F32_EPS 511 else 512 x + 1.0 / F32_EPS - 1.0 / F32_EPS; 513 514 if (y == 0.0f32) 515 return if (s != 0) -0.0f32 else 0.0f32 516 else 517 return y; 518 }; 519 520 // Round a f64 to nearest integer value in floating point format 521 fn nearbyintf64(x: f64) f64 = { 522 let n = f64bits(x); 523 let e = (n & F64_EXPONENT_MASK) >> F64_MANTISSA_BITS; 524 if (e >= F64_EXPONENT_BIAS + F64_MANTISSA_BITS) { 525 return x; 526 }; 527 let s = n >> 63; 528 let y = if (s != 0) 529 x - 1.0 / F64_EPS + 1.0 / F64_EPS 530 else 531 x + 1.0 / F64_EPS - 1.0 / F64_EPS; 532 533 if (y == 0.0f64) 534 return if (s != 0) -0.0f64 else 0.0f64 535 else 536 return y; 537 };