floats.ha (17884B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use types; 5 6 // Returns the binary representation of the given f64. 7 export fn f64bits(n: f64) u64 = *(&n: *u64); 8 9 // Returns the binary representation of the given f32. 10 export fn f32bits(n: f32) u32 = *(&n: *u32); 11 12 // Returns f64 with the given binary representation. 13 export fn f64frombits(n: u64) f64 = *(&n: *f64); 14 15 // Returns f32 with the given binary representation. 16 export fn f32frombits(n: u32) f32 = *(&n: *f32); 17 18 // The number of bits in the significand of the binary representation of f64. 19 export def F64_MANTISSA_BITS: u64 = 52; 20 21 // The number of bits in the exponent of the binary representation of f64. 22 export def F64_EXPONENT_BITS: u64 = 11; 23 24 // The bias of the exponent of the binary representation of f64. Subtract this 25 // from the exponent in the binary representation to get the actual exponent. 26 export def F64_EXPONENT_BIAS: u16 = 1023; 27 28 // The number of bits in the significand of the binary representation of f32. 29 export def F32_MANTISSA_BITS: u64 = 23; 30 31 // The number of bits in the exponent of the binary representation of f32. 32 export def F32_EXPONENT_BITS: u64 = 8; 33 34 // The bias of the exponent of the binary representation of f32. Subtract this 35 // from the exponent in the binary representation to get the actual exponent. 36 export def F32_EXPONENT_BIAS: u16 = 127; 37 38 // Mask with each bit of an f64's mantissa set. 39 export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1; 40 41 // Mask with each bit of an f64's exponent set. 42 export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1; 43 44 // Mask with each bit of an f32's mantissa set. 45 export def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1; 46 47 // Mask with each bit of an f32's exponent set. 48 export def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1; 49 50 // The largest representable f64 value which is less than Infinity. 51 export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308; 52 53 // The smallest representable normal f64 value. 54 export def F64_MIN_NORMAL: f64 = 2.2250738585072014e-308; 55 56 // The smallest (subnormal) f64 value greater than zero. 57 export def F64_MIN: f64 = 5.0e-324; 58 59 // The difference between 1 and the smallest f64 representable value that is 60 // greater than 1. 61 export def F64_EPS: f64 = 2.22040000000000004884e-16; 62 63 // The largest representable f32 value which is less than Infinity. 64 export def F32_MAX_NORMAL: f32 = 3.4028234e+38; 65 66 // The smallest representable normal f32 value. 67 export def F32_MIN_NORMAL: f32 = 1.1754944e-38; 68 69 // The smallest (subnormal) f32 value greater than zero. 70 export def F32_MIN: f32 = 1.0e-45; 71 72 // The difference between 1 and the smallest f32 representable value that is 73 // greater than 1. 74 export def F32_EPS: f32 = 1.1920928955078125e-7; 75 76 // The mask that gets an f64's sign. 77 def F64_SIGN_MASK: u64 = 1u64 << 63; 78 79 // The mask that sets all exponent bits to 0. 80 // NOTE: Replace with the following expression once the lexer supports it 81 // 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); 82 def F64_EXP_REMOVAL_MASK: u64 = 83 0b1000000000001111111111111111111111111111111111111111111111111111; 84 85 // The f64 that contains only an exponent that evaluates to zero. 86 def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS; 87 88 // The mask that gets an f32's sign. 89 def F32_SIGN_MASK: u32 = 1u32 << 31; 90 91 // The mask that sets all exponent bits to 0. 92 // NOTE: Replace with the following expression once the lexer supports it 93 // 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS); 94 def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111; 95 96 // The f32 that contains only an exponent that evaluates to zero. 97 def F32_EXP_ZERO: u32 = 98 ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32); 99 100 // The bits that represent the number 1f64. 101 def F64_ONE: u64 = 0x3FF0000000000000; 102 103 // Contains information about the structure of a specific floating point number 104 // type. 105 export type floatinfo = struct { 106 // Bits in significand. 107 mantbits: u64, 108 // Bits in exponent. 109 expbits: u64, 110 // Bias of exponent. 111 expbias: int, 112 // Mask for mantissa. 113 mantmask: u64, 114 // Mask for exponent. 115 expmask: u64, 116 }; 117 118 // A [[floatinfo]] structure defining the structure of the f64 type. 119 export const f64info: floatinfo = floatinfo { 120 mantbits = 52, 121 expbits = 11, 122 expbias = 1023, 123 mantmask = (1 << 52) - 1, 124 expmask = (1 << 11) - 1, 125 }; 126 127 // A [[floatinfo]] structure defining the structure of the f32 type. 128 export const f32info: floatinfo = floatinfo { 129 mantbits = 23, 130 expbits = 8, 131 expbias = 127, 132 mantmask = (1 << 23) - 1, 133 expmask = (1 << 8) - 1, 134 }; 135 136 // The floating point value representing Not a Number, i.e. an undefined or 137 // unrepresentable value. You cannot test if a number is NaN by comparing to 138 // this value; see [[isnan]] instead. 139 export def NAN: f32 = 0.0 / 0.0; 140 141 // The floating point value representing positive infinity. Use -[[INF]] for 142 // negative infinity. 143 export def INF: f32 = 1.0 / 0.0; 144 145 // Returns true if the given floating-point number is NaN. 146 export fn isnan(n: f64) bool = n != n; 147 148 // Returns true if the given floating-point number is infinite. 149 export fn isinf(n: f64) bool = { 150 const bits = f64bits(n); 151 const mant = bits & F64_MANTISSA_MASK; 152 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 153 return exp == F64_EXPONENT_MASK && mant == 0; 154 }; 155 156 @test fn isinf() void = { 157 assert(isinf(INF)); 158 assert(isinf(-INF)); 159 assert(!isinf(NAN)); 160 assert(!isinf(1.23)); 161 assert(!isinf(-1.23f32)); 162 }; 163 164 // Returns true if the given floating-point number is normal. 165 export fn isnormal(n: types::floating) bool = { 166 match (n) { 167 case let n: f32 => 168 return isnormalf32(n); 169 case let n: f64 => 170 return isnormalf64(n); 171 }; 172 }; 173 174 // Returns true if the given f64 is normal. 175 export fn isnormalf64(n: f64) bool = { 176 const bits = f64bits(n); 177 const mant = bits & F64_MANTISSA_MASK; 178 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 179 return exp != F64_EXPONENT_MASK && (exp > 0 || mant == 0); 180 }; 181 182 // Returns true if the given f32 is normal. 183 export fn isnormalf32(n: f32) bool = { 184 const bits = f32bits(n); 185 const mant = bits & F32_MANTISSA_MASK; 186 const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK; 187 return exp != F32_EXPONENT_MASK && (exp > 0 || mant == 0); 188 }; 189 190 // Returns true if the given floating-point number is subnormal. 191 export fn issubnormal(n: types::floating) bool = { 192 match (n) { 193 case let n: f32 => 194 return issubnormalf32(n); 195 case let n: f64 => 196 return issubnormalf64(n); 197 }; 198 }; 199 200 // Returns true if the given f64 is subnormal. 201 export fn issubnormalf64(n: f64) bool = { 202 const bits = f64bits(n); 203 const mant = bits & F64_MANTISSA_MASK; 204 const exp = bits >> F64_MANTISSA_BITS & F64_EXPONENT_MASK; 205 return exp == 0 && mant != 0; 206 }; 207 208 // Returns true if the given f32 is subnormal. 209 export fn issubnormalf32(n: f32) bool = { 210 const bits = f32bits(n); 211 const mant = bits & F32_MANTISSA_MASK; 212 const exp = bits >> F32_MANTISSA_BITS & F32_EXPONENT_MASK; 213 return exp == 0 && mant != 0; 214 }; 215 216 // Returns the absolute value of f64 n. 217 export fn absf64(n: f64) f64 = { 218 if (isnan(n)) { 219 return n; 220 }; 221 return f64frombits(f64bits(n) & ~F64_SIGN_MASK); 222 }; 223 224 // Returns the absolute value of f32 n. 225 export fn absf32(n: f32) f32 = { 226 if (isnan(n)) { 227 return n; 228 }; 229 return f32frombits(f32bits(n) & ~F32_SIGN_MASK); 230 }; 231 232 // Returns the absolute value of floating-point number n. 233 export fn absf(n: types::floating) f64 = { 234 match (n) { 235 case let n: f64 => 236 return absf64(n); 237 case let n: f32 => 238 return (absf32(n): f64); 239 }; 240 }; 241 242 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also 243 // signed. 244 export fn signf64(x: f64) i64 = { 245 if (f64bits(x) & F64_SIGN_MASK == 0) { 246 return 1i64; 247 } else { 248 return -1i64; 249 }; 250 }; 251 252 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also 253 // signed. 254 export fn signf32(x: f32) i64 = { 255 if (f32bits(x) & F32_SIGN_MASK == 0) { 256 return 1i64; 257 } else { 258 return -1i64; 259 }; 260 }; 261 262 // Returns 1 if x is positive and -1 if x is negative. Note that zero is also 263 // signed. 264 export fn signf(x: types::floating) i64 = { 265 match (x) { 266 case let n: f64 => 267 return signf64(n); 268 case let n: f32 => 269 return signf32(n); 270 }; 271 }; 272 273 // Returns whether or not x is positive. 274 export fn ispositivef64(x: f64) bool = signf64(x) == 1i64; 275 276 // Returns whether or not x is positive. 277 export fn ispositivef32(x: f32) bool = signf32(x) == 1i32; 278 279 // Returns whether or not x is positive. 280 export fn ispositive(x: types::floating) bool = { 281 match (x) { 282 case let n: f64 => 283 return ispositivef64(n); 284 case let n: f32 => 285 return ispositivef32(n); 286 }; 287 }; 288 289 // Returns whether or not x is negative. 290 export fn isnegativef64(x: f64) bool = signf64(x) == -1i64; 291 292 // Returns whether or not x is negative. 293 export fn isnegativef32(x: f32) bool = signf32(x) == -1i32; 294 295 // Returns whether or not x is negative. 296 export fn isnegative(x: types::floating) bool = { 297 match (x) { 298 case let n: f64 => 299 return isnegativef64(n); 300 case let n: f32 => 301 return isnegativef32(n); 302 }; 303 }; 304 305 // Returns x, but with the sign of y. 306 export fn copysignf64(x: f64, y: f64) f64 = { 307 return f64frombits((f64bits(x) & ~F64_SIGN_MASK) | 308 (f64bits(y) & F64_SIGN_MASK)); 309 }; 310 311 // Returns x, but with the sign of y. 312 export fn copysignf32(x: f32, y: f32) f32 = { 313 return f32frombits((f32bits(x) & ~F32_SIGN_MASK) | 314 (f32bits(y) & F32_SIGN_MASK)); 315 }; 316 317 // Returns x, but with the sign of y. 318 export fn copysign(x: types::floating, y: types::floating) f64 = { 319 match (x) { 320 case let n: f64 => 321 return copysignf64(n, (y: f64)); 322 case let n: f32 => 323 return (copysignf32(n, (y: f32)): f64); 324 }; 325 }; 326 327 // Takes a potentially subnormal f64 n and returns a normal f64 normal_float 328 // and an exponent exp such that n == normal_float * 2^{exp}. 329 export fn normalizef64(n: f64) (f64, i64) = { 330 if (issubnormalf64(n)) { 331 const factor = 1i64 << (F64_MANTISSA_BITS: i64); 332 const normal_float = (n * (factor: f64)); 333 return (normal_float, -(F64_MANTISSA_BITS: i64)); 334 }; 335 return (n, 0); 336 }; 337 338 // Takes a potentially subnormal f32 n and returns a normal f32 normal_float 339 // and an exponent exp such that n == normal_float * 2^{exp}. 340 export fn normalizef32(n: f32) (f32, i64) = { 341 if (issubnormalf32(n)) { 342 const factor = 1i32 << (F32_MANTISSA_BITS: i32); 343 const normal_float = n * factor: f32; 344 return (normal_float, -(F32_MANTISSA_BITS: i64)); 345 }; 346 return (n, 0); 347 }; 348 349 // Breaks a f64 down into its mantissa and exponent. The mantissa will be 350 // between 0.5 and 1. 351 export fn frexpf64(n: f64) (f64, i64) = { 352 if (isnan(n) || isinf(n) || n == 0f64) { 353 return (n, 0); 354 }; 355 const normalized = normalizef64(n); 356 const normal_float = normalized.0; 357 const normalization_exp = normalized.1; 358 const bits = f64bits(normal_float); 359 const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; 360 const exp: i64 = normalization_exp + 361 (raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1; 362 const mantissa: f64 = 363 f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO); 364 return (mantissa, exp); 365 }; 366 367 // Breaks a f32 down into its mantissa and exponent. The mantissa will be 368 // between 0.5 and 1. 369 export fn frexpf32(n: f32) (f32, i64) = { 370 if (isnan(n) || isinf(n) || n == 0f32) { 371 return (n, 0); 372 }; 373 const normalized = normalizef32(n); 374 const normal_float = normalized.0; 375 const normalization_exp = normalized.1; 376 const bits = f32bits(normal_float); 377 const raw_exp: u64 = ((bits >> (F32_MANTISSA_BITS: u32)) & 378 (F32_EXPONENT_MASK: u32)); 379 const exp: i64 = normalization_exp + 380 (raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1; 381 const mantissa: f32 = 382 f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO); 383 return (mantissa, exp); 384 }; 385 386 // Breaks a float down into its mantissa and exponent. The mantissa will be 387 // between 0.5 and 1. 388 export fn frexp(n: types::floating) (f64, i64) = { 389 match (n) { 390 case let n: f64 => 391 return frexpf64(n); 392 case let n: f32 => 393 let (mantissa, exp) = frexpf32(n); 394 return (mantissa, exp); 395 }; 396 }; 397 398 // Creates an f64 from a mantissa and an exponent. 399 export fn ldexpf64(mantissa: f64, exp: i64) f64 = { 400 if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) { 401 return mantissa; 402 }; 403 const normalized = normalizef64(mantissa); 404 const normal_float = normalized.0; 405 const normalization_exp = normalized.1; 406 const bits = f64bits(normal_float); 407 const mantissa_exp = 408 (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - 409 (F64_EXPONENT_BIAS: i64); 410 let res_exp = exp + normalization_exp + mantissa_exp; 411 // Underflow 412 if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) { 413 return copysign(0f64, mantissa); 414 }; 415 // Overflow 416 if (res_exp > (F64_EXPONENT_BIAS: i64)) { 417 if (mantissa < 0f64) { 418 return -INF; 419 } else { 420 return INF; 421 }; 422 }; 423 // Subnormal 424 let subnormal_factor = 1f64; 425 if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) { 426 res_exp += (F64_MANTISSA_BITS: i64) - 1; 427 subnormal_factor = 1f64 / 428 ((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64); 429 }; 430 const res: u64 = (bits & F64_EXP_REMOVAL_MASK) | 431 ( 432 ((res_exp: u64) + F64_EXPONENT_BIAS) 433 << F64_MANTISSA_BITS 434 ); 435 return subnormal_factor * f64frombits(res); 436 }; 437 438 // Creates an f32 from a mantissa and an exponent. 439 export fn ldexpf32(mantissa: f32, exp: i64) f32 = { 440 if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) { 441 return mantissa; 442 }; 443 const normalized = normalizef32(mantissa); 444 const normal_float = normalized.0; 445 const normalization_exp = normalized.1; 446 const bits = f32bits(normal_float); 447 const mantissa_exp = 448 (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - 449 (F32_EXPONENT_BIAS: i32); 450 let res_exp = exp + normalization_exp + mantissa_exp; 451 // Underflow 452 if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) { 453 return copysignf32(0.0f32, mantissa); 454 }; 455 // Overflow 456 if (res_exp > (F32_EXPONENT_BIAS: i32)) { 457 if (mantissa < 0.0f32) { 458 return -INF; 459 } else { 460 return INF; 461 }; 462 }; 463 // Subnormal 464 let subnormal_factor = 1.0f32; 465 if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) { 466 res_exp += (F32_MANTISSA_BITS: i32) - 1; 467 subnormal_factor = 1.0f32 / 468 ((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32); 469 }; 470 const res: u32 = (bits & F32_EXP_REMOVAL_MASK) | 471 ( 472 ((res_exp: u32) + F32_EXPONENT_BIAS) 473 << (F32_MANTISSA_BITS: u32) 474 ); 475 return subnormal_factor * f32frombits(res); 476 }; 477 478 // Returns the integer and fractional parts of an f64. 479 export fn modfracf64(n: f64) (f64, f64) = { 480 if (n < 1f64) { 481 if (n < 0f64) { 482 let positive_parts = modfracf64(-n); 483 return (-positive_parts.0, -positive_parts.1); 484 }; 485 if (n == 0f64) { 486 return (n, n); 487 }; 488 return (0f64, n); 489 }; 490 let bits = f64bits(n); 491 const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) - 492 (F64_EXPONENT_BIAS: i64); 493 // For exponent exp, all integers can be represented with the top exp 494 // bits of the mantissa 495 const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64; 496 if (exp < (sign_and_exp_bits: i64)) { 497 const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64); 498 bits = bits & ~((1u64 << bits_to_shift) - 1); 499 }; 500 const int_part = f64frombits(bits); 501 const frac_part = n - int_part; 502 return (int_part, frac_part); 503 }; 504 505 // Returns the integer and fractional parts of an f32. 506 export fn modfracf32(n: f32) (f32, f32) = { 507 if (n < 1.0f32) { 508 if (n < 0.0f32) { 509 let positive_parts = modfracf32(-n); 510 return (-positive_parts.0, -positive_parts.1); 511 }; 512 if (n == 0.0f32) { 513 return (n, n); 514 }; 515 return (0f32, n); 516 }; 517 let bits = f32bits(n); 518 const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) - 519 (F32_EXPONENT_BIAS: i32); 520 // For exponent exp, all integers can be represented with the top exp 521 // bits of the mantissa 522 const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32; 523 if (exp < (sign_and_exp_bits: i32)) { 524 const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32); 525 bits = bits & ~((1u32 << bits_to_shift) - 1); 526 }; 527 const int_part = f32frombits(bits); 528 const frac_part = n - int_part; 529 return (int_part, frac_part); 530 }; 531 532 // Returns the f32 that is closest to 'x' in direction of 'y'. Returns NaN 533 // if either parameter is NaN. Returns 'x' if both parameters are same. 534 export fn nextafterf32(x: f32, y: f32) f32 = { 535 if (isnan(x) || isnan(y)) { 536 return x + y; 537 }; 538 let ux = f32bits(x); 539 let uy = f32bits(y); 540 if (ux == uy) { 541 return x; 542 }; 543 544 let absx = ux & 0x7fffffff, absy = uy & 0x7fffffff; 545 if (absx == 0) { 546 if (absy == 0) { 547 return x; 548 }; 549 ux = uy & 0x80000000 | 1; 550 } else if (absx > absy || (ux ^ uy) & 0x80000000 != 0) { 551 ux -= 1; 552 } else { 553 ux += 1; 554 }; 555 // TODO handle over/underflow 556 return f32frombits(ux); 557 }; 558 559 // Returns the f64 that is closest to 'x' in direction of 'y' Returns NaN 560 // if either parameter is NaN. Returns 'x' if both parameters are same. 561 export fn nextafterf64(x: f64, y: f64) f64 = { 562 if (isnan(x) || isnan(y)) { 563 return x + y; 564 }; 565 let ux = f64bits(x); 566 let uy = f64bits(y); 567 if (ux == uy) { 568 return x; 569 }; 570 571 let absx = ux & ~(1u64 << 63), absy = uy & ~(1u64 << 63); 572 if (absx == 0) { 573 if (absy == 0) { 574 return x; 575 }; 576 ux = uy & (1u64 << 63) | 1u64; 577 } else if (absx > absy || (ux ^ uy) & (1u64 << 63) != 0) { 578 ux -= 1; 579 } else { 580 ux += 1; 581 }; 582 // TODO handle over/underflow 583 return f64frombits(ux); 584 }; 585 586 // Round a f32 to nearest integer value in floating point format 587 fn nearbyintf32(x: f32) f32 = { 588 let n = f32bits(x); 589 let e = n >> 23 & 0xff; 590 if (e >= 0x7f + 23) { 591 return x; 592 }; 593 let s = n >> 31; 594 let y = if (s != 0) 595 x - 1.0 / F32_EPS + 1.0 / F32_EPS 596 else 597 x + 1.0 / F32_EPS - 1.0 / F32_EPS; 598 599 if (y == 0.0f32) 600 return if (s != 0) -0.0f32 else 0.0f32 601 else 602 return y; 603 }; 604 605 // Round a f64 to nearest integer value in floating point format 606 fn nearbyintf64(x: f64) f64 = { 607 let n = f64bits(x); 608 let e = n >> 52 & 0x7ff; 609 if (e >= 0x3ff + 52) { 610 return x; 611 }; 612 let s = n >> 63; 613 let y = if (s != 0) 614 x - 1.0 / F64_EPS + 1.0 / F64_EPS 615 else 616 x + 1.0 / F64_EPS - 1.0 / F64_EPS; 617 618 if (y == 0.0f64) 619 return if (s != 0) -0.0f64 else 0.0f64 620 else 621 return y; 622 };