stof.ha (16575B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 // (c) 2010 The Go Authors. All rights reserved. 4 5 // Using the Eisel-Lemire algorithm [1] for fast parsing of floating-point 6 // numbers, with Simple Decimal Conversion algorithm [2] as fallback. 7 // [1]: https://nigeltao.github.io/blog/2020/eisel-lemire.html 8 // [2]: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html 9 10 use ascii; 11 use math; 12 use strings; 13 14 fn todig(c: u8) u8 = { 15 if ('0' <= c && c <= '9') { 16 return c - '0'; 17 } else if ('a' <= c && c <= 'f') { 18 return c - 'a' + 10; 19 } else if ('A' <= c && c <= 'F') { 20 return c - 'A' + 10; 21 }; 22 abort("unreachable"); 23 }; 24 25 type fast_parsed_float = struct { 26 mantissa: u64, 27 exponent: i32, 28 negative: bool, 29 truncated: bool, 30 }; 31 32 fn fast_parse(s: str, b: base) (fast_parsed_float | invalid) = { 33 let buf = strings::toutf8(s); 34 let i = 0z, neg = false, trunc = false; 35 if (buf[i] == '-') { 36 neg = true; 37 i += 1; 38 } else if (buf[i] == '+') { 39 i += 1; 40 }; 41 42 let (expchr, max_ndmant, isdigit) = switch (b) { 43 case base::DEC => 44 yield ('e', 19, &ascii::isdigit); 45 case base::HEX => 46 yield ('p', 16, &ascii::isxdigit); 47 case => abort("unreachable"); 48 }; 49 50 let sawdot = false, sawdigits = false; 51 let nd = 0, ndmant = 0, dp = 0; 52 let mant = 0u64, exp = 0i32; 53 for (i < len(s); i += 1) { 54 if (buf[i] == '.') { 55 if (sawdot) return i: invalid; 56 sawdot = true; 57 dp = nd; 58 } else if (isdigit(buf[i]: rune)) { 59 sawdigits = true; 60 if (buf[i] == '0' && nd == 0) { 61 dp -= 1; 62 continue; 63 }; 64 nd += 1; 65 if (ndmant < max_ndmant) { 66 mant = mant * b + todig(buf[i]); 67 ndmant += 1; 68 } else if (buf[i] != '0') { 69 trunc = true; 70 }; 71 } else break; 72 }; 73 if (!sawdigits) return i: invalid; 74 if (!sawdot) { 75 dp = nd; 76 }; 77 if (b == base::HEX) { 78 dp *= 4; 79 ndmant *= 4; 80 }; 81 if (i < len(s) && ascii::tolower(buf[i]: rune) == expchr) { 82 i += 1; 83 if (i >= len(s)) return i: invalid; 84 let expsign: int = 1; 85 if (buf[i] == '+') { 86 i += 1; 87 } else if (buf[i] == '-') { 88 expsign = -1; 89 i += 1; 90 }; 91 if (i >= len(s) || !ascii::isdigit(buf[i]: rune)) 92 return i: invalid; 93 let e: int = 0; 94 for (i < len(s) && ascii::isdigit(buf[i]: rune); i += 1) { 95 if (e < 10000) { 96 e = e * 10 + (buf[i] - '0'): int; 97 }; 98 }; 99 dp += e * expsign; 100 } else if (b == base::HEX) { 101 return i: invalid; // hex floats must have exponent 102 }; 103 if (i != len(s)) return i: invalid; 104 if (mant != 0) { 105 exp = dp - ndmant; 106 }; 107 return fast_parsed_float { 108 mantissa = mant, 109 exponent = exp, 110 negative = neg, 111 truncated = trunc, 112 }; 113 }; 114 115 fn decimal_parse(d: *decimal, s: str) (void | invalid) = { 116 let i = 0z; 117 const buf = strings::toutf8(s); 118 d.negative = false; 119 d.truncated = false; 120 if (buf[0] == '+') { 121 i += 1; 122 } else if (buf[0] == '-') { 123 d.negative = true; 124 i += 1; 125 }; 126 let sawdot = false, sawdigits = false; 127 for (i < len(s); i += 1) { 128 if (buf[i] == '.') { 129 if (sawdot) return i: invalid; 130 sawdot = true; 131 d.dp = d.nd: int; 132 } else if (ascii::isdigit(buf[i]: rune)) { 133 sawdigits = true; 134 if (buf[i] == '0' && d.nd == 0) { 135 d.dp -= 1; 136 continue; 137 }; 138 if (d.nd < len(d.digits)) { 139 d.digits[d.nd] = buf[i] - '0'; 140 d.nd += 1; 141 } else if (buf[i] != '0') { 142 d.truncated = true; 143 }; 144 } else break; 145 }; 146 if (!sawdigits) return i: invalid; 147 if (!sawdot) { 148 d.dp = d.nd: int; 149 }; 150 if (i < len(s) && (buf[i] == 'e' || buf[i] == 'E')) { 151 i += 1; 152 if (i >= len(s)) return i: invalid; 153 let expsign: int = 1; 154 if (buf[i] == '+') { 155 i += 1; 156 } else if (buf[i] == '-') { 157 expsign = -1; 158 i += 1; 159 }; 160 if (i >= len(s) || !ascii::isdigit(buf[i]: rune)) 161 return i: invalid; 162 let e: int = 0; 163 for (i < len(s) && ascii::isdigit(buf[i]: rune); i += 1) { 164 if (e < 10000) { 165 e = e * 10 + (buf[i] - '0'): int; 166 }; 167 }; 168 d.dp += e * expsign; 169 }; 170 if (i != len(s)) return i: invalid; 171 }; 172 173 fn leading_zeroes(n: u64) uint = { 174 assert(n > 0); 175 let b = 0u; 176 if ((n & 0b1111111111111111111111111111111100000000000000000000000000000000u64) > 0) { 177 n >>= 32; 178 b |= 32; 179 }; 180 if ((n & 0b11111111111111110000000000000000u64) > 0) { 181 n >>= 16; 182 b |= 16; 183 }; 184 if ((n & 0b1111111100000000u64) > 0) { 185 n >>= 8; 186 b |= 8; 187 }; 188 if ((n & 0b11110000) > 0) { 189 n >>= 4; 190 b |= 4; 191 }; 192 if ((n & 0b1100) > 0) { 193 n >>= 2; 194 b |= 2; 195 }; 196 if ((n & 0b10) > 0) { 197 n >>= 1; 198 b |= 1; 199 }; 200 return 63 - b; 201 }; 202 203 fn eisel_lemire( 204 mantissa: u64, 205 exp10: i32, 206 neg: bool, 207 f: *math::floatinfo 208 ) (u64 | void) = { 209 if (mantissa == 0 || exp10 > 288 || exp10 < -307) return; 210 const po10 = powers_of_ten[exp10 + 307]; 211 const clz = leading_zeroes(mantissa); 212 mantissa <<= clz; 213 let shift = 64 - f.mantbits - 3, mask = (1 << shift) - 1; 214 // log(10) / log(2) ≈ 217706 / 65536; x / 65536 = x >> 16 215 let exp = (217706 * exp10) >> 16; 216 let e2 = (exp + f.expbias: i32 + 64): u64 - clz: u64; 217 let x = u128mul(mantissa, po10[1]); 218 if ((x.hi & mask) == mask && ((x.lo + mantissa) < mantissa)) { 219 const y = u128mul(mantissa, po10[0]); 220 let merged = r128 { hi = x.hi, lo = x.lo + y.hi }; 221 if (merged.lo < x.lo) { 222 merged.hi += 1; 223 }; 224 if (((merged.hi & mask) == mask) && ((merged.lo + 1) == 0) && 225 (y.lo + mantissa < mantissa)) { 226 return; 227 }; 228 x = merged; 229 }; 230 let msb = x.hi >> 63, mant = x.hi >> (msb + shift); 231 e2 -= 1 ^ msb; 232 if (x.lo == 0 && (x.hi & mask == 0) && (mant & 3 == 1)) { 233 return; 234 }; 235 mant += mant & 1; 236 mant >>= 1; 237 if ((mant >> (f.mantbits + 1)) > 0) { 238 mant >>= 1; 239 e2 += 1; 240 }; 241 if (e2 <= 0 || e2 >= (1 << f.expbits) - 1) { 242 return; 243 }; 244 return mkfloat(mant, e2: uint, neg, f); 245 }; 246 247 fn floatbits(d: *decimal, f: *math::floatinfo) (u64 | overflow) = { 248 let e: int = 0, m: u64 = 0; 249 const powtab: [19]i8 = [ 250 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 251 33, 36, 39, 43, 46, 49, 53, 56, 59, 252 ]; 253 if (d.nd == 0 || d.dp < -326) { 254 return if (d.negative) mkfloat(0, 0, d.negative, f) 255 else 0; 256 } else if (d.dp > 310) { 257 return overflow; 258 }; 259 if (d.nd <= 19) { 260 let mant = 0u64; 261 for (let i = 0z; i < d.nd; i += 1) { 262 mant = (10 * mant) + d.digits[i]; 263 }; 264 const exp10 = d.dp - d.nd: i32; 265 const r = eisel_lemire(mant, exp10, d.negative, f); 266 if (r is u64) { 267 return r: u64; 268 }; 269 }; 270 for (d.dp > 0) { 271 const n: int = if (d.dp: uint >= len(powtab)) 272 maxshift: int 273 else powtab[d.dp]; 274 decimal_shift(d, -n); 275 e += n; 276 }; 277 for (d.dp <= 0) { 278 const n: int = if (d.dp == 0) { 279 if (d.digits[0] >= 5) break; 280 yield if (d.digits[0] < 2) 2 else 1; 281 } else if (-d.dp >= len(powtab): i32) 282 maxshift: int 283 else powtab[-d.dp]; 284 decimal_shift(d, n); 285 e -= n; 286 }; 287 e -= 1; 288 if (e <= -f.expbias + 1) { 289 const n = -f.expbias - e + 1; 290 decimal_shift(d, -n); 291 e += n; 292 }; 293 if (e + f.expbias >= (1 << f.expbits: int) - 1) { 294 return overflow; 295 }; 296 decimal_shift(d, f.mantbits: int + 1); 297 m = decimal_round(d); 298 if (m == 2 << f.mantbits) { 299 m >>= 1; 300 e += 1; 301 if (e + f.expbias >= (1 << f.expbits: int) - 1) { 302 return overflow; 303 }; 304 }; 305 if (m & (1 << f.mantbits) == 0) { 306 e = -f.expbias; 307 }; 308 return mkfloat(m, (e + f.expbias): uint, d.negative, f); 309 }; 310 311 fn mkfloat(m: u64, e: uint, negative: bool, f: *math::floatinfo) u64 = { 312 let n: u64 = m & ((1 << f.mantbits) - 1); 313 n |= (e & ((1 << f.expbits) - 1)) << f.mantbits; 314 if (negative) { 315 n |= 1 << (f.mantbits + f.expbits); 316 }; 317 return n; 318 }; 319 320 const f64pow10: [_]f64 = [ 321 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, 322 1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1.0e16, 1.0e17, 1.0e18, 323 1.0e19, 1.0e20, 1.0e21, 1.0e22 324 ]; 325 326 fn stof64exact(mant: u64, exp: i32, neg: bool) (f64 | void) = { 327 if (mant >> math::F64_MANTISSA_BITS != 0) return; 328 let n = mant: i64: f64; // XXX: ARCH 329 if (neg) { 330 n = -n; 331 }; 332 if (exp == 0) { 333 return n; 334 }; 335 if (-22 <= exp && exp <= 22) { 336 if (exp >= 0) { 337 n *= f64pow10[exp]; 338 } else { 339 n /= f64pow10[-exp]; 340 }; 341 } else return; 342 return n; 343 }; 344 345 const f32pow10: [_]f32 = [ 346 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10 347 ]; 348 349 fn stof32exact(mant: u64, exp: i32, neg: bool) (f32 | void) = { 350 if (mant >> math::F32_MANTISSA_BITS != 0) return; 351 let n = mant: i32: f32; // XXX: ARCH 352 if (neg) { 353 n = -n; 354 }; 355 if (exp == 0) { 356 return n; 357 }; 358 if (-10 <= exp && exp <= 10) { 359 if (exp >= 0) { 360 n *= f32pow10[exp]; 361 } else { 362 n /= f64pow10[-exp]: f32; 363 }; 364 } else return; 365 return n; 366 }; 367 368 // Adapted from golang's atofHex. 369 fn hex_to_bits( 370 p: fast_parsed_float, 371 info: *math::floatinfo, 372 ) (u64 | overflow) = { 373 const max_exp = (1 << info.expbits): int - info.expbias - 2; 374 const min_exp = -info.expbias + 1; 375 p.exponent += info.mantbits: i32; 376 377 // Shift left until we have a leading 1 bit in the mantissa followed by 378 // mantbits, plus two more for rounding. 379 for (p.mantissa != 0 && p.mantissa >> (info.mantbits + 2) == 0) { 380 p.mantissa <<= 1; 381 p.exponent -= 1; 382 }; 383 // The lowest of the two rounding bits is set if we truncated. 384 if (p.truncated) { 385 p.mantissa |= 1; 386 }; 387 // If we have too many bits, shift right. 388 for (p.mantissa >> (3 + info.mantbits) != 0) { 389 p.mantissa = (p.mantissa >> 1) | (p.mantissa & 1); 390 p.exponent += 1; 391 }; 392 // Denormalize if the exponent is small. 393 for (p.mantissa > 1 && p.exponent < min_exp: i32 - 2) { 394 p.mantissa = (p.mantissa >> 1) | (p.mantissa & 1); 395 p.exponent += 1; 396 }; 397 // Round to even. 398 let round = p.mantissa & 3; 399 p.mantissa >>= 2; 400 round |= p.mantissa & 1; 401 p.exponent += 2; 402 if (round == 3) { 403 p.mantissa += 1; 404 if (p.mantissa == 1 << (1 + info.mantbits)) { 405 p.mantissa >>= 1; 406 p.exponent += 1; 407 }; 408 }; 409 // Denormal or zero. 410 if (p.mantissa >> info.mantbits == 0) { 411 p.exponent = -info.expbias; 412 }; 413 if (p.exponent > max_exp: i32) { 414 return overflow; 415 }; 416 let bits = p.mantissa & info.mantmask; 417 bits |= ((p.exponent + info.expbias: i32): u64 & info.expmask) 418 << info.mantbits; 419 if (p.negative) { 420 bits |= 1 << (info.mantbits + info.expbits); 421 }; 422 return bits; 423 }; 424 425 fn special(s: str) (f32 | void) = { 426 if (ascii::strcasecmp(s, "nan") == 0) { 427 return math::NAN; 428 } else if (ascii::strcasecmp(s, "infinity") == 0) { 429 return math::INF; 430 } else if (ascii::strcasecmp(s, "+infinity") == 0) { 431 return math::INF; 432 } else if (ascii::strcasecmp(s, "-infinity") == 0) { 433 return -math::INF; 434 }; 435 }; 436 437 // Converts a string to a f64 in [[base::DEC]] or [[base::HEX]]. If base is not 438 // provided, [[base::DEC]] is used. If the string is not a syntactically 439 // well-formed floating-point number, [[invalid]] is returned. If the string 440 // represents a floating-point number that is larger than the largest finite 441 // f64 number, [[overflow]] is returned. Zero is returned if the string 442 // represents a floating-point number that is smaller than the f64 number 443 // nearest to zero with respective sign. Recognizes "Infinity", "+Infinity", 444 // "-Infinity", and "NaN", case insensitive. 445 export fn stof64(s: str, b: base = base::DEC) (f64 | invalid | overflow) = { 446 if (b == base::DEFAULT) { 447 b = base::DEC; 448 } else if (b == base::HEX_LOWER) { 449 b = base::HEX; 450 }; 451 assert(b == base::DEC || b == base::HEX); 452 453 if (len(s) == 0) { 454 return 0z: invalid; 455 }; 456 457 match (special(s)) { 458 case let f: f32 => 459 return f; 460 case void => void; 461 }; 462 463 const p = fast_parse(s, b)?; 464 if (b == base::HEX) { 465 return math::f64frombits(hex_to_bits(p, &math::f64info)?); 466 } else if (!p.truncated) { 467 let n = stof64exact(p.mantissa, p.exponent, p.negative); 468 if (n is f64) { 469 return n: f64; 470 }; 471 let n = eisel_lemire(p.mantissa, p.exponent, p.negative, 472 &math::f64info); 473 if (n is u64) { 474 return math::f64frombits(n: u64); 475 }; 476 }; 477 let d = decimal { ... }; 478 decimal_parse(&d, s)?; 479 const n = floatbits(&d, &math::f64info)?; 480 return math::f64frombits(n); 481 }; 482 483 // Converts a string to a f32 in [[base::DEC]] or [[base::HEX]]. If base is not 484 // provided, [[base::DEC]] is used. If the string is not a syntactically 485 // well-formed floating-point number, [[invalid]] is returned. If the string 486 // represents a floating-point number that is larger than the largest finite 487 // f32 number, [[overflow]] is returned. Zero is returned if the string 488 // represents a floating-point number that is smaller than the f32 number 489 // nearest to zero with respective sign. Recognizes "Infinity", "+Infinity", 490 // "-Infinity", and "NaN", case insensitive. 491 export fn stof32(s: str, b: base = base::DEC) (f32 | invalid | overflow) = { 492 if (b == base::DEFAULT) { 493 b = base::DEC; 494 } else if (b == base::HEX_LOWER) { 495 b = base::HEX; 496 }; 497 assert(b == base::DEC || b == base::HEX); 498 499 if (len(s) == 0) { 500 return 0z: invalid; 501 }; 502 503 match (special(s)) { 504 case let f: f32 => 505 return f; 506 case void => void; 507 }; 508 509 const p = fast_parse(s, b)?; 510 if (b == base::HEX) { 511 return math::f32frombits(hex_to_bits(p, &math::f32info)?: u32); 512 } else if (!p.truncated) { 513 let n = stof32exact(p.mantissa, p.exponent, p.negative); 514 if (n is f32) { 515 return n: f32; 516 }; 517 let n = eisel_lemire(p.mantissa, p.exponent, p.negative, 518 &math::f32info); 519 if (n is u64) { 520 return math::f32frombits(n: u64: u32); 521 }; 522 }; 523 let d = decimal { ... }; 524 decimal_parse(&d, s)?; 525 const n = floatbits(&d, &math::f32info)?: u32; 526 return math::f32frombits(n); 527 }; 528 529 530 @test fn stof64() void = { 531 assert(stof64("0")! == 0.0); 532 assert(stof64("200")! == 200.0); 533 assert(stof64("12345")! == 12345.0); 534 assert(stof64("+112233445566778899")! == 1.122334455667789e17); 535 assert(stof64("3.14")! == 3.14); 536 assert(stof64("2.99792458E+8")! == 299792458.0); 537 assert(stof64("6.022e23")! == 6.022e23); 538 assert(stof64("1e310") is overflow); 539 assert(stof64("9007199254740991")! == 9007199254740991.0); 540 assert(stof64("90071992547409915")! == 90071992547409920.0); 541 assert(stof64("90071992547409925")! == 90071992547409920.0); 542 assert(stof64("2.2250738585072014e-308")! == 2.2250738585072014e-308); 543 assert(stof64("-1e-324")! == -0.0); 544 assert(stof64("5e-324")! == 5.0e-324); 545 assert(stof64("") as invalid == 0); 546 assert(stof64("0ZO") as invalid == 1); 547 assert(stof64("1.23ezz") as invalid == 5); 548 assert(stof64("Infinity")! == math::INF); 549 assert(stof64("+Infinity")! == math::INF); 550 assert(stof64("-Infinity")! == -math::INF); 551 assert(stof64("infinity")! == math::INF); 552 assert(stof64("inFinIty")! == math::INF); 553 assert(stof64("-infinity")! == -math::INF); 554 assert(stof64("-infiNity")! == -math::INF); 555 assert(math::isnan(stof64("NaN")!)); 556 assert(math::isnan(stof64("nan")!)); 557 assert(math::isnan(stof64("naN")!)); 558 }; 559 560 @test fn stof32() void = { 561 assert(stof32("0")! == 0.0); 562 assert(stof32("1e10")! == 1.0e10); 563 assert(stof32("299792458")! == 299792458.0); 564 assert(stof32("6.022e23")! == 6.022e23); 565 assert(stof32("1e40") is overflow); 566 assert(stof32("16777215")! == 16777215.0); 567 assert(stof32("167772155")! == 167772160.0); 568 assert(stof32("167772145")! == 167772140.0); 569 assert(stof32("6.62607015e-34")! == 6.62607015e-34); 570 assert(stof32("1.1754944e-38")! == 1.1754944e-38); 571 assert(stof32("-1e-50")! == -0.0); 572 assert(stof32("1e-45")! == 1.0e-45); 573 assert(stof32("") as invalid == 0); 574 assert(stof32("0ZO") as invalid == 1); 575 assert(stof32("1.23e-zz") as invalid == 6); 576 assert(stof32("Infinity")! == math::INF); 577 assert(stof32("+Infinity")! == math::INF); 578 assert(stof32("-Infinity")! == -math::INF); 579 assert(stof32("infinity")! == math::INF); 580 assert(stof32("inFinIty")! == math::INF); 581 assert(stof32("-infinity")! == -math::INF); 582 assert(stof32("-infiniTy")! == -math::INF); 583 assert(math::isnan(stof32("NaN")!)); 584 assert(math::isnan(stof32("nan")!)); 585 assert(math::isnan(stof32("naN")!)); 586 assert(stof32("9.19100241453305036800e+20") 587 == 9.19100241453305036800e+20); 588 }; 589 590 @test fn stofhex() void = { 591 assert(stof64("0p0", base::HEX)! == 0x0.0p0); 592 assert(stof64("1p0", base::HEX)! == 0x1.0p0); 593 assert(stof64("-1p0", base::HEX_LOWER)! == -0x1.0p0); 594 assert(stof64("1.fp-2", base::HEX)! == 0x1.fp-2); 595 assert(stof64("1.fffffffffffffp+1023", base::HEX)! 596 == math::F64_MAX_NORMAL); 597 assert(stof64("1.0000000000000p-1022", base::HEX)! 598 == math::F64_MIN_NORMAL); 599 assert(stof64("0.0000000000001p-1022", base::HEX)! 600 == math::F64_MIN_SUBNORMAL); 601 assert(stof64("1p+1024", base::HEX) is overflow); 602 assert(stof64("0.00000000000001p-1022", base::HEX)! == 0.0); 603 604 assert(stof32("0p0", base::HEX)! == 0x0.0p0); 605 assert(stof32("1p0", base::HEX)! == 0x1.0p0); 606 assert(stof32("-1p0", base::HEX)! == -0x1.0p0); 607 assert(stof32("1.fp-2", base::HEX)! == 0x1.fp-2); 608 assert(stof32("1.fffffd586b834p+127", base::HEX)! 609 == math::F32_MAX_NORMAL); 610 assert(stof32("1.0p-126", base::HEX)! == math::F32_MIN_NORMAL); 611 assert(stof32("1.6p-150", base::HEX)! == math::F32_MIN_SUBNORMAL); 612 assert(stof32("1.0p+128", base::HEX) is overflow); 613 assert(stof32("1.0p-151", base::HEX)! == 0.0); 614 };