ftos_ryu.ha (14668B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Using Ryƫ: fast float-to-string conversion algorithm by Ulf Adams. 5 // https://doi.org/10.1145/3192366.3192369 6 // This Hare implementation is translated from the original 7 // C implementation here: https://github.com/ulfjack/ryu 8 9 use ascii; 10 use math; 11 use types; 12 13 type r128 = struct { 14 hi: u64, 15 lo: u64, 16 }; 17 18 // TODO: use 128-bit integers when implemented 19 fn u128mul(a: u64, b: u64) r128 = { 20 const a0 = a: u32: u64, a1 = a >> 32; 21 const b0 = b: u32: u64, b1 = b >> 32; 22 const p00 = a0 * b0, p01 = a0 * b1, p10 = a1 * b0, p11 = a1 * b1; 23 const p00_lo = p00: u32: u64, p00_hi = p00 >> 32; 24 const mid1 = p10 + p00_hi; 25 const mid1_lo = mid1: u32: u64, mid1_hi = mid1 >> 32; 26 const mid2 = p01 + mid1_lo; 27 const mid2_lo = mid2: u32: u64, mid2_hi = mid2 >> 32; 28 const r_hi = p11 + mid1_hi + mid2_hi; 29 const r_lo = (mid2_lo << 32) | p00_lo; 30 return r128 { hi = r_hi, lo = r_lo }; 31 }; 32 33 // TODO: Same as above 34 fn u128rshift(lo: u64, hi: u64, s: u32) u64 = { 35 assert(0 <= s); 36 assert(s <= 64); 37 return (hi << (64 - s)) | (lo >> s); 38 }; 39 40 fn pow5fac(value: u64) u32 = { 41 const m_inv_5: u64 = 14757395258967641293; // 5 * m_inv_5 = 1 (mod 2^64) 42 const n_div_5: u64 = 3689348814741910323; 43 let count: u32 = 0; 44 for (true) { 45 assert(value != 0); 46 value *= m_inv_5; 47 if (value > n_div_5) break; 48 count += 1; 49 }; 50 return count; 51 }; 52 53 fn pow5fac32(value: u32) u32 = { 54 let count: u32 = 0; 55 for (true) { 56 assert(value != 0); 57 const q = value / 5, r = value % 5; 58 if (r != 0) break; 59 value = q; 60 count += 1; 61 }; 62 return count; 63 }; 64 65 fn ibool(b: bool) u8 = if (b) 1 else 0; 66 67 fn pow5multiple(v: u64, p: u32) bool = pow5fac(v) >= p; 68 fn pow5multiple32(v: u32, p: u32) bool = pow5fac32(v) >= p; 69 70 fn pow2multiple(v: u64, p: u32) bool = { 71 assert(v > 0); 72 assert(p < 64); 73 return (v & ((1u64 << p) - 1)) == 0; 74 }; 75 76 fn pow2multiple32(v: u32, p: u32) bool = { 77 assert(v > 0); 78 assert(p < 32); 79 return (v & ((1u32 << p) - 1)) == 0; 80 }; 81 82 fn mulshift64(m: u64, mul: (u64, u64), j: u32) u64 = { 83 // m is maximum 55 bits 84 let r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1); 85 const sum = r1.lo + r0.hi; 86 r1.hi += ibool(sum < r0.hi); 87 return u128rshift(sum, r1.hi, j - 64); 88 }; 89 90 fn mulshiftall64( 91 m: u64, 92 mul: (u64, u64), 93 j: i32, 94 mm_shift: u32, 95 ) (u64, u64, u64) = { 96 m <<= 1; 97 const r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1); 98 const lo = r0.lo, tmp = r0.hi, mid = tmp + r1.lo; 99 const hi = r1.hi + ibool(mid < tmp); 100 const lo2 = lo + mul.0; 101 const mid2 = mid + mul.1 + ibool(lo2 < lo); 102 const hi2 = hi + ibool(mid2 < mid); 103 const v_plus = u128rshift(mid2, hi2, (j - 64 - 1): u32); 104 const v_minus = if (mm_shift == 1) { 105 const lo3 = lo - mul.0; 106 const mid3 = mid - mul.1 - ibool(lo3 > lo); 107 const hi3 = hi - ibool(mid3 > mid); 108 yield u128rshift(mid3, hi3, (j - 64 - 1): u32); 109 } else { 110 const lo3 = lo + lo; 111 const mid3 = mid + mid + ibool(lo3 < lo); 112 const hi3 = hi + hi + ibool(mid3 < mid); 113 const lo4 = lo3 - mul.0; 114 const mid4 = mid3 - mul.1 - ibool(lo4 > lo3); 115 const hi4 = hi3 - ibool(mid4 > mid3); 116 yield u128rshift(mid4, hi4, (j - 64): u32); 117 }; 118 const v_rounded = u128rshift(mid, hi, (j - 64 - 1): u32); 119 return (v_plus, v_rounded, v_minus); 120 }; 121 122 fn mulshift32(m: u32, a: u64, s: u32) u32 = { 123 assert(s > 32); 124 const a_lo = a: u32: u64, a_hi = a >> 32; 125 const b0 = m * a_lo, b1 = m * a_hi; 126 const sum = (b0 >> 32) + b1, ss = sum >> (s - 32); 127 assert(ss <= types::U32_MAX); 128 return ss: u32; 129 }; 130 131 fn mulpow5inv_divpow2(m: u32, q: u32, j: i32) u32 = { 132 const pow5 = f64computeinvpow5(q); 133 return mulshift32(m, pow5.1 + 1, j: u32); 134 }; 135 136 fn mulpow5_divpow2(m: u32, i: u32, j: i32) u32 = { 137 const pow5 = f64computepow5(i); 138 return mulshift32(m, pow5.1, j: u32); 139 }; 140 141 fn log2pow5(e: u32) u32 = { 142 assert(e <= 3528); 143 return ((e * 1217359) >> 19); 144 }; 145 146 fn ceil_log2pow5(e: u32) u32 = log2pow5(e) + 1; 147 148 fn pow5bits(e: u32) u32 = ceil_log2pow5(e); 149 150 fn log10pow2(e: u32) u32 = { 151 assert(e <= 1650); 152 return ((e * 78913) >> 18); 153 }; 154 155 fn log10pow5(e: u32) u32 = { 156 assert(e <= 2620); 157 return ((e * 732923) >> 20); 158 }; 159 160 def F64_POW5_INV_BITCOUNT: u8 = 125; 161 def F64_POW5_BITCOUNT: u8 = 125; 162 163 def F32_POW5_INV_BITCOUNT: u8 = F64_POW5_INV_BITCOUNT - 64; 164 def F32_POW5_BITCOUNT: u8 = F64_POW5_BITCOUNT - 64; 165 166 const F64_POW5_INV_SPLIT2: [15][2]u64 = [ 167 [1, 2305843009213693952], 168 [5955668970331000884, 1784059615882449851], 169 [8982663654677661702, 1380349269358112757], 170 [7286864317269821294, 2135987035920910082], 171 [7005857020398200553, 1652639921975621497], 172 [17965325103354776697, 1278668206209430417], 173 [8928596168509315048, 1978643211784836272], 174 [10075671573058298858, 1530901034580419511], 175 [597001226353042382, 1184477304306571148], 176 [1527430471115325346, 1832889850782397517], 177 [12533209867169019542, 1418129833677084982], 178 [5577825024675947042, 2194449627517475473], 179 [11006974540203867551, 1697873161311732311], 180 [10313493231639821582, 1313665730009899186], 181 [12701016819766672773, 2032799256770390445], 182 ]; 183 184 const POW5_INV_OFFSETS: [19]u32 = [ 185 0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555, 186 0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054, 187 0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554, 188 0x00000000 189 ]; 190 191 const F64_POW5_SPLIT2: [13][2]u64 = [ 192 [0, 1152921504606846976], 193 [0, 1490116119384765625], 194 [1032610780636961552, 1925929944387235853], 195 [7910200175544436838, 1244603055572228341], 196 [16941905809032713930, 1608611746708759036], 197 [13024893955298202172, 2079081953128979843], 198 [6607496772837067824, 1343575221513417750], 199 [17332926989895652603, 1736530273035216783], 200 [13037379183483547984, 2244412773384604712], 201 [1605989338741628675, 1450417759929778918], 202 [9630225068416591280, 1874621017369538693], 203 [665883850346957067, 1211445438634777304], 204 [14931890668723713708, 1565756531257009982] 205 ]; 206 207 const POW5_OFFSETS: [21]u32 = [ 208 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995, 209 0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540, 210 0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151, 211 0x55559155, 0x51405555, 0x00000105 212 ]; 213 214 def POW5_TABLE_SZ: u8 = 26; 215 216 const POW5_TABLE: [POW5_TABLE_SZ]u64 = [ 217 1u64, 5u64, 25u64, 125u64, 625u64, 3125u64, 15625u64, 78125u64, 218 390625u64, 1953125u64, 9765625u64, 48828125u64, 244140625u64, 219 1220703125u64, 6103515625u64, 30517578125u64, 152587890625u64, 220 762939453125u64, 3814697265625u64, 19073486328125u64, 95367431640625u64, 221 476837158203125u64, 2384185791015625u64, 11920928955078125u64, 222 59604644775390625u64, 298023223876953125u64 //, 1490116119384765625u64 223 ]; 224 225 fn f64computeinvpow5(i: u32) (u64, u64) = { 226 const base = ((i + POW5_TABLE_SZ - 1) / POW5_TABLE_SZ): u32; 227 const base2 = base * POW5_TABLE_SZ; 228 const mul = F64_POW5_INV_SPLIT2[base]; 229 const off = base2 - i; 230 if (off == 0) { 231 return (mul[0], mul[1]); 232 }; 233 const m = POW5_TABLE[off]; 234 const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0] - 1); 235 let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo; 236 const sum = high0 + low1; 237 if (sum < high0) { 238 high1 += 1; 239 }; 240 const delta = pow5bits(base2) - pow5bits(i); 241 const res0 = u128rshift(low0, sum, delta) + 1 + 242 ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3); 243 const res1 = u128rshift(sum, high1, delta); 244 return (res0, res1); 245 }; 246 247 fn f64computepow5(i: u32) (u64, u64) = { 248 const base = i / POW5_TABLE_SZ, base2 = base * POW5_TABLE_SZ; 249 const mul = F64_POW5_SPLIT2[base]; 250 const off = i - base2; 251 if (off == 0) { 252 return (mul[0], mul[1]); 253 }; 254 const m = POW5_TABLE[off]; 255 const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0]); 256 let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo; 257 const sum = high0 + low1; 258 if (sum < high0) { 259 high1 += 1; 260 }; 261 const delta = pow5bits(i) - pow5bits(base2); 262 const res0 = u128rshift(low0, sum, delta) + 263 ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3); 264 const res1 = u128rshift(sum, high1, delta); 265 return (res0, res1); 266 }; 267 268 type decf64 = struct { 269 mantissa: u64, 270 exponent: i32, 271 }; 272 273 fn f64todecf64(mantissa: u64, exponent: u32) decf64 = { 274 let e2 = (math::F64_EXPONENT_BIAS + math::F64_MANTISSA_BITS + 2): i32; 275 let m2: u64 = 0; 276 if (exponent == 0) { 277 e2 = 1 - e2; 278 m2 = mantissa; 279 } else { 280 e2 = (exponent: i32) - e2; 281 m2 = (1u64 << math::F64_MANTISSA_BITS) | mantissa; 282 }; 283 const accept_bounds = (m2 & 1) == 0; 284 const mv = 4 * m2; 285 const mm_shift = ibool(mantissa != 0 || exponent <= 1); 286 let vp: u64 = 0, vr: u64 = 0, vm: u64 = 0; 287 let e10: i32 = 0; 288 let vm_trailing_zeros = false, vr_trailing_zeros = false; 289 if (e2 >= 0) { 290 const q = log10pow2(e2: u32) - ibool(e2 > 3); 291 e10 = q: i32; 292 const k = F64_POW5_INV_BITCOUNT + pow5bits(q) - 1; 293 const i = -e2 + (q + k): i32; 294 let pow5 = f64computeinvpow5(q); 295 const res = mulshiftall64(m2, pow5, i, mm_shift); 296 vp = res.0; vr = res.1; vm = res.2; 297 if (q <= 21) { 298 if ((mv - 5 * (mv / 5)) == 0) { 299 vr_trailing_zeros = pow5multiple(mv, q); 300 } else if (accept_bounds) { 301 vm_trailing_zeros = pow5multiple(mv - 1 - 302 mm_shift, q); 303 } else { 304 vp -= ibool(pow5multiple(mv + 2, q)); 305 }; 306 }; 307 } else { 308 const q = log10pow5((-e2): u32) - ibool(-e2 > 1); 309 e10 = e2 + (q: i32); 310 const i = -e2 - (q: i32); 311 const k = pow5bits(i: u32): i32 - F64_POW5_BITCOUNT: i32; 312 const j = (q: i32) - k; 313 let pow5 = f64computepow5(i: u32); 314 const res = mulshiftall64(m2, pow5, j, mm_shift); 315 vp = res.0; vr = res.1; vm = res.2; 316 if (q <= 1) { 317 vr_trailing_zeros = true; 318 if (accept_bounds) { 319 vm_trailing_zeros = mm_shift == 1; 320 } else { 321 vp -= 1; 322 }; 323 } else if (q < 63) { 324 vr_trailing_zeros = pow2multiple(mv, q); 325 }; 326 }; 327 let removed: i32 = 0, last_removed_digit: u8 = 0; 328 let output: u64 = 0; 329 if (vm_trailing_zeros || vr_trailing_zeros) { 330 for (true) { 331 const vpby10 = vp / 10, vmby10 = vm / 10; 332 if (vpby10 <= vmby10) break; 333 const vmmod10 = (vm: u32) - 10 * (vmby10: u32); 334 const vrby10 = vr / 10; 335 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 336 vm_trailing_zeros &&= vmmod10 == 0; 337 vr_trailing_zeros &&= last_removed_digit == 0; 338 last_removed_digit = vrmod10: u8; 339 vr = vrby10; vp = vpby10; vm = vmby10; 340 removed += 1; 341 }; 342 if (vm_trailing_zeros) { 343 for (true) { 344 const vmby10 = vm / 10; 345 const vmmod10 = (vm: u32) - 10 * (vmby10: u32); 346 if (vmmod10 != 0) break; 347 const vpby10 = vp / 10, vrby10 = vr / 10; 348 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 349 vr_trailing_zeros &&= last_removed_digit == 0; 350 last_removed_digit = vrmod10: u8; 351 vr = vrby10; vp = vpby10; vm = vmby10; 352 removed += 1; 353 }; 354 }; 355 if (vr_trailing_zeros && last_removed_digit == 5 && 356 (vr & 1 == 0)) { 357 // round to even 358 last_removed_digit = 4; 359 }; 360 output = vr + ibool((vr == vm && 361 (!accept_bounds || !vm_trailing_zeros)) || 362 last_removed_digit >= 5); 363 } else { 364 let round_up = false; 365 const vpby100 = vp / 100, vmby100 = vm / 100; 366 if (vpby100 > vmby100) { 367 const vrby100 = vr / 100; 368 const vrmod100 = (vr: u32) - 100 * (vrby100: u32); 369 round_up = vrmod100 >= 50; 370 vr = vrby100; vp = vpby100; vm = vmby100; 371 removed += 2; 372 }; 373 for (true) { 374 const vmby10 = vm / 10, vpby10 = vp / 10; 375 if (vpby10 <= vmby10) break; 376 const vrby10 = vr / 10; 377 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 378 round_up = vrmod10 >= 5; 379 vr = vrby10; vp = vpby10; vm = vmby10; 380 removed += 1; 381 }; 382 output = vr + ibool(vr == vm || round_up); 383 }; 384 const exp = e10 + removed; 385 return decf64 { exponent = exp, mantissa = output }; 386 }; 387 388 type decf32 = struct { 389 mantissa: u32, 390 exponent: i32, 391 }; 392 393 fn f32todecf32(mantissa: u32, exponent: u32) decf32 = { 394 let e2 = (math::F32_EXPONENT_BIAS + math::F32_MANTISSA_BITS + 2): i32; 395 let m2: u32 = 0; 396 if (exponent == 0) { 397 e2 = 1 - e2; 398 m2 = mantissa; 399 } else { 400 e2 = (exponent: i32) - e2; 401 m2 = (1u32 << math::F32_MANTISSA_BITS: u32) | mantissa; 402 }; 403 const accept_bounds = (m2 & 1) == 0; 404 const mv = 4 * m2, mp = mv + 2; 405 const mm_shift = ibool(mantissa != 0 || exponent <= 1); 406 const mm = mv - 1 - mm_shift; 407 let vr: u32 = 0, vp: u32 = 0, vm: u32 = 0; 408 let e10: i32 = 0; 409 let vm_trailing_zeroes = false, vr_trailing_zeroes = false; 410 let last_removed_digit: u8 = 0; 411 if (e2 >= 0) { 412 const q = log10pow2(e2: u32); 413 e10 = q: i32; 414 const k = F32_POW5_INV_BITCOUNT + pow5bits(q) - 1; 415 const i = -e2 + (q + k): i32; 416 vr = mulpow5inv_divpow2(mv, q, i); 417 vp = mulpow5inv_divpow2(mp, q, i); 418 vm = mulpow5inv_divpow2(mm, q, i); 419 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 420 const l = F32_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1; 421 last_removed_digit = (mulpow5inv_divpow2(mv, q - 1, 422 -e2 + ((q + l): i32) - 1) % 10): u8; 423 }; 424 if (q <= 9) { 425 if (mv % 5 == 0) { 426 vr_trailing_zeroes = pow5multiple32(mv, q); 427 } else if (accept_bounds) { 428 vm_trailing_zeroes = pow5multiple32(mm, q); 429 } else { 430 vp -= ibool(pow5multiple32(mp, q)); 431 }; 432 }; 433 } else { 434 const q = log10pow5((-e2): u32); 435 e10 = (q: i32) + e2; 436 const i = (-e2 - (q: i32)): u32; 437 const k = pow5bits(i) - F32_POW5_BITCOUNT; 438 let j = (q: i32) - k: i32; 439 vr = mulpow5_divpow2(mv, i, j); 440 vp = mulpow5_divpow2(mp, i, j); 441 vm = mulpow5_divpow2(mm, i, j); 442 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 443 j = (q: i32) - 1 - (pow5bits(i + 1): i32 - 444 F32_POW5_BITCOUNT: i32); 445 last_removed_digit = (mulpow5_divpow2(mv, 446 (i + 1), j) % 10): u8; 447 }; 448 if (q <= 1) { 449 vr_trailing_zeroes = true; 450 if (accept_bounds) { 451 vm_trailing_zeroes = mm_shift == 1; 452 } else { 453 vp -= 1; 454 }; 455 } else if (q < 31) { 456 vr_trailing_zeroes = pow2multiple32(mv, q - 1); 457 }; 458 }; 459 let removed: i32 = 0, output: u32 = 0; 460 if (vm_trailing_zeroes || vr_trailing_zeroes) { 461 for (vp / 10 > vm / 10) { 462 vm_trailing_zeroes &&= (vm - (vm / 10) * 10) == 0; 463 vr_trailing_zeroes &&= last_removed_digit == 0; 464 last_removed_digit = (vr % 10): u8; 465 vr /= 10; 466 vp /= 10; 467 vm /= 10; 468 removed += 1; 469 }; 470 if (vm_trailing_zeroes) { 471 for (vm % 10 == 0) { 472 vr_trailing_zeroes &&= last_removed_digit == 0; 473 last_removed_digit = (vr % 10): u8; 474 vr /= 10; 475 vp /= 10; 476 vm /= 10; 477 removed += 1; 478 }; 479 }; 480 if (vr_trailing_zeroes && last_removed_digit == 5 && 481 vr % 2 == 0) { 482 // round to even 483 last_removed_digit = 4; 484 }; 485 output = vr + ibool((vr == vm && 486 (!accept_bounds || !vm_trailing_zeroes)) || 487 last_removed_digit >= 5); 488 } else { 489 for (vp / 10 > vm / 10) { 490 last_removed_digit = (vr % 10): u8; 491 vr /= 10; 492 vp /= 10; 493 vm /= 10; 494 removed += 1; 495 }; 496 output = vr + ibool(vr == vm || last_removed_digit >= 5); 497 }; 498 const exp = e10 + removed; 499 return decf32 { mantissa = output, exponent = exp }; 500 }; 501 502 def F32_DECIMAL_DIGITS: i32 = 9; 503 def F64_DECIMAL_DIGITS: i32 = 17;