ftos_ryu.ha (14657B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Using Ryƫ: fast float-to-string conversion algorithm by Ulf Adams. 5 // https://doi.org/10.1145/3192366.3192369 6 // This Hare implementation is translated from the original 7 // C implementation here: https://github.com/ulfjack/ryu 8 9 use math; 10 use types; 11 12 type r128 = struct { 13 hi: u64, 14 lo: u64, 15 }; 16 17 // TODO: use 128-bit integers when implemented 18 fn u128mul(a: u64, b: u64) r128 = { 19 const a0 = a: u32: u64, a1 = a >> 32; 20 const b0 = b: u32: u64, b1 = b >> 32; 21 const p00 = a0 * b0, p01 = a0 * b1, p10 = a1 * b0, p11 = a1 * b1; 22 const p00_lo = p00: u32: u64, p00_hi = p00 >> 32; 23 const mid1 = p10 + p00_hi; 24 const mid1_lo = mid1: u32: u64, mid1_hi = mid1 >> 32; 25 const mid2 = p01 + mid1_lo; 26 const mid2_lo = mid2: u32: u64, mid2_hi = mid2 >> 32; 27 const r_hi = p11 + mid1_hi + mid2_hi; 28 const r_lo = (mid2_lo << 32) | p00_lo; 29 return r128 { hi = r_hi, lo = r_lo }; 30 }; 31 32 // TODO: Same as above 33 fn u128rshift(lo: u64, hi: u64, s: u32) u64 = { 34 assert(0 <= s); 35 assert(s <= 64); 36 return (hi << (64 - s)) | (lo >> s); 37 }; 38 39 fn pow5fac(value: u64) u32 = { 40 const m_inv_5: u64 = 14757395258967641293; // 5 * m_inv_5 = 1 (mod 2^64) 41 const n_div_5: u64 = 3689348814741910323; 42 let count: u32 = 0; 43 for (true) { 44 assert(value != 0); 45 value *= m_inv_5; 46 if (value > n_div_5) break; 47 count += 1; 48 }; 49 return count; 50 }; 51 52 fn pow5fac32(value: u32) u32 = { 53 let count: u32 = 0; 54 for (true) { 55 assert(value != 0); 56 const q = value / 5, r = value % 5; 57 if (r != 0) break; 58 value = q; 59 count += 1; 60 }; 61 return count; 62 }; 63 64 fn ibool(b: bool) u8 = if (b) 1 else 0; 65 66 fn pow5multiple(v: u64, p: u32) bool = pow5fac(v) >= p; 67 fn pow5multiple32(v: u32, p: u32) bool = pow5fac32(v) >= p; 68 69 fn pow2multiple(v: u64, p: u32) bool = { 70 assert(v > 0); 71 assert(p < 64); 72 return (v & ((1u64 << p) - 1)) == 0; 73 }; 74 75 fn pow2multiple32(v: u32, p: u32) bool = { 76 assert(v > 0); 77 assert(p < 32); 78 return (v & ((1u32 << p) - 1)) == 0; 79 }; 80 81 fn mulshift64(m: u64, mul: (u64, u64), j: u32) u64 = { 82 // m is maximum 55 bits 83 let r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1); 84 const sum = r1.lo + r0.hi; 85 r1.hi += ibool(sum < r0.hi); 86 return u128rshift(sum, r1.hi, j - 64); 87 }; 88 89 fn mulshiftall64( 90 m: u64, 91 mul: (u64, u64), 92 j: i32, 93 mm_shift: u32, 94 ) (u64, u64, u64) = { 95 m <<= 1; 96 const r0 = u128mul(m, mul.0), r1 = u128mul(m, mul.1); 97 const lo = r0.lo, tmp = r0.hi, mid = tmp + r1.lo; 98 const hi = r1.hi + ibool(mid < tmp); 99 const lo2 = lo + mul.0; 100 const mid2 = mid + mul.1 + ibool(lo2 < lo); 101 const hi2 = hi + ibool(mid2 < mid); 102 const v_plus = u128rshift(mid2, hi2, (j - 64 - 1): u32); 103 const v_minus = if (mm_shift == 1) { 104 const lo3 = lo - mul.0; 105 const mid3 = mid - mul.1 - ibool(lo3 > lo); 106 const hi3 = hi - ibool(mid3 > mid); 107 yield u128rshift(mid3, hi3, (j - 64 - 1): u32); 108 } else { 109 const lo3 = lo + lo; 110 const mid3 = mid + mid + ibool(lo3 < lo); 111 const hi3 = hi + hi + ibool(mid3 < mid); 112 const lo4 = lo3 - mul.0; 113 const mid4 = mid3 - mul.1 - ibool(lo4 > lo3); 114 const hi4 = hi3 - ibool(mid4 > mid3); 115 yield u128rshift(mid4, hi4, (j - 64): u32); 116 }; 117 const v_rounded = u128rshift(mid, hi, (j - 64 - 1): u32); 118 return (v_plus, v_rounded, v_minus); 119 }; 120 121 fn mulshift32(m: u32, a: u64, s: u32) u32 = { 122 assert(s > 32); 123 const a_lo = a: u32: u64, a_hi = a >> 32; 124 const b0 = m * a_lo, b1 = m * a_hi; 125 const sum = (b0 >> 32) + b1, ss = sum >> (s - 32); 126 assert(ss <= types::U32_MAX); 127 return ss: u32; 128 }; 129 130 fn mulpow5inv_divpow2(m: u32, q: u32, j: i32) u32 = { 131 const pow5 = f64computeinvpow5(q); 132 return mulshift32(m, pow5.1 + 1, j: u32); 133 }; 134 135 fn mulpow5_divpow2(m: u32, i: u32, j: i32) u32 = { 136 const pow5 = f64computepow5(i); 137 return mulshift32(m, pow5.1, j: u32); 138 }; 139 140 fn log2pow5(e: u32) u32 = { 141 assert(e <= 3528); 142 return ((e * 1217359) >> 19); 143 }; 144 145 fn ceil_log2pow5(e: u32) u32 = log2pow5(e) + 1; 146 147 fn pow5bits(e: u32) u32 = ceil_log2pow5(e); 148 149 fn log10pow2(e: u32) u32 = { 150 assert(e <= 1650); 151 return ((e * 78913) >> 18); 152 }; 153 154 fn log10pow5(e: u32) u32 = { 155 assert(e <= 2620); 156 return ((e * 732923) >> 20); 157 }; 158 159 def F64_POW5_INV_BITCOUNT: u8 = 125; 160 def F64_POW5_BITCOUNT: u8 = 125; 161 162 def F32_POW5_INV_BITCOUNT: u8 = F64_POW5_INV_BITCOUNT - 64; 163 def F32_POW5_BITCOUNT: u8 = F64_POW5_BITCOUNT - 64; 164 165 const F64_POW5_INV_SPLIT2: [15][2]u64 = [ 166 [1, 2305843009213693952], 167 [5955668970331000884, 1784059615882449851], 168 [8982663654677661702, 1380349269358112757], 169 [7286864317269821294, 2135987035920910082], 170 [7005857020398200553, 1652639921975621497], 171 [17965325103354776697, 1278668206209430417], 172 [8928596168509315048, 1978643211784836272], 173 [10075671573058298858, 1530901034580419511], 174 [597001226353042382, 1184477304306571148], 175 [1527430471115325346, 1832889850782397517], 176 [12533209867169019542, 1418129833677084982], 177 [5577825024675947042, 2194449627517475473], 178 [11006974540203867551, 1697873161311732311], 179 [10313493231639821582, 1313665730009899186], 180 [12701016819766672773, 2032799256770390445], 181 ]; 182 183 const POW5_INV_OFFSETS: [19]u32 = [ 184 0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555, 185 0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054, 186 0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554, 187 0x00000000 188 ]; 189 190 const F64_POW5_SPLIT2: [13][2]u64 = [ 191 [0, 1152921504606846976], 192 [0, 1490116119384765625], 193 [1032610780636961552, 1925929944387235853], 194 [7910200175544436838, 1244603055572228341], 195 [16941905809032713930, 1608611746708759036], 196 [13024893955298202172, 2079081953128979843], 197 [6607496772837067824, 1343575221513417750], 198 [17332926989895652603, 1736530273035216783], 199 [13037379183483547984, 2244412773384604712], 200 [1605989338741628675, 1450417759929778918], 201 [9630225068416591280, 1874621017369538693], 202 [665883850346957067, 1211445438634777304], 203 [14931890668723713708, 1565756531257009982] 204 ]; 205 206 const POW5_OFFSETS: [21]u32 = [ 207 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995, 208 0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540, 209 0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151, 210 0x55559155, 0x51405555, 0x00000105 211 ]; 212 213 def POW5_TABLE_SZ: u8 = 26; 214 215 const POW5_TABLE: [POW5_TABLE_SZ]u64 = [ 216 1u64, 5u64, 25u64, 125u64, 625u64, 3125u64, 15625u64, 78125u64, 217 390625u64, 1953125u64, 9765625u64, 48828125u64, 244140625u64, 218 1220703125u64, 6103515625u64, 30517578125u64, 152587890625u64, 219 762939453125u64, 3814697265625u64, 19073486328125u64, 95367431640625u64, 220 476837158203125u64, 2384185791015625u64, 11920928955078125u64, 221 59604644775390625u64, 298023223876953125u64 //, 1490116119384765625u64 222 ]; 223 224 fn f64computeinvpow5(i: u32) (u64, u64) = { 225 const base = ((i + POW5_TABLE_SZ - 1) / POW5_TABLE_SZ): u32; 226 const base2 = base * POW5_TABLE_SZ; 227 const mul = F64_POW5_INV_SPLIT2[base]; 228 const off = base2 - i; 229 if (off == 0) { 230 return (mul[0], mul[1]); 231 }; 232 const m = POW5_TABLE[off]; 233 const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0] - 1); 234 let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo; 235 const sum = high0 + low1; 236 if (sum < high0) { 237 high1 += 1; 238 }; 239 const delta = pow5bits(base2) - pow5bits(i); 240 const res0 = u128rshift(low0, sum, delta) + 1 + 241 ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3); 242 const res1 = u128rshift(sum, high1, delta); 243 return (res0, res1); 244 }; 245 246 fn f64computepow5(i: u32) (u64, u64) = { 247 const base = i / POW5_TABLE_SZ, base2 = base * POW5_TABLE_SZ; 248 const mul = F64_POW5_SPLIT2[base]; 249 const off = i - base2; 250 if (off == 0) { 251 return (mul[0], mul[1]); 252 }; 253 const m = POW5_TABLE[off]; 254 const r1 = u128mul(m, mul[1]), r0 = u128mul(m, mul[0]); 255 let high1 = r1.hi, low1 = r1.lo, high0 = r0.hi, low0 = r0.lo; 256 const sum = high0 + low1; 257 if (sum < high0) { 258 high1 += 1; 259 }; 260 const delta = pow5bits(i) - pow5bits(base2); 261 const res0 = u128rshift(low0, sum, delta) + 262 ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3); 263 const res1 = u128rshift(sum, high1, delta); 264 return (res0, res1); 265 }; 266 267 type decf64 = struct { 268 mantissa: u64, 269 exponent: i32, 270 }; 271 272 fn f64todecf64(mantissa: u64, exponent: u32) decf64 = { 273 let e2 = (math::F64_EXPONENT_BIAS + math::F64_MANTISSA_BITS + 2): i32; 274 let m2: u64 = 0; 275 if (exponent == 0) { 276 e2 = 1 - e2; 277 m2 = mantissa; 278 } else { 279 e2 = (exponent: i32) - e2; 280 m2 = (1u64 << math::F64_MANTISSA_BITS) | mantissa; 281 }; 282 const accept_bounds = (m2 & 1) == 0; 283 const mv = 4 * m2; 284 const mm_shift = ibool(mantissa != 0 || exponent <= 1); 285 let vp: u64 = 0, vr: u64 = 0, vm: u64 = 0; 286 let e10: i32 = 0; 287 let vm_trailing_zeros = false, vr_trailing_zeros = false; 288 if (e2 >= 0) { 289 const q = log10pow2(e2: u32) - ibool(e2 > 3); 290 e10 = q: i32; 291 const k = F64_POW5_INV_BITCOUNT + pow5bits(q) - 1; 292 const i = -e2 + (q + k): i32; 293 let pow5 = f64computeinvpow5(q); 294 const res = mulshiftall64(m2, pow5, i, mm_shift); 295 vp = res.0; vr = res.1; vm = res.2; 296 if (q <= 21) { 297 if ((mv - 5 * (mv / 5)) == 0) { 298 vr_trailing_zeros = pow5multiple(mv, q); 299 } else if (accept_bounds) { 300 vm_trailing_zeros = pow5multiple(mv - 1 - 301 mm_shift, q); 302 } else { 303 vp -= ibool(pow5multiple(mv + 2, q)); 304 }; 305 }; 306 } else { 307 const q = log10pow5((-e2): u32) - ibool(-e2 > 1); 308 e10 = e2 + (q: i32); 309 const i = -e2 - (q: i32); 310 const k = pow5bits(i: u32): i32 - F64_POW5_BITCOUNT: i32; 311 const j = (q: i32) - k; 312 let pow5 = f64computepow5(i: u32); 313 const res = mulshiftall64(m2, pow5, j, mm_shift); 314 vp = res.0; vr = res.1; vm = res.2; 315 if (q <= 1) { 316 vr_trailing_zeros = true; 317 if (accept_bounds) { 318 vm_trailing_zeros = mm_shift == 1; 319 } else { 320 vp -= 1; 321 }; 322 } else if (q < 63) { 323 vr_trailing_zeros = pow2multiple(mv, q); 324 }; 325 }; 326 let removed: i32 = 0, last_removed_digit: u8 = 0; 327 let output: u64 = 0; 328 if (vm_trailing_zeros || vr_trailing_zeros) { 329 for (true) { 330 const vpby10 = vp / 10, vmby10 = vm / 10; 331 if (vpby10 <= vmby10) break; 332 const vmmod10 = (vm: u32) - 10 * (vmby10: u32); 333 const vrby10 = vr / 10; 334 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 335 vm_trailing_zeros &&= vmmod10 == 0; 336 vr_trailing_zeros &&= last_removed_digit == 0; 337 last_removed_digit = vrmod10: u8; 338 vr = vrby10; vp = vpby10; vm = vmby10; 339 removed += 1; 340 }; 341 if (vm_trailing_zeros) { 342 for (true) { 343 const vmby10 = vm / 10; 344 const vmmod10 = (vm: u32) - 10 * (vmby10: u32); 345 if (vmmod10 != 0) break; 346 const vpby10 = vp / 10, vrby10 = vr / 10; 347 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 348 vr_trailing_zeros &&= last_removed_digit == 0; 349 last_removed_digit = vrmod10: u8; 350 vr = vrby10; vp = vpby10; vm = vmby10; 351 removed += 1; 352 }; 353 }; 354 if (vr_trailing_zeros && last_removed_digit == 5 && 355 (vr & 1 == 0)) { 356 // round to even 357 last_removed_digit = 4; 358 }; 359 output = vr + ibool((vr == vm && 360 (!accept_bounds || !vm_trailing_zeros)) || 361 last_removed_digit >= 5); 362 } else { 363 let round_up = false; 364 const vpby100 = vp / 100, vmby100 = vm / 100; 365 if (vpby100 > vmby100) { 366 const vrby100 = vr / 100; 367 const vrmod100 = (vr: u32) - 100 * (vrby100: u32); 368 round_up = vrmod100 >= 50; 369 vr = vrby100; vp = vpby100; vm = vmby100; 370 removed += 2; 371 }; 372 for (true) { 373 const vmby10 = vm / 10, vpby10 = vp / 10; 374 if (vpby10 <= vmby10) break; 375 const vrby10 = vr / 10; 376 const vrmod10 = (vr: u32) - 10 * (vrby10: u32); 377 round_up = vrmod10 >= 5; 378 vr = vrby10; vp = vpby10; vm = vmby10; 379 removed += 1; 380 }; 381 output = vr + ibool(vr == vm || round_up); 382 }; 383 const exp = e10 + removed; 384 return decf64 { exponent = exp, mantissa = output }; 385 }; 386 387 type decf32 = struct { 388 mantissa: u32, 389 exponent: i32, 390 }; 391 392 fn f32todecf32(mantissa: u32, exponent: u32) decf32 = { 393 let e2 = (math::F32_EXPONENT_BIAS + math::F32_MANTISSA_BITS + 2): i32; 394 let m2: u32 = 0; 395 if (exponent == 0) { 396 e2 = 1 - e2; 397 m2 = mantissa; 398 } else { 399 e2 = (exponent: i32) - e2; 400 m2 = (1u32 << math::F32_MANTISSA_BITS: u32) | mantissa; 401 }; 402 const accept_bounds = (m2 & 1) == 0; 403 const mv = 4 * m2, mp = mv + 2; 404 const mm_shift = ibool(mantissa != 0 || exponent <= 1); 405 const mm = mv - 1 - mm_shift; 406 let vr: u32 = 0, vp: u32 = 0, vm: u32 = 0; 407 let e10: i32 = 0; 408 let vm_trailing_zeroes = false, vr_trailing_zeroes = false; 409 let last_removed_digit: u8 = 0; 410 if (e2 >= 0) { 411 const q = log10pow2(e2: u32); 412 e10 = q: i32; 413 const k = F32_POW5_INV_BITCOUNT + pow5bits(q) - 1; 414 const i = -e2 + (q + k): i32; 415 vr = mulpow5inv_divpow2(mv, q, i); 416 vp = mulpow5inv_divpow2(mp, q, i); 417 vm = mulpow5inv_divpow2(mm, q, i); 418 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 419 const l = F32_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1; 420 last_removed_digit = (mulpow5inv_divpow2(mv, q - 1, 421 -e2 + ((q + l): i32) - 1) % 10): u8; 422 }; 423 if (q <= 9) { 424 if (mv % 5 == 0) { 425 vr_trailing_zeroes = pow5multiple32(mv, q); 426 } else if (accept_bounds) { 427 vm_trailing_zeroes = pow5multiple32(mm, q); 428 } else { 429 vp -= ibool(pow5multiple32(mp, q)); 430 }; 431 }; 432 } else { 433 const q = log10pow5((-e2): u32); 434 e10 = (q: i32) + e2; 435 const i = (-e2 - (q: i32)): u32; 436 const k = pow5bits(i) - F32_POW5_BITCOUNT; 437 let j = (q: i32) - k: i32; 438 vr = mulpow5_divpow2(mv, i, j); 439 vp = mulpow5_divpow2(mp, i, j); 440 vm = mulpow5_divpow2(mm, i, j); 441 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 442 j = (q: i32) - 1 - (pow5bits(i + 1): i32 - 443 F32_POW5_BITCOUNT: i32); 444 last_removed_digit = (mulpow5_divpow2(mv, 445 (i + 1), j) % 10): u8; 446 }; 447 if (q <= 1) { 448 vr_trailing_zeroes = true; 449 if (accept_bounds) { 450 vm_trailing_zeroes = mm_shift == 1; 451 } else { 452 vp -= 1; 453 }; 454 } else if (q < 31) { 455 vr_trailing_zeroes = pow2multiple32(mv, q - 1); 456 }; 457 }; 458 let removed: i32 = 0, output: u32 = 0; 459 if (vm_trailing_zeroes || vr_trailing_zeroes) { 460 for (vp / 10 > vm / 10) { 461 vm_trailing_zeroes &&= (vm - (vm / 10) * 10) == 0; 462 vr_trailing_zeroes &&= last_removed_digit == 0; 463 last_removed_digit = (vr % 10): u8; 464 vr /= 10; 465 vp /= 10; 466 vm /= 10; 467 removed += 1; 468 }; 469 if (vm_trailing_zeroes) { 470 for (vm % 10 == 0) { 471 vr_trailing_zeroes &&= last_removed_digit == 0; 472 last_removed_digit = (vr % 10): u8; 473 vr /= 10; 474 vp /= 10; 475 vm /= 10; 476 removed += 1; 477 }; 478 }; 479 if (vr_trailing_zeroes && last_removed_digit == 5 && 480 vr % 2 == 0) { 481 // round to even 482 last_removed_digit = 4; 483 }; 484 output = vr + ibool((vr == vm && 485 (!accept_bounds || !vm_trailing_zeroes)) || 486 last_removed_digit >= 5); 487 } else { 488 for (vp / 10 > vm / 10) { 489 last_removed_digit = (vr % 10): u8; 490 vr /= 10; 491 vp /= 10; 492 vm /= 10; 493 removed += 1; 494 }; 495 output = vr + ibool(vr == vm || last_removed_digit >= 5); 496 }; 497 const exp = e10 + removed; 498 return decf32 { mantissa = output, exponent = exp }; 499 }; 500 501 def F32_DECIMAL_DIGITS: i32 = 9; 502 def F64_DECIMAL_DIGITS: i32 = 17;