commit 02a82de541b029abf90ed8113d0faa1a484a9acc
parent da61643d84e3bf73d20eb48d590eb785859e1b30
Author: spxtr <me@spxtr.net>
Date: Sat, 17 Feb 2024 20:12:22 +0000
strconv: add sto(f64|f32)b with hex base support.
Signed-off-by: Joe Finney <me@spxtr.net>
Diffstat:
M | strconv/stof.ha | | | 239 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------- |
1 file changed, 187 insertions(+), 52 deletions(-)
diff --git a/strconv/stof.ha b/strconv/stof.ha
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>
+// (c) 2010 The Go Authors. All rights reserved.
// Using the Eisel-Lemire algorithm [1] for fast parsing of floating-point
// numbers, with Simple Decimal Conversion algorithm [2] as fallback.
@@ -195,6 +196,17 @@ fn decimal_round(d: *decimal) u64 = {
return n;
};
+// Converts a single ASCII digit character to its numeric value: '0'-'9' map to
+// 0-9, and 'a'-'f' / 'A'-'F' map to 10-15. Any other byte aborts, so callers
+// must validate the character before calling.
+fn todig(c: u8) u8 = {
+ if ('0' <= c && c <= '9') {
+ return c - '0';
+ } else if ('a' <= c && c <= 'f') {
+ return c - 'a' + 10;
+ } else if ('A' <= c && c <= 'F') {
+ return c - 'A' + 10;
+ };
+ abort("unreachable");
+};
+
type fast_parsed_float = struct {
mantissa: u64,
exponent: i32,
@@ -202,10 +214,7 @@ type fast_parsed_float = struct {
truncated: bool,
};
-fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
- if (len(s) == 0 || len(s) > 256) {
- return;
- };
+fn fast_parse(s: str, b: base) (fast_parsed_float | invalid) = {
let buf = strings::toutf8(s);
let i = 0z, neg = false, trunc = false;
if (buf[i] == '-') {
@@ -214,6 +223,15 @@ fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
} else if (buf[i] == '+') {
i += 1;
};
+
+ let (expchr, max_ndmant, isdigit) = switch (b) {
+ case base::DEC =>
+ yield ('e', 19, &ascii::isdigit);
+ case base::HEX =>
+ yield ('p', 16, &ascii::isxdigit);
+ case => abort("unreachable");
+ };
+
let sawdot = false, sawdigits = false;
let nd = 0, ndmant = 0, dp = 0;
let mant = 0u64, exp = 0i32;
@@ -222,15 +240,15 @@ fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
if (sawdot) return i: invalid;
sawdot = true;
dp = nd;
- } else if (ascii::isdigit(buf[i]: rune)) {
+ } else if (isdigit(buf[i]: rune)) {
sawdigits = true;
if (buf[i] == '0' && nd == 0) {
dp -= 1;
continue;
};
nd += 1;
- if (ndmant < 19) {
- mant = mant * 10 + buf[i] - '0';
+ if (ndmant < max_ndmant) {
+ mant = mant * b + todig(buf[i]);
ndmant += 1;
} else if (buf[i] != '0') {
trunc = true;
@@ -241,7 +259,11 @@ fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
if (!sawdot) {
dp = nd;
};
- if (i < len(s) && (buf[i] == 'e' || buf[i] == 'E')) {
+ if (b == base::HEX) {
+ dp *= 4;
+ ndmant *= 4;
+ };
+ if (i < len(s) && ascii::tolower(buf[i]: rune) == expchr) {
i += 1;
if (i >= len(s)) return i: invalid;
let expsign: int = 1;
@@ -260,6 +282,8 @@ fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
};
};
dp += e * expsign;
+ } else if (b == base::HEX) {
+ return i: invalid; // hex floats must have exponent
};
if (i != len(s)) return i: invalid;
if (mant != 0) {
@@ -275,7 +299,6 @@ fn fast_parse(s: str) (fast_parsed_float | void | invalid) = {
fn decimal_parse(d: *decimal, s: str) (void | invalid) = {
let i = 0z;
- if (len(s) == 0) return 0: invalid;
const buf = strings::toutf8(s);
d.negative = false;
d.truncated = false;
@@ -286,7 +309,6 @@ fn decimal_parse(d: *decimal, s: str) (void | invalid) = {
i += 1;
};
let sawdot = false, sawdigits = false;
- let nd: u32 = 0, dp: i32 = 0;
for (i < len(s); i += 1) {
if (buf[i] == '.') {
if (sawdot) return i: invalid;
@@ -528,6 +550,63 @@ fn stof32exact(mant: u64, exp: i32, neg: bool) (f32 | void) = {
return n;
};
+// Adapted from golang's atofHex.
+//
+// Packs a parsed hexadecimal float into the raw bit pattern of the format
+// described by info (sign, exponent field, mantissa field). The result is
+// returned as a u64; callers hand it to math::f64frombits, or narrow it to u32
+// for math::f32frombits. Returns [[overflow]] if the rounded value needs an
+// exponent larger than the format's largest finite exponent.
+//
+// p is received by value, so the in-place updates below only touch this
+// function's copy. NOTE(review): p is presumably interpreted as
+// p.mantissa * 2^p.exponent, as produced by fast_parse in hex mode -- confirm
+// against the hidden tail of fast_parse.
+fn hex_to_bits(
+ p: fast_parsed_float,
+ info: *math::floatinfo,
+) (u64 | overflow) = {
+ // Largest and smallest unbiased exponents of a normal number.
+ const max_exp = (1 << info.expbits): int - info.expbias - 2;
+ const min_exp = -info.expbias + 1;
+ // Offset the exponent by the mantissa width before normalizing.
+ p.exponent += info.mantbits: i32;
+
+ // Shift left until we have a leading 1 bit in the mantissa followed by
+ // mantbits, plus two more for rounding.
+ for (p.mantissa != 0 && p.mantissa >> (info.mantbits + 2) == 0) {
+ p.mantissa <<= 1;
+ p.exponent -= 1;
+ };
+ // The lowest of the two rounding bits is set if we truncated.
+ if (p.truncated) {
+ p.mantissa |= 1;
+ };
+ // If we have too many bits, shift right.
+ // OR-ing the old low bit back in keeps it "sticky": it stays set if any
+ // bit shifted out was set.
+ for (p.mantissa >> (3 + info.mantbits) != 0) {
+ p.mantissa = (p.mantissa >> 1) | (p.mantissa & 1);
+ p.exponent += 1;
+ };
+ // Denormalize if the exponent is small.
+ // The "- 2" accounts for the two rounding bits still held in the
+ // mantissa; the sticky low bit is preserved as above.
+ for (p.mantissa > 1 && p.exponent < min_exp: i32 - 2) {
+ p.mantissa = (p.mantissa >> 1) | (p.mantissa & 1);
+ p.exponent += 1;
+ };
+ // Round to even.
+ // round == 3 means the upper rounding bit is set and either the sticky
+ // bit is set (above half) or the kept mantissa is odd (exact tie).
+ let round = p.mantissa & 3;
+ p.mantissa >>= 2;
+ round |= p.mantissa & 1;
+ p.exponent += 2;
+ if (round == 3) {
+ p.mantissa += 1;
+ // Rounding carried out of the mantissa width; renormalize.
+ if (p.mantissa == 1 << (1 + info.mantbits)) {
+ p.mantissa >>= 1;
+ p.exponent += 1;
+ };
+ };
+ // Denormal or zero.
+ // No leading 1 bit at position mantbits, so the stored exponent field
+ // must be zero (-expbias + expbias below).
+ if (p.mantissa >> info.mantbits == 0) {
+ p.exponent = -info.expbias;
+ };
+ if (p.exponent > max_exp: i32) {
+ return overflow;
+ };
+ // Assemble the fields: mantissa bits, then the biased exponent, then the
+ // sign bit.
+ let bits = p.mantissa & info.mantmask;
+ bits |= ((p.exponent + info.expbias: i32): u64 & info.expmask)
+ << info.mantbits;
+ if (p.negative) {
+ bits |= 1 << (info.mantbits + info.expbits);
+ };
+ return bits;
+};
+
fn special(s: str) (f32 | void) = {
if (ascii::strcasecmp(s, "nan") == 0) {
return math::NAN;
@@ -540,33 +619,39 @@ fn special(s: str) (f32 | void) = {
};
};
-// Converts a string to a f64. If the string is not syntactically well-formed
-// floating-point number in base 10, [[invalid]] is returned. If the string
-// represents a floating-point number that is larger than the largest finite f64
-// number, [[overflow]] is returned. Zero is returned if the string represents a
-// floating-point number that is smaller than the f64 number nearest to zero
-// with respective sign.
+// Converts a string to a f64 in [[base::DEC]] or [[base::HEX]]. If the string
+// is not a syntactically well-formed floating-point number, [[invalid]] is
+// returned. If the string represents a floating-point number that is larger
+// than the largest finite f64 number, [[overflow]] is returned. Zero is
+// returned if the string represents a floating-point number that is smaller
+// than the f64 number nearest to zero with respective sign.
// Recognizes "Infinity", "+Infinity", "-Infinity", and "NaN", case insensitive.
-export fn stof64(s: str) (f64 | invalid | overflow) = {
+export fn stof64b(s: str, b: base) (f64 | invalid | overflow) = {
+ if (b == base::DEFAULT) b = base::DEC;
+ assert(b == base::DEC || b == base::HEX);
+
+ if (len(s) == 0) {
+ return 0z: invalid;
+ };
+
match (special(s)) {
case let f: f32 =>
return f;
case void => void;
};
- const p = fast_parse(s)?;
- if (p is fast_parsed_float) {
- const p = p: fast_parsed_float;
- if (!p.truncated) {
- let n = stof64exact(p.mantissa, p.exponent,
- p.negative);
- if (n is f64) {
- return n: f64;
- };
- let n = eisel_lemire(p.mantissa, p.exponent,
- p.negative, &math::f64info);
- if (n is u64) {
- return math::f64frombits(n: u64);
- };
+
+ const p = fast_parse(s, b)?;
+ if (b == base::HEX) {
+ return math::f64frombits(hex_to_bits(p, &math::f64info)?);
+ } else if (!p.truncated) {
+ let n = stof64exact(p.mantissa, p.exponent, p.negative);
+ if (n is f64) {
+ return n: f64;
+ };
+ let n = eisel_lemire(p.mantissa, p.exponent, p.negative,
+ &math::f64info);
+ if (n is u64) {
+ return math::f64frombits(n: u64);
};
};
let d = decimal { ... };
@@ -575,33 +660,39 @@ export fn stof64(s: str) (f64 | invalid | overflow) = {
return math::f64frombits(n);
};
-// Converts a string to a f32. If the string is not syntactically well-formed
-// floating-point number in base 10, [[invalid]] is returned. If the string
-// represents a floating-point number that is larger than the largest finite f32
-// number, [[overflow]] is returned. Zero is returned if the string represents a
-// floating-point number that is smaller than the f32 number nearest to zero
-// with respective sign.
+// Converts a string to a f32 in [[base::DEC]] or [[base::HEX]]. If the string
+// is not a syntactically well-formed floating-point number, [[invalid]] is
+// returned. If the string represents a floating-point number that is larger
+// than the largest finite f32 number, [[overflow]] is returned. Zero is
+// returned if the string represents a floating-point number that is smaller
+// than the f32 number nearest to zero with respective sign.
// Recognizes "Infinity", "+Infinity", "-Infinity", and "NaN", case insensitive.
-export fn stof32(s: str) (f32 | invalid | overflow) = {
+export fn stof32b(s: str, b: base) (f32 | invalid | overflow) = {
+ if (b == base::DEFAULT) b = base::DEC;
+ assert(b == base::DEC || b == base::HEX);
+
+ if (len(s) == 0) {
+ return 0z: invalid;
+ };
+
match (special(s)) {
case let f: f32 =>
return f;
case void => void;
};
- const p = fast_parse(s)?;
- if (p is fast_parsed_float) {
- const p = p: fast_parsed_float;
- if (!p.truncated) {
- let n = stof32exact(p.mantissa, p.exponent,
- p.negative);
- if (n is f32) {
- return n: f32;
- };
- let n = eisel_lemire(p.mantissa, p.exponent,
- p.negative, &math::f32info);
- if (n is u64) {
- return math::f32frombits(n: u64: u32);
- };
+
+ const p = fast_parse(s, b)?;
+ if (b == base::HEX) {
+ return math::f32frombits(hex_to_bits(p, &math::f32info)?: u32);
+ } else if (!p.truncated) {
+ let n = stof32exact(p.mantissa, p.exponent, p.negative);
+ if (n is f32) {
+ return n: f32;
+ };
+ let n = eisel_lemire(p.mantissa, p.exponent, p.negative,
+ &math::f32info);
+ if (n is u64) {
+ return math::f32frombits(n: u64: u32);
};
};
let d = decimal { ... };
@@ -610,6 +701,25 @@ export fn stof32(s: str) (f32 | invalid | overflow) = {
return math::f32frombits(n);
};
+
+// Converts a string to a f64. If the string is not a syntactically well-formed
+// floating-point number in base 10, [[invalid]] is returned. If the string
+// represents a floating-point number that is larger than the largest finite f64
+// number, [[overflow]] is returned. Zero is returned if the string represents a
+// floating-point number that is smaller than the f64 number nearest to zero
+// with respective sign.
+// Recognizes "Infinity", "+Infinity", "-Infinity", and "NaN", case insensitive.
+// Equivalent to calling [[stof64b]] with [[base::DEC]].
+export fn stof64(s: str) (f64 | invalid | overflow) = stof64b(s, base::DEC);
+
+// Converts a string to a f32. If the string is not a syntactically well-formed
+// floating-point number in base 10, [[invalid]] is returned. If the string
+// represents a floating-point number that is larger than the largest finite f32
+// number, [[overflow]] is returned. Zero is returned if the string represents a
+// floating-point number that is smaller than the f32 number nearest to zero
+// with respective sign.
+// Recognizes "Infinity", "+Infinity", "-Infinity", and "NaN", case insensitive.
+// Equivalent to calling [[stof32b]] with [[base::DEC]].
+export fn stof32(s: str) (f32 | invalid | overflow) = stof32b(s, base::DEC);
+
@test fn stof64() void = {
assert(stof64("0"): f64 == 0.0);
assert(stof64("200"): f64 == 200.0);
@@ -670,3 +780,28 @@ export fn stof32(s: str) (f32 | invalid | overflow) = {
== 9.19100241453305036800e+20);
};
+// Exercises hexadecimal parsing through stof64b and stof32b. Every input is
+// written without a "0x" prefix and carries a 'p' exponent.
+@test fn stofhex() void = {
+ // f64: zero, one, sign handling and a fractional mantissa.
+ assert(stof64b("0p0", base::HEX)! == 0x0.0p0);
+ assert(stof64b("1p0", base::HEX)! == 0x1.0p0);
+ assert(stof64b("-1p0", base::HEX)! == -0x1.0p0);
+ assert(stof64b("1.fp-2", base::HEX)! == 0x1.fp-2);
+ // f64: range boundaries, checked against the math:: constants.
+ assert(stof64b("1.fffffffffffffp+1023", base::HEX)!
+ == math::F64_MAX_NORMAL);
+ assert(stof64b("1.0000000000000p-1022", base::HEX)!
+ == math::F64_MIN_NORMAL);
+ assert(stof64b("0.0000000000001p-1022", base::HEX)!
+ == math::F64_MIN);
+ // f64: one exponent step past the largest value overflows; one extra
+ // fractional hex digit below F64_MIN is expected to yield zero.
+ assert(stof64b("1p+1024", base::HEX) is overflow);
+ assert(stof64b("0.00000000000001p-1022", base::HEX)! == 0.0);
+
+ // f32: same basic cases as above.
+ assert(stof32b("0p0", base::HEX)! == 0x0.0p0);
+ assert(stof32b("1p0", base::HEX)! == 0x1.0p0);
+ assert(stof32b("-1p0", base::HEX)! == -0x1.0p0);
+ assert(stof32b("1.fp-2", base::HEX)! == 0x1.fp-2);
+ // f32: this mantissa has more digits than the other f32 inputs and is
+ // expected to come out as exactly F32_MAX_NORMAL.
+ assert(stof32b("1.fffffd586b834p+127", base::HEX)!
+ == math::F32_MAX_NORMAL);
+ assert(stof32b("1.0p-126", base::HEX)! == math::F32_MIN_NORMAL);
+ // f32: 1.6p-150 is expected to come out as F32_MIN, while 1.0p-151 is
+ // expected to yield zero.
+ assert(stof32b("1.6p-150", base::HEX)! == math::F32_MIN);
+ assert(stof32b("1.0p+128", base::HEX) is overflow);
+ assert(stof32b("1.0p-151", base::HEX)! == 0.0);
+};