commit 4f93fac296f461eab3a02a545d1f2dc958a75191
parent d38b03121f3c01415b350f08c847a6dd22fed9de
Author: Sebastian <sebastian@sebsite.pw>
Date: Sat, 14 May 2022 22:46:12 -0400
encoding::json: fix number lexing
Fixes: https://todo.sr.ht/~sircmpwn/hare/689
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
2 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/encoding/json/+test/lexer.ha b/encoding/json/+test/lexer.ha
@@ -11,6 +11,8 @@ use io;
("12.34", [12.34]),
("12.34e5", [12.34e5]),
("12.34E5", [12.34e5]),
+ ("12.34e+5", [12.34e5]),
+ ("12.34e-5", [12.34e-5]),
("12e5", [12.0e5]),
("-1234", [-1234.0]),
(`"hello world"`, ["hello world"]),
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -73,7 +73,7 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
yield;
};
- if (ascii::isdigit(rn) || rn == '+' || rn == '-') {
+ if (ascii::isdigit(rn) || rn == '-') {
unget(lex, rn);
return scan_number(lex)?;
};
@@ -123,15 +123,21 @@ fn scan_word(lex: *lexer) (str | error) = {
};
type numstate = enum {
+ SIGN,
+ START,
+ ZERO,
INTEGER,
+ FRACSTART,
FRACTION,
+ EXPSIGN,
+ EXPSTART,
EXPONENT,
};
fn scan_number(lex: *lexer) (token | error) = {
strio::reset(&lex.strbuf);
- let state = numstate::INTEGER;
+ let state = numstate::SIGN;
for (true) {
const rn = match (nextrune(lex)?) {
case let rn: rune =>
@@ -141,30 +147,73 @@ fn scan_number(lex: *lexer) (token | error) = {
};
switch (state) {
+ case numstate::SIGN =>
+ state = numstate::START;
+ if (rn != '-') {
+ unget(lex, rn);
+ continue;
+ };
+ case numstate::START =>
+ switch (rn) {
+ case '0' =>
+ state = numstate::ZERO;
+ case =>
+ if (!ascii::isdigit(rn)) {
+ return invalid;
+ };
+ state = numstate::INTEGER;
+ };
+ case numstate::ZERO =>
+ switch (rn) {
+ case '.' =>
+ state = numstate::FRACSTART;
+ case 'e', 'E' =>
+ state = numstate::EXPSIGN;
+ case =>
+ if (ascii::isdigit(rn)) {
+ return invalid;
+ };
+ unget(lex, rn);
+ break;
+ };
case numstate::INTEGER =>
switch (rn) {
case '.' =>
- state = numstate::FRACTION;
+ state = numstate::FRACSTART;
case 'e', 'E' =>
- state = numstate::EXPONENT;
- case '+', '-' =>
- void;
+ state = numstate::EXPSIGN;
case =>
if (!ascii::isdigit(rn)) {
unget(lex, rn);
break;
};
};
+ case numstate::FRACSTART =>
+ if (!ascii::isdigit(rn)) {
+ return invalid;
+ };
+ state = numstate::FRACTION;
case numstate::FRACTION =>
switch (rn) {
case 'e', 'E' =>
- state = numstate::EXPONENT;
+ state = numstate::EXPSIGN;
case =>
if (!ascii::isdigit(rn)) {
unget(lex, rn);
break;
};
};
+ case numstate::EXPSIGN =>
+ state = numstate::EXPSTART;
+ if (rn != '+' && rn != '-') {
+ unget(lex, rn);
+ continue;
+ };
+ case numstate::EXPSTART =>
+ if (!ascii::isdigit(rn)) {
+ return invalid;
+ };
+ state = numstate::EXPONENT;
case numstate::EXPONENT =>
if (!ascii::isdigit(rn)) {
unget(lex, rn);