hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 4f93fac296f461eab3a02a545d1f2dc958a75191
parent d38b03121f3c01415b350f08c847a6dd22fed9de
Author: Sebastian <sebastian@sebsite.pw>
Date:   Sat, 14 May 2022 22:46:12 -0400

encoding::json: fix number lexing

Fixes: https://todo.sr.ht/~sircmpwn/hare/689
Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M encoding/json/+test/lexer.ha |  2 ++
M encoding/json/lex.ha         | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/encoding/json/+test/lexer.ha b/encoding/json/+test/lexer.ha
@@ -11,6 +11,8 @@ use io;
 	("12.34", [12.34]),
 	("12.34e5", [12.34e5]),
 	("12.34E5", [12.34e5]),
+	("12.34e+5", [12.34e5]),
+	("12.34e-5", [12.34e-5]),
 	("12e5", [12.0e5]),
 	("-1234", [-1234.0]),
 	(`"hello world"`, ["hello world"]),
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -73,7 +73,7 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
 		yield;
 	};
 
-	if (ascii::isdigit(rn) || rn == '+' || rn == '-') {
+	if (ascii::isdigit(rn) || rn == '-') {
 		unget(lex, rn);
 		return scan_number(lex)?;
 	};
@@ -123,15 +123,21 @@ fn scan_word(lex: *lexer) (str | error) = {
 };
 
 type numstate = enum {
+	SIGN,
+	START,
+	ZERO,
 	INTEGER,
+	FRACSTART,
 	FRACTION,
+	EXPSIGN,
+	EXPSTART,
 	EXPONENT,
 };
 
 fn scan_number(lex: *lexer) (token | error) = {
 	strio::reset(&lex.strbuf);
 
-	let state = numstate::INTEGER;
+	let state = numstate::SIGN;
 	for (true) {
 		const rn = match (nextrune(lex)?) {
 		case let rn: rune =>
@@ -141,30 +147,73 @@ fn scan_number(lex: *lexer) (token | error) = {
 		};
 
 		switch (state) {
+		case numstate::SIGN =>
+			state = numstate::START;
+			if (rn != '-') {
+				unget(lex, rn);
+				continue;
+			};
+		case numstate::START =>
+			switch (rn) {
+			case '0' =>
+				state = numstate::ZERO;
+			case =>
+				if (!ascii::isdigit(rn)) {
+					return invalid;
+				};
+				state = numstate::INTEGER;
+			};
+		case numstate::ZERO =>
+			switch (rn) {
+			case '.' =>
+				state = numstate::FRACSTART;
+			case 'e', 'E' =>
+				state = numstate::EXPSIGN;
+			case =>
+				if (ascii::isdigit(rn)) {
+					return invalid;
+				};
+				unget(lex, rn);
+				break;
+			};
 		case numstate::INTEGER =>
 			switch (rn) {
 			case '.' =>
-				state = numstate::FRACTION;
+				state = numstate::FRACSTART;
 			case 'e', 'E' =>
-				state = numstate::EXPONENT;
-			case '+', '-' =>
-				void;
+				state = numstate::EXPSIGN;
 			case =>
 				if (!ascii::isdigit(rn)) {
 					unget(lex, rn);
 					break;
 				};
 			};
+		case numstate::FRACSTART =>
+			if (!ascii::isdigit(rn)) {
+				return invalid;
+			};
+			state = numstate::FRACTION;
 		case numstate::FRACTION =>
 			switch (rn) {
 			case 'e', 'E' =>
-				state = numstate::EXPONENT;
+				state = numstate::EXPSIGN;
 			case =>
 				if (!ascii::isdigit(rn)) {
 					unget(lex, rn);
 					break;
 				};
 			};
+		case numstate::EXPSIGN =>
+			state = numstate::EXPSTART;
+			if (rn != '+' && rn != '-') {
+				unget(lex, rn);
+				continue;
+			};
+		case numstate::EXPSTART =>
+			if (!ascii::isdigit(rn)) {
+				return invalid;
+			};
+			state = numstate::EXPONENT;
 		case numstate::EXPONENT =>
 			if (!ascii::isdigit(rn)) {
 				unget(lex, rn);