commit f266b1fc8c4bae556005e5a0615f8582b0ecc088
parent 195f4c13d487174cfd66eb8eff30eeceae71625c
Author: Sebastian <sebastian@sebsite.pw>
Date: Wed, 18 May 2022 23:41:15 -0400
encoding::json: store location in invalid
Tests aren't included; those are added in the "expand tests" commit.
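
The lexer now tracks its position as a (line, column) pair: loc is the
current location, prevloc/nextloc let unlex() restore and re-advance the
position when a token is pushed back, and prevrloc does the same for
unget(). Errors are returned as `lex.loc: invalid`, and strerror()
prefixes its message with the stored line and column.

A rough sketch of how a caller might surface the location (the helper
name and the fmt calls are illustrative, not part of this patch):

    use encoding::json;
    use fmt;
    use io;

    // Hypothetical helper: lex all tokens from src and report the
    // location of the first error, if any.
    fn check(src: io::handle) void = {
        let lx = json::newlexer(src);
        defer json::close(&lx);
        for (true) {
            match (json::lex(&lx)) {
            case io::EOF =>
                break;
            case let err: json::error =>
                match (err) {
                case let loc: json::invalid =>
                    // invalid now carries (line, column)
                    fmt::errorfln("error at {}:{}", loc.0, loc.1)!;
                case =>
                    // strerror now includes the location for
                    // invalid errors as well
                    fmt::errorln(json::strerror(err))!;
                };
                break;
            case json::token =>
                void;
            };
        };
    };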
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
3 files changed, 68 insertions(+), 31 deletions(-)
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -15,6 +15,10 @@ export type lexer = struct {
strbuf: strio::stream,
un: (token | void),
rb: (rune | void),
+ loc: (uint, uint),
+ prevloc: (uint, uint),
+ nextloc: (uint, uint),
+ prevrloc: (uint, uint),
};
// Creates a new JSON lexer. The caller may obtain tokens with [[lex]] and
@@ -27,6 +31,8 @@ export fn newlexer(src: io::handle) lexer = {
strbuf = strio::dynamic(),
un = void,
rb = void,
+ loc = (1, 0),
+ ...
};
};
@@ -39,9 +45,12 @@ export fn close(lex: *lexer) void = {
// the lexer and will be overwritten on subsequent calls.
export fn lex(lex: *lexer) (token | io::EOF | error) = {
match (lex.un) {
- case void => void;
+ case void =>
+ lex.prevloc = lex.loc;
case let tok: token =>
lex.un = void;
+ lex.prevloc = lex.loc;
+ lex.loc = lex.nextloc;
return tok;
};
@@ -68,8 +77,6 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
case '"' =>
return scan_str(lex)?;
case =>
- return invalid;
- case =>
yield;
};
@@ -78,6 +85,10 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
return scan_number(lex)?;
};
+ if (!ascii::isalpha(rn)) {
+ return lex.loc: invalid;
+ };
+
unget(lex, rn);
const word = scan_word(lex)?;
switch (word) {
@@ -88,7 +99,7 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
case "null" =>
return _null;
case =>
- return invalid;
+ return lex.loc: invalid;
};
};
@@ -98,6 +109,8 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = {
export fn unlex(lex: *lexer, tok: token) void = {
assert(lex.un is void, "encoding::json::unlex called twice in a row");
lex.un = tok;
+ lex.nextloc = lex.loc;
+ lex.loc = lex.prevloc;
};
// Scans until encountering a non-alphabetical character, returning the
@@ -159,7 +172,7 @@ fn scan_number(lex: *lexer) (token | error) = {
state = numstate::ZERO;
case =>
if (!ascii::isdigit(rn)) {
- return invalid;
+ return lex.loc: invalid;
};
state = numstate::INTEGER;
};
@@ -171,7 +184,7 @@ fn scan_number(lex: *lexer) (token | error) = {
state = numstate::EXPSIGN;
case =>
if (ascii::isdigit(rn)) {
- return invalid;
+ return lex.loc: invalid;
};
unget(lex, rn);
break;
@@ -190,7 +203,7 @@ fn scan_number(lex: *lexer) (token | error) = {
};
case numstate::FRACSTART =>
if (!ascii::isdigit(rn)) {
- return invalid;
+ return lex.loc: invalid;
};
state = numstate::FRACTION;
case numstate::FRACTION =>
@@ -211,7 +224,7 @@ fn scan_number(lex: *lexer) (token | error) = {
};
case numstate::EXPSTART =>
if (!ascii::isdigit(rn)) {
- return invalid;
+ return lex.loc: invalid;
};
state = numstate::EXPONENT;
case numstate::EXPONENT =>
@@ -228,7 +241,7 @@ fn scan_number(lex: *lexer) (token | error) = {
case let f: f64 =>
return f;
case =>
- return invalid;
+ return lex.loc: invalid;
};
};
@@ -240,7 +253,8 @@ fn scan_str(lex: *lexer) (token | error) = {
case let rn: rune =>
yield rn;
case io::EOF =>
- return invalid;
+ lex.loc.1 += 1;
+ return lex.loc: invalid;
};
switch (rn) {
@@ -251,7 +265,7 @@ fn scan_str(lex: *lexer) (token | error) = {
strio::appendrune(&lex.strbuf, rn)!;
case =>
if (iscntrl(rn)) {
- return invalid;
+ return lex.loc: invalid;
};
strio::appendrune(&lex.strbuf, rn)!;
};
@@ -265,7 +279,7 @@ fn scan_escape(lex: *lexer) (rune | error) = {
case let rn: rune =>
yield rn;
case io::EOF =>
- return invalid;
+ return lex.loc: invalid;
};
switch (rn) {
@@ -289,7 +303,7 @@ fn scan_escape(lex: *lexer) (rune | error) = {
let buf: [4]u8 = [0...];
match (io::readall(lex.src, buf)?) {
case io::EOF =>
- return invalid;
+ return lex.loc: invalid;
case size =>
yield;
};
@@ -297,34 +311,47 @@ fn scan_escape(lex: *lexer) (rune | error) = {
case let s: str =>
yield s;
case =>
- return invalid;
+ return lex.loc: invalid;
};
match (strconv::stou32b(s, strconv::base::HEX)) {
case let u: u32 =>
+ lex.loc.1 += 4;
return u: rune;
case =>
- return invalid;
+ return lex.loc: invalid;
};
case =>
- return invalid;
+ return lex.loc: invalid;
};
};
// Gets the next rune from the lexer.
fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
if (lex.rb is rune) {
+ lex.prevrloc = lex.loc;
const r = lex.rb as rune;
lex.rb = void;
+ if (r == '\n') {
+ lex.loc = (lex.loc.0 + 1, 0);
+ } else {
+ lex.loc.1 += 1;
+ };
return r;
};
match (bufio::scanrune(lex.src)) {
case let err: io::error =>
return err;
case utf8::invalid =>
- return invalid;
+ return lex.loc: invalid;
case io::EOF =>
return io::EOF;
case let rn: rune =>
+ lex.prevrloc = lex.loc;
+ if (rn == '\n') {
+ lex.loc = (lex.loc.0 + 1, 0);
+ } else {
+ lex.loc.1 += 1;
+ };
return rn;
};
};
@@ -348,6 +375,7 @@ fn nextrunews(lex: *lexer) (rune | io::EOF | error) = {
fn unget(lex: *lexer, r: rune) void = {
assert(lex.rb is void);
lex.rb = r;
+ lex.loc = lex.prevrloc;
};
fn iscntrl(r: rune) bool = r: u32 < 0x20;
diff --git a/encoding/json/load.ha b/encoding/json/load.ha
@@ -62,7 +62,7 @@ fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = {
};
return _load_obj(lexer, level + 1, limit);
case (arrayend | objend | colon | comma) =>
- return invalid;
+ return lexer.loc: invalid;
};
};
@@ -83,7 +83,8 @@ fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = {
match (tok) {
case comma => void;
case arrayend => break;
- case => return invalid;
+ case =>
+ return lexer.loc: invalid;
};
};
return array;
@@ -104,13 +105,14 @@ fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
const key = match (tok) {
case let s: str =>
yield strings::dup(s);
- case => return invalid;
+ case =>
+ return lexer.loc: invalid;
};
defer free(key);
tok = mustscan(lexer)?;
if (!(tok is colon)) {
- return invalid;
+ return lexer.loc: invalid;
};
const val = _load(lexer, level, limit)?;
@@ -121,7 +123,8 @@ fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
match (tok) {
case comma => void;
case objend => break;
- case => return invalid;
+ case =>
+ return lexer.loc: invalid;
};
};
@@ -131,7 +134,8 @@ fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
fn mustscan(lexer: *lexer) (token | error) = {
match (lex(lexer)?) {
case io::EOF =>
- return invalid;
+ lexer.loc.1 += 1;
+ return lexer.loc: invalid;
case let tok: token =>
return tok;
};
diff --git a/encoding/json/types.ha b/encoding/json/types.ha
@@ -1,9 +1,10 @@
// License: MPL-2.0
// (c) 2022 Drew DeVault <sir@cmpwn.com>
+use fmt;
use io;
-// An invalid JSON token was encountered.
-export type invalid = !void;
+// An invalid JSON token was encountered at this location (line, column).
+export type invalid = !(uint, uint);
// The maximum nesting limit was reached.
export type limitreached = !void;
@@ -36,10 +37,14 @@ export type comma = void;
export type token = (arraystart | arrayend | objstart |
objend | colon | comma | str | f64 | bool | _null);
-// Converts an [[error]] into a user-friendly string.
-export fn strerror(err: error) const str = match (err) {
-case invalid =>
- yield "Invalid JSON token encountered";
-case let err: io::error =>
- yield io::strerror(err);
+// Converts an [[error]] into a human-friendly string.
+export fn strerror(err: error) const str = {
+ static let buf: [53]u8 = [0...];
+ match (err) {
+ case let err: invalid =>
+ return fmt::bsprintf(buf,
+ "{}:{}: Invalid JSON token encountered", err.0, err.1);
+ case let err: io::error =>
+ return io::strerror(err);
+ };
};