commit b0f5261dd76523630ae5c35c08c147b2eb603de3
parent 50aab0b2807f23167f8a42d78d04502ba5664294
Author: Drew DeVault <sir@cmpwn.com>
Date: Wed, 11 May 2022 13:49:50 +0200
encoding::json: add load
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
5 files changed, 167 insertions(+), 1 deletion(-)
diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha
@@ -0,0 +1,43 @@
+use bufio;
+use io;
+use strings;
+use fmt;
+
+@test fn load() void = {
+	const input = `1234`;
+	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	const val = load(&reader)!;
+	defer finish(val);
+	assert(val as f64 == 1234.0);
+
+	const input = `[]`;
+	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	const val = load(&reader)!;
+	defer finish(val);
+	assert(len(val as []value) == 0);
+
+	const input = `[1, 2, 3, null]`;
+	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	const val = load(&reader)!;
+	defer finish(val);
+	const vals = val as []value;
+	assert(vals[0] as f64 == 1.0);
+	assert(vals[1] as f64 == 2.0);
+	assert(vals[2] as f64 == 3.0);
+	assert(vals[3] is _null);
+
+	const input = `{}`;
+	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	const val = load(&reader)!;
+	defer finish(val);
+	assert(val is object);
+
+	const input = `{ "hello": "world", "answer": 42 }`;
+	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	const val = load(&reader)!;
+	defer finish(val);
+	assert(val is object);
+	const obj = val as object;
+	assert(*(get(&obj, "hello") as *value) as str == "world");
+	assert(*(get(&obj, "answer") as *value) as f64 == 42.0);
+};
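The tests above only exercise flat containers; nested values take the same recursive path through _load, so an additional case in the same file and style would look like the following sketch (hypothetical, not part of this patch; the input literal and the load_nested name are illustrative):

use bufio;
use io;
use strings;

// Hypothetical extra case mirroring the committed tests: nested containers
// are handled by the same recursive descent as flat ones.
@test fn load_nested() void = {
	const input = `{ "list": [true, false], "empty": {} }`;
	const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
	const val = load(&reader)!;
	defer finish(val);
	const obj = val as object;
	const list = *(get(&obj, "list") as *value) as []value;
	assert(len(list) == 2);
	assert(list[0] as bool);
	assert(!(list[1] as bool));
	assert(*(get(&obj, "empty") as *value) is object);
};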
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -13,6 +13,7 @@ export type lexer = struct {
	src: bufio::bufstream,
	buffer: []u8,
	strbuf: strio::stream,
+	un: (token | void),
};
// Creates a new JSON lexer. The caller can obtain tokens with [[next]] and
@@ -23,6 +24,7 @@ export fn lex(src: io::handle) lexer = {
		src = bufio::buffered(src, buf, []),
		buffer = buf,
		strbuf = strio::dynamic(),
+		un = void,
	};
};
@@ -35,6 +37,13 @@ export fn close(lex: *lexer) void = {
// Returns the next token from a JSON lexer. The return value is borrowed from
// the lexer and will be overwritten on subsequent calls.
export fn next(lex: *lexer) (token | io::EOF | error) = {
+	match (lex.un) {
+	case void => void;
+	case let tok: token =>
+		lex.un = void;
+		return tok;
+	};
+
	const rn = match (nextrunews(lex)?) {
	case io::EOF =>
		return io::EOF;
@@ -82,6 +91,14 @@ export fn next(lex: *lexer) (token | io::EOF | error) = {
	};
};
+// "Unlexes" a token from the lexer, such that the next call to [[next]] will
+// return that token again. Only one token can be unlexed at a time; otherwise,
+// the program will abort.
+fn unlex(lex: *lexer, tok: token) void = {
+	assert(lex.un is void, "encoding::json::unlex called twice in a row");
+	lex.un = tok;
+};
+
// Scans until encountering a non-alphabetical character, returning the
// resulting word.
fn scan_word(lex: *lexer) (str | error) = {
@@ -131,7 +148,8 @@ fn scan_number(lex: *lexer) (token | error) = {
				void;
			case =>
				if (!ascii::isdigit(rn)) {
-					return invalid;
+					bufio::unreadrune(&lex.src, rn);
+					break;
				};
			};
		case numstate::FRACTION =>
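Together, these lexer changes give the loader one token of lookahead: unlex pushes a whole token back for the next call to next, and the scan_number change makes the integer state unread its terminating rune and finish the number token rather than reject it, which the loader relies on when a number is followed by `,`, `]`, or `}`. The same pair could back a one-token peek helper; a sketch building only on next and unlex as defined above (peek is hypothetical, not part of this patch):

// Hypothetical helper: returns the next token without consuming it, by
// unlexing whatever [[next]] produced. The token remains borrowed from the
// lexer, just as with [[next]].
fn peek(lex: *lexer) (token | io::EOF | error) = {
	match (next(lex)?) {
	case io::EOF =>
		return io::EOF;
	case let tok: token =>
		unlex(lex, tok);
		return tok;
	};
};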
diff --git a/encoding/json/load.ha b/encoding/json/load.ha
@@ -0,0 +1,99 @@
+use io;
+use strings;
+
+// Parses a JSON value from the given [[io::handle]], returning the value or an
+// error. The return value is allocated on the heap; use [[finish]] to free it
+// up when you're done using it.
+//
+// This code assumes non-antagonistic inputs, and does not limit recursion depth
+// or memory usage. You may want to incorporate something like
+// [[io::limitreader]] or similar, or use the JSON lexer ([[lex]]) directly in
+// your program if dealing with potentially malicious inputs.
+export fn load(src: io::handle) (value | error) = {
+	const lex = lex(src);
+	defer close(&lex);
+	return _load(&lex);
+};
+
+fn _load(lex: *lexer) (value | error) = {
+	const tok = mustscan(lex)?;
+	match (tok) {
+	case _null =>
+		return _null;
+	case let b: bool =>
+		return b;
+	case let f: f64 =>
+		return f;
+	case let s: str =>
+		return s;
+	case arraystart =>
+		return _load_array(lex);
+	case objstart =>
+		return _load_obj(lex);
+	case (arrayend | objend | colon | comma) =>
+		return invalid;
+	};
+};
+
+fn _load_array(lex: *lexer) (value | error) = {
+	let array: []value = [];
+	for (true) {
+		let tok = mustscan(lex)?;
+		match (tok) {
+		case arrayend =>
+			break;
+		case =>
+			unlex(lex, tok);
+		};
+
+		append(array, _load(lex)?);
+
+		tok = mustscan(lex)?;
+		match (tok) {
+		case comma => void;
+		case arrayend => break;
+		case => return invalid;
+		};
+	};
+	return array;
+};
+
+fn _load_obj(lex: *lexer) (value | error) = {
+	let obj = newobject();
+	for (true) {
+		let tok = mustscan(lex)?;
+		const key = match (tok) {
+		case let s: str =>
+			yield strings::dup(s);
+		case objend => break;
+		case => return invalid;
+		};
+		defer free(key);
+
+		tok = mustscan(lex)?;
+		if (!(tok is colon)) {
+			return invalid;
+		};
+
+		const val = _load(lex)?;
+		set(&obj, key, val);
+
+		tok = mustscan(lex)?;
+		match (tok) {
+		case comma => void;
+		case objend => break;
+		case => return invalid;
+		};
+	};
+
+	return obj;
+};
+
+fn mustscan(lex: *lexer) (token | error) = {
+	match (next(lex)?) {
+	case io::EOF =>
+		return invalid;
+	case let tok: token =>
+		return tok;
+	};
+};
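As the doc comment on load suggests, a caller handling untrusted input would bound the reader before handing it over. A minimal consumer-side sketch, assuming io::limitreader's (handle, limit) signature and that load, finish, get, object, and value are exported and used as in the tests above; the 64 KiB cap and the inline input are purely illustrative:

use bufio;
use encoding::json;
use io;
use strings;

export fn main() void = {
	// Illustrative input; a real program would hand in a file or socket.
	const input = `{ "hello": "world", "answer": 42 }`;
	const src = bufio::fixed(strings::toutf8(input), io::mode::READ);

	// Cap how many bytes load may consume from an untrusted handle.
	const limited = io::limitreader(&src, 65536);

	const val = json::load(&limited)!;
	defer json::finish(val);

	const obj = val as json::object;
	const answer = *(json::get(&obj, "answer") as *json::value) as f64;
	assert(answer == 42.0);
};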
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -518,13 +518,16 @@ encoding_json() {
		gen_srcs encoding::json \
			types.ha \
			lex.ha \
+			load.ha \
			value.ha
	else
		gen_srcs encoding::json \
			types.ha \
			lex.ha \
+			load.ha \
			value.ha \
			+test/lexer.ha \
+			+test/load.ha \
			+test/value.ha
	fi
	gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \
diff --git a/stdlib.mk b/stdlib.mk
@@ -1061,6 +1061,7 @@ $(HARECACHE)/encoding/hex/encoding_hex-any.ssa: $(stdlib_encoding_hex_any_srcs)
stdlib_encoding_json_any_srcs = \
	$(STDLIB)/encoding/json/types.ha \
	$(STDLIB)/encoding/json/lex.ha \
+	$(STDLIB)/encoding/json/load.ha \
	$(STDLIB)/encoding/json/value.ha
$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM))
@@ -3161,8 +3162,10 @@ $(TESTCACHE)/encoding/hex/encoding_hex-any.ssa: $(testlib_encoding_hex_any_srcs)
testlib_encoding_json_any_srcs = \
	$(STDLIB)/encoding/json/types.ha \
	$(STDLIB)/encoding/json/lex.ha \
+	$(STDLIB)/encoding/json/load.ha \
	$(STDLIB)/encoding/json/value.ha \
	$(STDLIB)/encoding/json/+test/lexer.ha \
+	$(STDLIB)/encoding/json/+test/load.ha \
	$(STDLIB)/encoding/json/+test/value.ha
$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM))