hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit b0f5261dd76523630ae5c35c08c147b2eb603de3
parent 50aab0b2807f23167f8a42d78d04502ba5664294
Author: Drew DeVault <sir@cmpwn.com>
Date:   Wed, 11 May 2022 13:49:50 +0200

encoding::json: add load

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
A encoding/json/+test/load.ha | 43 +++++++++++++++++++++++++++++++++++++++++++
M encoding/json/lex.ha | 20 +++++++++++++++++++-
A encoding/json/load.ha | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 3 +++
M stdlib.mk | 3 +++
5 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha @@ -0,0 +1,43 @@ +use bufio; +use io; +use strings; +use fmt; + +@test fn load() void = { + const input = `1234`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader)!; + defer finish(val); + assert(val as f64 == 1234.0); + + const input = `[]`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader)!; + defer finish(val); + assert(len(val as []value) == 0); + + const input = `[1, 2, 3, null]`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader)!; + defer finish(val); + const vals = val as []value; + assert(vals[0] as f64 == 1.0); + assert(vals[1] as f64 == 2.0); + assert(vals[2] as f64 == 3.0); + assert(vals[3] is _null); + + const input = `{}`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader)!; + defer finish(val); + assert(val is object); + + const input = `{ "hello": "world", "answer": 42 }`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader)!; + defer finish(val); + assert(val is object); + const obj = val as object; + assert(*(get(&obj, "hello") as *value) as str == "world"); + assert(*(get(&obj, "answer") as *value) as f64 == 42.0); +}; diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha @@ -13,6 +13,7 @@ export type lexer = struct { src: bufio::bufstream, buffer: []u8, strbuf: strio::stream, + un: (token | void), }; // Creates a new JSON lexer. The caller can obtain tokens with [[next]] and @@ -23,6 +24,7 @@ export fn lex(src: io::handle) lexer = { src = bufio::buffered(src, buf, []), buffer = buf, strbuf = strio::dynamic(), + un = void, }; }; @@ -35,6 +37,13 @@ export fn close(lex: *lexer) void = { // Returns the next token from a JSON lexer. The return value is borrowed from // the lexer and will be overwritten on subsequent calls. 
export fn next(lex: *lexer) (token | io::EOF | error) = { + match (lex.un) { + case void => void; + case let tok: token => + lex.un = void; + return tok; + }; + const rn = match (nextrunews(lex)?) { case io::EOF => return io::EOF; @@ -82,6 +91,14 @@ export fn next(lex: *lexer) (token | io::EOF | error) = { }; }; +// "Unlexes" a token from the lexer, such that the next call to [[next]] will +// return that token again. Only one token can be unlexed at a time, otherwise +// the program will abort. +fn unlex(lex: *lexer, tok: token) void = { + assert(lex.un is void, "encoding::json::unlex called twice in a row"); + lex.un = tok; +}; + // Scans until encountering a non-alphabetical character, returning the // resulting word. fn scan_word(lex: *lexer) (str | error) = { @@ -131,7 +148,8 @@ fn scan_number(lex: *lexer) (token | error) = { void; case => if (!ascii::isdigit(rn)) { - return invalid; + bufio::unreadrune(&lex.src, rn); + break; }; }; case numstate::FRACTION => diff --git a/encoding/json/load.ha b/encoding/json/load.ha @@ -0,0 +1,99 @@ +use io; +use strings; + +// Parses a JSON value from the given [[io::handle]], returning the value or an +// error. The return value is allocated on the heap; use [[finish]] to free it +// up when you're done using it. +// +// This code assumes non-antagonistic inputs, and does not limit recursion depth +// or memory usage. You may want to incorporate something like +// [[io::limitreader]] or similar, or use the JSON lexer ([[lex]]) directly into +// your program if dealing with potentially malicious inputs. 
+export fn load(src: io::handle) (value | error) = { + const lex = lex(src); + defer close(&lex); + return _load(&lex); +}; + +fn _load(lex: *lexer) (value | error) = { + const tok = mustscan(lex)?; + match (tok) { + case _null => + return _null; + case let b: bool => + return b; + case let f: f64 => + return f; + case let s: str => + return s; + case arraystart => + return _load_array(lex); + case objstart => + return _load_obj(lex); + case (arrayend | objend | colon | comma) => + return invalid; + }; +}; + +fn _load_array(lex: *lexer) (value | error) = { + let array: []value = []; + for (true) { + let tok = mustscan(lex)?; + match (tok) { + case arrayend => + break; + case => + unlex(lex, tok); + }; + + append(array, _load(lex)?); + + tok = mustscan(lex)?; + match (tok) { + case comma => void; + case arrayend => break; + case => return invalid; + }; + }; + return array; +}; + +fn _load_obj(lex: *lexer) (value | error) = { + let obj = newobject(); + for (true) { + let tok = mustscan(lex)?; + const key = match (tok) { + case let s: str => + yield strings::dup(s); + case objend => break; + case => return invalid; + }; + defer free(key); + + tok = mustscan(lex)?; + if (!(tok is colon)) { + return invalid; + }; + + const val = _load(lex)?; + set(&obj, key, val); + + tok = mustscan(lex)?; + match (tok) { + case comma => void; + case objend => break; + case => return invalid; + }; + }; + + return obj; +}; + +fn mustscan(lex: *lexer) (token | error) = { + match (next(lex)?) 
{ + case io::EOF => + return invalid; + case let tok: token => + return tok; + }; +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -518,13 +518,16 @@ encoding_json() { gen_srcs encoding::json \ types.ha \ lex.ha \ + load.ha \ value.ha else gen_srcs encoding::json \ types.ha \ lex.ha \ + load.ha \ value.ha \ +test/lexer.ha \ + +test/load.ha \ +test/value.ha fi gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \ diff --git a/stdlib.mk b/stdlib.mk @@ -1061,6 +1061,7 @@ $(HARECACHE)/encoding/hex/encoding_hex-any.ssa: $(stdlib_encoding_hex_any_srcs) stdlib_encoding_json_any_srcs = \ $(STDLIB)/encoding/json/types.ha \ $(STDLIB)/encoding/json/lex.ha \ + $(STDLIB)/encoding/json/load.ha \ $(STDLIB)/encoding/json/value.ha $(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) @@ -3161,8 +3162,10 @@ $(TESTCACHE)/encoding/hex/encoding_hex-any.ssa: $(testlib_encoding_hex_any_srcs) testlib_encoding_json_any_srcs = \ $(STDLIB)/encoding/json/types.ha \ $(STDLIB)/encoding/json/lex.ha \ + $(STDLIB)/encoding/json/load.ha \ $(STDLIB)/encoding/json/value.ha \ $(STDLIB)/encoding/json/+test/lexer.ha \ + $(STDLIB)/encoding/json/+test/load.ha \ $(STDLIB)/encoding/json/+test/value.ha $(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM))