hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 74f11edc6bc7e7f107eed10261e9f0b6c6604707
parent bc0049fec9d7fc9965b2f6cd1d9696cba1c979e1
Author: Sebastian <sebastian@sebsite.pw>
Date:   Wed, 18 May 2022 23:41:10 -0400

encoding::json: allow setting nesting limit for load

Adds a load_option type, which for now is just nestlimit (which is a
uint).

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M encoding/json/+test/load.ha | 10 +++++++++-
M encoding/json/load.ha | 45 ++++++++++++++++++++++++++++++++-------------
M encoding/json/types.ha | 5 ++++-
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
5 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha @@ -1,7 +1,6 @@ use bufio; use io; use strings; -use fmt; @test fn load() void = { const input = `1234`; @@ -41,3 +40,12 @@ use fmt; assert(*(get(&obj, "hello") as *value) as str == "world"); assert(*(get(&obj, "answer") as *value) as f64 == 42.0); }; + +@test fn nestlimit() void = { + const input = `{ "foo": [[[{"bar": ["baz"]}]]] }`; + const reader = bufio::fixed(strings::toutf8(input), io::mode::READ); + const val = load(&reader, 6: nestlimit)!; + finish(val); + io::seek(&reader, 0, io::whence::SET)!; + assert(load(&reader, 5: nestlimit) is limitreached); +}; diff --git a/encoding/json/load.ha b/encoding/json/load.ha @@ -1,21 +1,34 @@ use io; use strings; +use types; + +// Options for [[load]]. +export type load_option = nestlimit; + +// The maximum number of nested objects or arrays that can be entered before +// erroring out. +export type nestlimit = uint; // Parses a JSON value from the given [[io::handle]], returning the value or an // error. The return value is allocated on the heap; use [[finish]] to free it // up when you're done using it. // -// This code assumes non-antagonistic inputs, and does not limit recursion depth -// or memory usage. You may want to incorporate something like -// [[io::limitreader]] or similar, or use the JSON lexer ([[lex]]) directly into -// your program if dealing with potentially malicious inputs. -export fn load(src: io::handle) (value | error) = { +// By default, this function assumes non-antagonistic inputs, and does not limit +// recursion depth or memory usage. You may want to set a custom [[nestlimit]], +// or incorporate an [[io::limitreader]] or similar. Alternatively, you can use +// the JSON lexer ([[lex]]) directly if dealing with potentially malicious +// inputs. +export fn load(src: io::handle, opts: load_option...) 
(value | error) = { + let limit = types::UINT_MAX; + for (let i = 0z; i < len(opts); i += 1) { + limit = opts[i]: nestlimit: uint; + }; const lex = newlexer(src); defer close(&lex); - return _load(&lex); + return _load(&lex, 0, limit); }; -fn _load(lexer: *lexer) (value | error) = { +fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = { const tok = mustscan(lexer)?; match (tok) { case _null => @@ -27,15 +40,21 @@ fn _load(lexer: *lexer) (value | error) = { case let s: str => return strings::dup(s); case arraystart => - return _load_array(lexer); + if (level == limit) { + return limitreached; + }; + return _load_array(lexer, level + 1, limit); case objstart => - return _load_obj(lexer); + if (level == limit) { + return limitreached; + }; + return _load_obj(lexer, level + 1, limit); case (arrayend | objend | colon | comma) => return invalid; }; }; -fn _load_array(lexer: *lexer) (value | error) = { +fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = { let array: []value = []; let tok = mustscan(lexer)?; match (tok) { @@ -46,7 +65,7 @@ fn _load_array(lexer: *lexer) (value | error) = { }; for (true) { - append(array, _load(lexer)?); + append(array, _load(lexer, level, limit)?); tok = mustscan(lexer)?; match (tok) { @@ -58,7 +77,7 @@ fn _load_array(lexer: *lexer) (value | error) = { return array; }; -fn _load_obj(lexer: *lexer) (value | error) = { +fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = { let obj = newobject(); let tok = mustscan(lexer)?; match (tok) { @@ -82,7 +101,7 @@ fn _load_obj(lexer: *lexer) (value | error) = { return invalid; }; - const val = _load(lexer)?; + const val = _load(lexer, level, limit)?; defer finish(val); set(&obj, key, val); diff --git a/encoding/json/types.ha b/encoding/json/types.ha @@ -5,8 +5,11 @@ use io; // An invalid JSON token was encountered. export type invalid = !void; +// The maximum nesting limit was reached. 
+export type limitreached = !void; + // A tagged union of all possible errors returned from this module. -export type error = !(invalid | io::error); +export type error = !(invalid | limitreached | io::error); // The JSON null value. export type _null = void; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -534,7 +534,7 @@ encoding_json() { +test/value.ha fi gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \ - strconv hash::fnv + strconv hash::fnv types } encoding_pem() { diff --git a/stdlib.mk b/stdlib.mk @@ -1072,7 +1072,7 @@ stdlib_encoding_json_any_srcs = \ $(STDLIB)/encoding/json/load.ha \ $(STDLIB)/encoding/json/value.ha -$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) +$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/encoding/json @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::json \ @@ -3195,7 +3195,7 @@ testlib_encoding_json_any_srcs = \ $(STDLIB)/encoding/json/+test/load.ha \ $(STDLIB)/encoding/json/+test/value.ha -$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) 
$(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM)) +$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM)) $(testlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/encoding/json @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::json \