commit 74f11edc6bc7e7f107eed10261e9f0b6c6604707
parent bc0049fec9d7fc9965b2f6cd1d9696cba1c979e1
Author: Sebastian <sebastian@sebsite.pw>
Date: Wed, 18 May 2022 23:41:10 -0400
encoding::json: allow setting nesting limit for load
Adds a load_option type, which for now is just nestlimit (which is a
uint).
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
5 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha
@@ -1,7 +1,6 @@
use bufio;
use io;
use strings;
-use fmt;
@test fn load() void = {
const input = `1234`;
@@ -41,3 +40,12 @@ use fmt;
assert(*(get(&obj, "hello") as *value) as str == "world");
assert(*(get(&obj, "answer") as *value) as f64 == 42.0);
};
+
+@test fn nestlimit() void = {
+ const input = `{ "foo": [[[{"bar": ["baz"]}]]] }`;
+ const reader = bufio::fixed(strings::toutf8(input), io::mode::READ);
+ const val = load(&reader, 6: nestlimit)!;
+ finish(val);
+ io::seek(&reader, 0, io::whence::SET)!;
+ assert(load(&reader, 5: nestlimit) is limitreached);
+};
diff --git a/encoding/json/load.ha b/encoding/json/load.ha
@@ -1,21 +1,34 @@
use io;
use strings;
+use types;
+
+// Options for [[load]].
+export type load_option = nestlimit;
+
+// The maximum number of nested objects or arrays that [[load]] will enter. If
+// the input nests any deeper, [[load]] fails with [[limitreached]].
+export type nestlimit = uint;
// Parses a JSON value from the given [[io::handle]], returning the value or an
// error. The return value is allocated on the heap; use [[finish]] to free it
// up when you're done using it.
//
-// This code assumes non-antagonistic inputs, and does not limit recursion depth
-// or memory usage. You may want to incorporate something like
-// [[io::limitreader]] or similar, or use the JSON lexer ([[lex]]) directly into
-// your program if dealing with potentially malicious inputs.
-export fn load(src: io::handle) (value | error) = {
+// By default, this function assumes non-antagonistic inputs, and does not limit
+// recursion depth or memory usage. If dealing with potentially malicious
+// inputs, you may want to pass a [[nestlimit]] to bound the nesting depth (if
+// several are given, the last one is used), in which case [[limitreached]] is
+// returned for inputs that nest any deeper. You can also incorporate an
+// [[io::limitreader]] or similar, or use the JSON lexer ([[lex]]) directly.
+export fn load(src: io::handle, opts: load_option...) (value | error) = {
+ let limit = types::UINT_MAX;
+ for (let i = 0z; i < len(opts); i += 1) {
+ limit = opts[i]: nestlimit: uint;
+ };
const lex = newlexer(src);
defer close(&lex);
- return _load(&lex);
+ return _load(&lex, 0, limit);
};
-fn _load(lexer: *lexer) (value | error) = {
+fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = {
const tok = mustscan(lexer)?;
match (tok) {
case _null =>
@@ -27,15 +40,21 @@ fn _load(lexer: *lexer) (value | error) = {
case let s: str =>
return strings::dup(s);
case arraystart =>
- return _load_array(lexer);
+ if (level == limit) {
+ return limitreached;
+ };
+ return _load_array(lexer, level + 1, limit);
case objstart =>
- return _load_obj(lexer);
+ if (level == limit) {
+ return limitreached;
+ };
+ return _load_obj(lexer, level + 1, limit);
case (arrayend | objend | colon | comma) =>
return invalid;
};
};
-fn _load_array(lexer: *lexer) (value | error) = {
+fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = {
let array: []value = [];
let tok = mustscan(lexer)?;
match (tok) {
@@ -46,7 +65,7 @@ fn _load_array(lexer: *lexer) (value | error) = {
};
for (true) {
- append(array, _load(lexer)?);
+ append(array, _load(lexer, level, limit)?);
tok = mustscan(lexer)?;
match (tok) {
@@ -58,7 +77,7 @@ fn _load_array(lexer: *lexer) (value | error) = {
return array;
};
-fn _load_obj(lexer: *lexer) (value | error) = {
+fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
let obj = newobject();
let tok = mustscan(lexer)?;
match (tok) {
@@ -82,7 +101,7 @@ fn _load_obj(lexer: *lexer) (value | error) = {
return invalid;
};
- const val = _load(lexer)?;
+ const val = _load(lexer, level, limit)?;
defer finish(val);
set(&obj, key, val);
diff --git a/encoding/json/types.ha b/encoding/json/types.ha
@@ -5,8 +5,11 @@ use io;
// An invalid JSON token was encountered.
export type invalid = !void;
+// The input is nested more deeply than the [[nestlimit]] passed to [[load]]
+// allows.
+export type limitreached = !void;
+
// A tagged union of all possible errors returned from this module.
-export type error = !(invalid | io::error);
+export type error = !(invalid | limitreached | io::error);
// The JSON null value.
export type _null = void;
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -534,7 +534,7 @@ encoding_json() {
+test/value.ha
fi
gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \
- strconv hash::fnv
+ strconv hash::fnv types
}
encoding_pem() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -1072,7 +1072,7 @@ stdlib_encoding_json_any_srcs = \
$(STDLIB)/encoding/json/load.ha \
$(STDLIB)/encoding/json/value.ha
-$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM))
+$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/encoding/json
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::json \
@@ -3195,7 +3195,7 @@ testlib_encoding_json_any_srcs = \
$(STDLIB)/encoding/json/+test/load.ha \
$(STDLIB)/encoding/json/+test/value.ha
-$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM))
+$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM)) $(testlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/encoding/json
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::json \