hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git

commit 9f175d0bc9a389f3ae358e256a248d5858d2bb4d
parent b00d4a6f83424b1c5558a37e9c5a6bfd3801701c
Author: Drew DeVault <sir@cmpwn.com>
Date:   Tue, 25 Oct 2022 13:13:19 +0200

encoding::json: move to extlib

Its new home is here:

https://sr.ht/~sircmpwn/hare-json/
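
For downstream code, the practical consequence is that encoding::json must now come from the hare-json project rather than from the standard library. The sketch below shows what continued use might look like; it assumes that the extlib keeps the encoding::json module path and that it has been made visible to the build (for example by vendoring it into the project or adding it to HAREPATH). Check the hare-json README for the actual instructions.

use encoding::json; // assumed to remain the module path after the move

export fn main() void = {
	// Code that used the stdlib module should keep working unchanged
	// once hare-json is on the module search path.
	const val = json::loadstr(`{"moved": true}`)!;
	defer json::finish(val);
};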

Diffstat:
D encoding/json/+test/lexer.ha |  62 -
D encoding/json/+test/load.ha  | 164 -
D encoding/json/+test/value.ha |  35 -
D encoding/json/README         |  15 -
D encoding/json/dump.ha        |  84 -
D encoding/json/lex.ha         | 383 -
D encoding/json/load.ha        | 142 -
D encoding/json/types.ha       |  50 -
D encoding/json/value.ha       | 193 -
M scripts/gen-stdlib           |  25 -
M stdlib.mk                    |  43 -
11 files changed, 0 insertions(+), 1196 deletions(-)
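
Most of what is removed is the module's public API, which remains visible in the diff below (README, load.ha, dump.ha, value.ha). As a rough illustration of that documented interface, here is a minimal usage sketch written against the stdlib version being deleted here; treat the details as assumptions to be verified against hare-json:

use encoding::json;
use fmt;

export fn main() void = {
	// Build a JSON object with the value helpers.
	let obj = json::newobject();
	defer json::finish(obj);
	json::set(&obj, "hello", "world");
	json::set(&obj, "answer", 42.0);

	// dumpstr serializes a value; the caller must free the result.
	const s = json::dumpstr(obj);
	defer free(s);
	fmt::println(s)!;

	// loadstr parses a string into a heap-allocated value; finish frees it.
	// A nestlimit bounds recursion depth for untrusted input.
	const val = json::loadstr(`{"foo": [1, 2, 3]}`, 16: json::nestlimit)!;
	defer json::finish(val);
};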

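For token-level processing, the README below also documents a pull-style lexer (newlexer, lex, close). The following sketch, modeled on the module's own tests and therefore making the same API assumptions, reads tokens from an in-memory buffer:

use bufio;
use encoding::json;
use fmt;
use io;
use strings;

export fn main() void = {
	const buf = strings::toutf8(`{"a": [1, true, null]}`);
	const src = bufio::fixed(buf, io::mode::READ);
	const lexer = json::newlexer(&src);
	defer json::close(&lexer);

	for (true) {
		const tok = match (json::lex(&lexer)!) {
		case io::EOF =>
			break;
		case let t: json::token =>
			yield t;
		};
		match (tok) {
		case let f: f64 =>
			fmt::printfln("number: {}", f)!;
		case let s: str =>
			fmt::printfln("string: {}", s)!;
		case let b: bool =>
			fmt::printfln("bool: {}", b)!;
		case json::_null =>
			fmt::println("null")!;
		case =>
			void; // structural tokens: [ ] { } : ,
		};
	};
};
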
diff --git a/encoding/json/+test/lexer.ha b/encoding/json/+test/lexer.ha
@@ -1,62 +0,0 @@
-use bufio;
-use strings;
-use io;
-
-@test fn lex() void = {
-	const cases: [_](str, []token) = [
-		("true", [true]),
-		("false", [false]),
-		("null", [_null]),
-		("1234", [1234.0]),
-		("12.34", [12.34]),
-		("12.34e5", [12.34e5]),
-		("12.34E5", [12.34e5]),
-		("12.34e+5", [12.34e5]),
-		("12.34e-5", [12.34e-5]),
-		("12e5", [12.0e5]),
-		("-1234", [-1234.0]),
-		(`"hello world"`, ["hello world"]),
-		(`"\"\\\/\b\f\n\r\t\u0020"`, ["\"\\/\b\f\n\r\t\u0020"]),
-		("[ null, null ]", [arraystart, _null, comma, _null, arrayend]),
-	];
-
-	for (let i = 0z; i < len(cases); i += 1) {
-		const src = strings::toutf8(cases[i].0);
-		const src = bufio::fixed(src, io::mode::READ);
-		const lexer = newlexer(&src);
-		defer close(&lexer);
-
-		for (let j = 0z; j < len(cases[i].1); j += 1) {
-			const want = cases[i].1[j];
-			const have = lex(&lexer)! as token;
-			assert(tokeq(want, have));
-		};
-
-		assert(lex(&lexer) is io::EOF);
-	};
-};
-
-fn tokeq(want: token, have: token) bool = {
-	match (want) {
-	case _null =>
-		return have is _null;
-	case comma =>
-		return have is comma;
-	case colon =>
-		return have is colon;
-	case arraystart =>
-		return have is arraystart;
-	case arrayend =>
-		return have is arrayend;
-	case objstart =>
-		return have is objstart;
-	case objend =>
-		return have is objend;
-	case let b: bool =>
-		return have as bool == b;
-	case let f: f64 =>
-		return have as f64 == f;
-	case let s: str =>
-		return have as str == s;
-	};
-};
diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha
@@ -1,164 +0,0 @@
-use fmt;
-
-fn roundtrip(input: str, expected: value) void = {
-	const val = loadstr(input)!;
-	defer finish(val);
-	assert(equal(val, expected));
-	const s = dumpstr(val);
-	defer free(s);
-	const val = loadstr(input)!;
-	defer finish(val);
-	assert(equal(val, expected));
-};
-
-fn errassert(input: str, expected_loc: (uint, uint)) void = {
-	const loc = loadstr(input) as invalid;
-	if (loc.0 != expected_loc.0 || loc.1 != expected_loc.1) {
-		fmt::errorfln("=== JSON:\n{}", input)!;
-		fmt::errorfln("=== expected error location:\n({}, {})",
-			expected_loc.0, expected_loc.1)!;
-		fmt::errorfln("=== actual error location:\n({}, {})",
-			loc.0, loc.1)!;
-		abort();
-	};
-};
-
-@test fn load() void = {
-	let obj = newobject();
-	defer finish(obj);
-	let obj2 = newobject();
-	defer finish(obj2);
-
-	roundtrip(`1234`, 1234.0);
-	roundtrip(`[]`, []);
-	roundtrip(`[1, 2, 3, null]`, [1.0, 2.0, 3.0, _null]);
-	roundtrip(`{}`, obj);
-	set(&obj, "hello", "world");
-	set(&obj, "answer", 42.0);
-	roundtrip(`{ "hello": "world", "answer": 42 }`, obj);
-	reset(&obj);
-	roundtrip(`[[] ]`, [[]]);
-	roundtrip(`[""]`, [""]);
-	roundtrip(`["a"]`, ["a"]);
-	roundtrip(`[false]`, [false]);
-	roundtrip(`[null, 1, "1", {}]`, [_null, 1.0, "1", obj]);
-	roundtrip(`[null]`, [_null]);
-	roundtrip("[1\n]", [1.0]);
-	roundtrip(`[1,null,null,null,2]`, [1.0, _null, _null, _null, 2.0]);
-	set(&obj, "", 0.0);
-	roundtrip(`{"":0}`, obj);
-	reset(&obj);
-	set(&obj, "foo\0bar", 42.0);
-	roundtrip(`{"foo\u0000bar": 42}`, obj);
-	reset(&obj);
-	set(&obj, "min", -1.0e+28);
-	set(&obj, "max", +1.0e+28);
-	roundtrip(`{"min": -1.0e+28, "max": 1.0e+28}`, obj);
-	reset(&obj);
-	set(&obj, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
-	set(&obj2, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
-	set(&obj, "x", [obj2]);
-	roundtrip(`{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}`,
-		obj);
-	reset(&obj);
-	reset(&obj2);
-	set(&obj, "a", []);
-	roundtrip(`{"a":[]}`, obj);
-	roundtrip("{\n" `"a": []` "\n}", obj);
-	reset(&obj);
-	roundtrip(`"\u0060\u012a\u12AB"`, "\u0060\u012a\u12AB");
-	roundtrip(`"\"\\\/\b\f\n\r\t"`, "\"\\/\b\f\n\r\t");
-	roundtrip(`"\\u0000"`, `\u0000`);
-	roundtrip(`"\""`, `"`);
-	roundtrip(`"a/*b*/c/*d//e"`, "a/*b*/c/*d//e");
-	roundtrip(`"\\a"`, `\a`);
-	roundtrip(`"\\n"`, `\n`);
-	roundtrip(`"\u0012"`, "\u0012");
-	roundtrip(`[ "asd"]`, ["asd"]);
-	roundtrip(`"new\u000Aline"`, "new\nline");
-	roundtrip(`"\u0000"`, "\0");
-	roundtrip(`"\u002c"`, "\u002c");
-	roundtrip(`"asd "`, "asd ");
-	roundtrip(`" "`, " ");
-	roundtrip(`"\u0821"`, "\u0821");
-	roundtrip(`"\u0123"`, "\u0123");
-	roundtrip(`"\u0061\u30af\u30EA\u30b9"`, "\u0061\u30af\u30EA\u30b9");
-	roundtrip(`"\uA66D"`, "\uA66D");
-	roundtrip(`"\u005C"`, `\`);
-	roundtrip(`"\u0022"`, `"`);
-	roundtrip(`""`, "");
-	roundtrip(` [] `, []);
-
-	errassert(`[1,,]`, (1, 4));
-	errassert(`[1 true]`, (1, 7));
-	errassert(`["": 1]`, (1, 4));
-	errassert(`[,1]`, (1, 2));
-	errassert(`[1,,2]`, (1, 4));
-	errassert(`["",]`, (1, 5));
-	errassert(`["x"`, (1, 5));
-	errassert(`[x`, (1, 2));
-	errassert(`[3[4]]`, (1, 3));
-	errassert(`[1:2]`, (1, 3));
-	errassert(`[,]`, (1, 2));
-	errassert(`[-]`, (1, 3));
-	errassert(`[ , ""]`, (1, 5));
-	errassert("[\"a\",\n4\n,1,", (3, 4));
-	errassert(`[1,]`, (1, 4));
-	errassert("[\"\va\"\\f", (1, 3));
-	errassert(`[*]`, (1, 2));
-	errassert(`[1,`, (1, 4));
-	errassert("[1,\n1\n,1", (3, 3));
-	errassert(`[{}`, (1, 4));
-	errassert(`["x", truth]`, (1, 11));
-	errassert(`{[: "x"}`, (1, 2));
-	errassert(`{"x", null}`, (1, 5));
-	errassert(`{"x"::"b"}`, (1, 6));
-	errassert(`{"a":"a" 123}`, (1, 12));
-	errassert(`{"a" b}`, (1, 6));
-	errassert(`{:"b"}`, (1, 2));
-	errassert(`{"a" "b"}`, (1, 8));
-	errassert(`{"a":`, (1, 6));
-	errassert(`{"a"`, (1, 5));
-	errassert(`{1:1}`, (1, 2));
-	errassert(`{9999E9999:1}`, (1, 10));
-	errassert(`{null:null,null:null}`, (1, 5));
-	errassert(`{"id":0,,,,,}`, (1, 9));
-	errassert(`{'a':0}`, (1, 2));
-	errassert(`{"id":0,}`, (1, 9));
-	errassert(`{"a":"b",,"c":"d"}`, (1, 10));
-	errassert(`{true: false}`, (1, 5));
-	errassert(`{"a":"a`, (1, 8));
-	errassert(`{ "foo" : "bar", "a" }`, (1, 22));
-	errassert(` `, (1, 2));
-	errassert(`<null>`, (1, 1));
-	errassert(`["asd]`, (1, 7));
-	errassert(`True`, (1, 4));
-	errassert(`]`, (1, 1));
-	errassert(`}`, (1, 1));
-	errassert(`{"x": true,`, (1, 12));
-	errassert(`[`, (1, 2));
-	errassert(`{`, (1, 2));
-	errassert(``, (1, 1));
-	errassert("\0", (1, 1));
-	errassert(`{"":`, (1, 5));
-	errassert(`['`, (1, 2));
-	errassert(`["`, (1, 3));
-	errassert(`[,`, (1, 2));
-	errassert(`[{`, (1, 3));
-	errassert(`{[`, (1, 2));
-	errassert(`{]`, (1, 2));
-	errassert(`[}`, (1, 2));
-	errassert(`{'`, (1, 2));
-	errassert(`{"`, (1, 3));
-	errassert(`{,`, (1, 2));
-	errassert(`["\{["\{["\{["\{`, (1, 4));
-	errassert(`*`, (1, 1));
-	errassert(`\u000A""`, (1, 1));
-	errassert("\f", (1, 1));
-};
-
-@test fn nestlimit() void = {
-	const s = `{ "foo": [[[{"bar": ["baz"]}]]] }`;
-	const val = loadstr(s, 6: nestlimit)!;
-	finish(val);
-	assert(loadstr(s, 5: nestlimit) is limitreached);
-};
diff --git a/encoding/json/+test/value.ha b/encoding/json/+test/value.ha
@@ -1,35 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-
-@test fn object() void = {
-	let obj = newobject();
-	defer finish(obj);
-
-	set(&obj, "hello", "world");
-	set(&obj, "foo", "bar");
-	set(&obj, "the answer", 42.0);
-
-	// XXX: Match overhaul?
-	assert(*(get(&obj, "hello") as *value) as str == "world");
-	assert(*(get(&obj, "foo") as *value) as str == "bar");
-	assert(*(get(&obj, "the answer") as *value) as f64 == 42.0);
-	assert(get(&obj, "nonexistent") is void);
-
-	del(&obj, "hello");
-	assert(get(&obj, "hello") is void);
-};
-
-@test fn iterator() void = {
-	let obj = newobject();
-	defer finish(obj);
-
-	set(&obj, "hello", "world");
-	set(&obj, "foo", "bar");
-	set(&obj, "the answer", 42.0);
-
-	let it = iter(&obj);
-	assert(next(&it) is (const str, const *value));
-	assert(next(&it) is (const str, const *value));
-	assert(next(&it) is (const str, const *value));
-	assert(next(&it) is void);
-};
diff --git a/encoding/json/README b/encoding/json/README
@@ -1,15 +0,0 @@
-This module provides an implementation of the JavaScript Object Notation (JSON)
-format, as defined by RFC 8259. Note that several other, incompatible
-specifications exist. This implementation does not include any extensions; only
-features which are strictly required by the spec are implemented.
-
-A lexer for JSON values is provided, which may be initialized with [[lex]] and
-provides tokens via [[next]], and which uses a relatively small amount of memory
-and provides relatively few gurantees regarding the compliance of the input with
-the JSON grammar.
-
-Additionally, the [[value]] type is provided to store any value JSON value, as
-well as helpers like [[newobject]], [[get]], and [[set]]. One can load a JSON
-value from an input stream into a heap-allocated [[value]] via [[load]], which
-enforces all of JSON's grammar constraints and returns an object which must be
-freed with [[finish]].
diff --git a/encoding/json/dump.ha b/encoding/json/dump.ha
@@ -1,84 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Sebastian <sebastian@sebsite.pw>
-use fmt;
-use io;
-use strings;
-use strio;
-
-// Dumps a [[value]] into an [[io::handle]] as a string without any additional
-// formatting.
-export fn dump(out: io::handle, val: value) (size | io::error) = {
-	let z = 0z;
-	match (val) {
-	case let v: (f64 | bool) =>
-		z += fmt::fprint(out, v)?;
-	case let s: str =>
-		z += fmt::fprint(out, `"`)?;
-		let it = strings::iter(s);
-		for (true) match (strings::next(&it)) {
-		case void =>
-			break;
-		case let r: rune =>
-			switch (r) {
-			case '\b' =>
-				z += fmt::fprint(out, `\b`)?;
-			case '\f' =>
-				z += fmt::fprint(out, `\f`)?;
-			case '\n' =>
-				z += fmt::fprint(out, `\n`)?;
-			case '\r' =>
-				z += fmt::fprint(out, `\r`)?;
-			case '\t' =>
-				z += fmt::fprint(out, `\t`)?;
-			case '\"' =>
-				z += fmt::fprint(out, `\"`)?;
-			case '\\' =>
-				z += fmt::fprint(out, `\\`)?;
-			case =>
-				if (iscntrl(r)) {
-					z += fmt::fprintf(out, `\u{:04x}`,
-						r: u32)?;
-				} else {
-					z += fmt::fprint(out, r)?;
-				};
-			};
-		};
-		z += fmt::fprint(out, `"`)?;
-	case _null =>
-		z += fmt::fprint(out, "null")?;
-	case let a: []value =>
-		z += fmt::fprint(out, "[")?;
-		for (let i = 0z; i < len(a); i += 1) {
-			z += dump(out, a[i])?;
-			if (i < len(a) - 1) {
-				z += fmt::fprint(out, ",")?;
-			};
-		};
-		z += fmt::fprint(out, "]")?;
-	case let o: object =>
-		z += fmt::fprint(out, "{")?;
-		let comma = false;
-		let it = iter(&o);
-		for (true) match (next(&it)) {
-		case void => break;
-		case let pair: (const str, const *value) =>
-			if (comma) {
-				z += fmt::fprint(out, ",")?;
-			};
-			comma = true;
-			z += dump(out, pair.0)?;
-			z += fmt::fprint(out, ":")?;
-			z += dump(out, *pair.1)?;
-		};
-		z += fmt::fprint(out, "}")?;
-	};
-	return z;
-};
-
-// Dumps a [[value]] into a string without any additional formatting. The caller
-// must free the return value.
-export fn dumpstr(val: value) str = {
-	let s = strio::dynamic();
-	dump(&s, val)!;
-	return strio::string(&s);
-};
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -1,383 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use ascii;
-use bufio;
-use encoding::utf8;
-use io;
-use os;
-use strconv;
-use strings;
-use strio;
-
-export type lexer = struct {
-	src: io::handle,
-	buffer: []u8,
-	strbuf: strio::stream,
-	un: (token | void),
-	rb: (rune | void),
-	loc: (uint, uint),
-	prevloc: (uint, uint),
-	nextloc: (uint, uint),
-	prevrloc: (uint, uint),
-};
-
-// Creates a new JSON lexer. The caller may obtain tokens with [[lex]] and
-// should pass the result to [[close]] when they're done with it.
-export fn newlexer(src: io::handle) lexer = {
-	let buf: []u8 = alloc([0...], os::BUFSIZ);
-	return lexer {
-		src = src,
-		buffer = buf,
-		strbuf = strio::dynamic(),
-		un = void,
-		rb = void,
-		loc = (1, 0),
-		...
-	};
-};
-
-// Frees state associated with a JSON lexer.
-export fn close(lex: *lexer) void = {
-	free(lex.buffer);
-};
-
-// Returns the next token from a JSON lexer. The return value is borrowed from
-// the lexer and will be overwritten on subsequent calls.
-export fn lex(lex: *lexer) (token | io::EOF | error) = {
-	match (lex.un) {
-	case void =>
-		lex.prevloc = lex.loc;
-	case let tok: token =>
-		lex.un = void;
-		lex.prevloc = lex.loc;
-		lex.loc = lex.nextloc;
-		return tok;
-	};
-
-	const rn = match (nextrunews(lex)?) {
-	case io::EOF =>
-		return io::EOF;
-	case let rn: rune =>
-		yield rn;
-	};
-
-	switch (rn) {
-	case '[' =>
-		return arraystart;
-	case ']' =>
-		return arrayend;
-	case '{' =>
-		return objstart;
-	case '}' =>
-		return objend;
-	case ',' =>
-		return comma;
-	case ':' =>
-		return colon;
-	case '"' =>
-		return scan_str(lex)?;
-	case =>
-		yield;
-	};
-
-	if (ascii::isdigit(rn) || rn == '-') {
-		unget(lex, rn);
-		return scan_number(lex)?;
-	};
-
-	if (!ascii::isalpha(rn)) {
-		return lex.loc: invalid;
-	};
-
-	unget(lex, rn);
-	const word = scan_word(lex)?;
-	switch (word) {
-	case "true" =>
-		return true;
-	case "false" =>
-		return false;
-	case "null" =>
-		return _null;
-	case =>
-		return lex.loc: invalid;
-	};
-};
-
-// "Unlexes" a token from the lexer, such that the next call to [[lex]] will
-// return that token again. Only one token can be unlexed at a time, otherwise
-// the program will abort.
-export fn unlex(lex: *lexer, tok: token) void = {
-	assert(lex.un is void, "encoding::json::unlex called twice in a row");
-	lex.un = tok;
-	lex.nextloc = lex.loc;
-	lex.loc = lex.prevloc;
-};
-
-// Scans until encountering a non-alphabetical character, returning the
-// resulting word.
-fn scan_word(lex: *lexer) (str | error) = {
-	strio::reset(&lex.strbuf);
-
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case let rn: rune =>
-			yield rn;
-		case io::EOF =>
-			break;
-		};
-		if (!ascii::isalpha(rn)) {
-			unget(lex, rn);
-			break;
-		};
-		strio::appendrune(&lex.strbuf, rn)!;
-	};
-
-	return strio::string(&lex.strbuf);
-};
-
-type numstate = enum {
-	SIGN,
-	START,
-	ZERO,
-	INTEGER,
-	FRACSTART,
-	FRACTION,
-	EXPSIGN,
-	EXPSTART,
-	EXPONENT,
-};
-
-fn scan_number(lex: *lexer) (token | error) = {
-	strio::reset(&lex.strbuf);
-
-	let state = numstate::SIGN;
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case let rn: rune =>
-			yield rn;
-		case io::EOF =>
-			break;
-		};
-
-		switch (state) {
-		case numstate::SIGN =>
-			state = numstate::START;
-			if (rn != '-') {
-				unget(lex, rn);
-				continue;
-			};
-		case numstate::START =>
-			switch (rn) {
-			case '0' =>
-				state = numstate::ZERO;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					return lex.loc: invalid;
-				};
-				state = numstate::INTEGER;
-			};
-		case numstate::ZERO =>
-			switch (rn) {
-			case '.' =>
-				state = numstate::FRACSTART;
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				if (ascii::isdigit(rn)) {
-					return lex.loc: invalid;
-				};
-				unget(lex, rn);
-				break;
-			};
-		case numstate::INTEGER =>
-			switch (rn) {
-			case '.' =>
-				state = numstate::FRACSTART;
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					unget(lex, rn);
-					break;
-				};
-			};
-		case numstate::FRACSTART =>
-			if (!ascii::isdigit(rn)) {
-				return lex.loc: invalid;
-			};
-			state = numstate::FRACTION;
-		case numstate::FRACTION =>
-			switch (rn) {
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					unget(lex, rn);
-					break;
-				};
-			};
-		case numstate::EXPSIGN =>
-			state = numstate::EXPSTART;
-			if (rn != '+' && rn != '-') {
-				unget(lex, rn);
-				continue;
-			};
-		case numstate::EXPSTART =>
-			if (!ascii::isdigit(rn)) {
-				return lex.loc: invalid;
-			};
-			state = numstate::EXPONENT;
-		case numstate::EXPONENT =>
-			if (!ascii::isdigit(rn)) {
-				unget(lex, rn);
-				break;
-			};
-		};
-
-		strio::appendrune(&lex.strbuf, rn)!;
-	};
-
-	match (strconv::stof64(strio::string(&lex.strbuf))) {
-	case let f: f64 =>
-		return f;
-	case =>
-		return lex.loc: invalid;
-	};
-};
-
-fn scan_str(lex: *lexer) (token | error) = {
-	strio::reset(&lex.strbuf);
-
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case let rn: rune =>
-			yield rn;
-		case io::EOF =>
-			lex.loc.1 += 1;
-			return lex.loc: invalid;
-		};
-
-		switch (rn) {
-		case '"' =>
-			break;
-		case '\\' =>
-			const rn = scan_escape(lex)?;
-			strio::appendrune(&lex.strbuf, rn)!;
-		case =>
-			if (iscntrl(rn)) {
-				return lex.loc: invalid;
-			};
-			strio::appendrune(&lex.strbuf, rn)!;
-		};
-	};
-
-	return strio::string(&lex.strbuf);
-};
-
-fn scan_escape(lex: *lexer) (rune | error) = {
-	const rn = match (nextrune(lex)?) {
-	case let rn: rune =>
-		yield rn;
-	case io::EOF =>
-		return lex.loc: invalid;
-	};
-
-	switch (rn) {
-	case '\"' =>
-		return '\"';
-	case '\\' =>
-		return '\\';
-	case '/' =>
-		return '/';
-	case 'b' =>
-		return '\b';
-	case 'f' =>
-		return '\f';
-	case 'n' =>
-		return '\n';
-	case 'r' =>
-		return '\r';
-	case 't' =>
-		return '\t';
-	case 'u' =>
-		let buf: [4]u8 = [0...];
-		match (io::readall(lex.src, buf)?) {
-		case io::EOF =>
-			return lex.loc: invalid;
-		case size =>
-			yield;
-		};
-		const s = match (strings::fromutf8(buf)) {
-		case let s: str =>
-			yield s;
-		case =>
-			return lex.loc: invalid;
-		};
-		match (strconv::stou32b(s, strconv::base::HEX)) {
-		case let u: u32 =>
-			lex.loc.1 += 4;
-			return u: rune;
-		case =>
-			return lex.loc: invalid;
-		};
-	case =>
-		return lex.loc: invalid;
-	};
-};
-
-// Gets the next rune from the lexer.
-fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
-	if (lex.rb is rune) {
-		lex.prevrloc = lex.loc;
-		const r = lex.rb as rune;
-		lex.rb = void;
-		if (r == '\n') {
-			lex.loc = (lex.loc.0 + 1, 0);
-		} else {
-			lex.loc.1 += 1;
-		};
-		return r;
-	};
-	match (bufio::scanrune(lex.src)) {
-	case let err: io::error =>
-		return err;
-	case utf8::invalid =>
-		return lex.loc: invalid;
-	case io::EOF =>
-		return io::EOF;
-	case let rn: rune =>
-		lex.prevrloc = lex.loc;
-		if (rn == '\n') {
-			lex.loc = (lex.loc.0 + 1, 0);
-		} else {
-			lex.loc.1 += 1;
-		};
-		return rn;
-	};
-};
-
-// Like nextrune but skips whitespace.
-fn nextrunews(lex: *lexer) (rune | io::EOF | error) = {
-	for (true) {
-		match (nextrune(lex)?) {
-		case let rn: rune =>
-			if (isspace(rn)) {
-				continue;
-			};
-			return rn;
-		case io::EOF =>
-			return io::EOF;
-		};
-	};
-	abort(); // Unreachable
-};
-
-fn unget(lex: *lexer, r: rune) void = {
-	assert(lex.rb is void);
-	lex.rb = r;
-	lex.loc = lex.prevrloc;
-};
-
-fn iscntrl(r: rune) bool = r: u32 < 0x20;
-
-fn isspace(r: rune) bool = ascii::isspace(r) && r != '\f';
diff --git a/encoding/json/load.ha b/encoding/json/load.ha
@@ -1,142 +0,0 @@
-use bufio;
-use io;
-use strings;
-use types;
-
-// Options for [[load]].
-export type load_option = nestlimit;
-
-// The maximum number of nested objects or arrays that can be entered before
-// erroring out.
-export type nestlimit = uint;
-
-// Parses a JSON value from the given [[io::handle]], returning the value or an
-// error. The return value is allocated on the heap; use [[finish]] to free it
-// up when you're done using it.
-//
-// By default, this function assumes non-antagonistic inputs, and does not limit
-// recursion depth or memory usage. You may want to set a custom [[nestlimit]],
-// or incorporate an [[io::limitreader]] or similar. Alternatively, you can use
-// the JSON lexer ([[lex]]) directly if dealing with potentially malicious
-// inputs.
-export fn load(src: io::handle, opts: load_option...) (value | error) = {
-	let limit = types::UINT_MAX;
-	for (let i = 0z; i < len(opts); i += 1) {
-		limit = opts[i]: nestlimit: uint;
-	};
-	const lex = newlexer(src);
-	defer close(&lex);
-	return _load(&lex, 0, limit);
-};
-
-// Parses a JSON value from the given string, returning the value or an error.
-// The return value is allocated on the heap; use [[finish]] to free it up when
-// you're done using it.
-//
-// See the documentation for [[load]] for information on dealing with
-// potentially malicious inputs.
-export fn loadstr(input: str, opts: load_option...) (value | error) = {
-	let src = bufio::fixed(strings::toutf8(input), io::mode::READ);
-	return load(&src, opts...);
-};
-
-fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	const tok = mustscan(lexer)?;
-	match (tok) {
-	case _null =>
-		return _null;
-	case let b: bool =>
-		return b;
-	case let f: f64 =>
-		return f;
-	case let s: str =>
-		return strings::dup(s);
-	case arraystart =>
-		if (level == limit) {
-			return limitreached;
-		};
-		return _load_array(lexer, level + 1, limit);
-	case objstart =>
-		if (level == limit) {
-			return limitreached;
-		};
-		return _load_obj(lexer, level + 1, limit);
-	case (arrayend | objend | colon | comma) =>
-		return lexer.loc: invalid;
-	};
-};
-
-fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	let array: []value = [];
-	let tok = mustscan(lexer)?;
-	match (tok) {
-	case arrayend =>
-		return array;
-	case =>
-		unlex(lexer, tok);
-	};
-
-	for (true) {
-		append(array, _load(lexer, level, limit)?);
-
-		tok = mustscan(lexer)?;
-		match (tok) {
-		case comma => void;
-		case arrayend => break;
-		case =>
-			return lexer.loc: invalid;
-		};
-	};
-	return array;
-};
-
-fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	let obj = newobject();
-	let tok = mustscan(lexer)?;
-	match (tok) {
-	case objend =>
-		return obj;
-	case =>
-		unlex(lexer, tok);
-	};
-
-	for (true) {
-		let tok = mustscan(lexer)?;
-		const key = match (tok) {
-		case let s: str =>
-			yield strings::dup(s);
-		case =>
-			return lexer.loc: invalid;
-		};
-		defer free(key);
-
-		tok = mustscan(lexer)?;
-		if (!(tok is colon)) {
-			return lexer.loc: invalid;
-		};
-
-		const val = _load(lexer, level, limit)?;
-		defer finish(val);
-		set(&obj, key, val);
-
-		tok = mustscan(lexer)?;
-		match (tok) {
-		case comma => void;
-		case objend => break;
-		case =>
-			return lexer.loc: invalid;
-		};
-	};
-
-	return obj;
-};
-
-fn mustscan(lexer: *lexer) (token | error) = {
-	match (lex(lexer)?) {
-	case io::EOF =>
-		lexer.loc.1 += 1;
-		return lexer.loc: invalid;
-	case let tok: token =>
-		return tok;
-	};
-};
diff --git a/encoding/json/types.ha b/encoding/json/types.ha
@@ -1,50 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use fmt;
-use io;
-
-// An invalid JSON token was encountered at this location (line, column).
-export type invalid = !(uint, uint);
-
-// The maximum nesting limit was reached.
-export type limitreached = !void;
-
-// A tagged union of all possible errors returned from this module.
-export type error = !(invalid | limitreached | io::error);
-
-// The JSON null value.
-export type _null = void;
-
-// The '[' token, signaling the start of a JSON array.
-export type arraystart = void;
-
-// The ']' token, signaling the end of a JSON array.
-export type arrayend = void;
-
-// The '{' token, signaling the start of a JSON object.
-export type objstart = void;
-
-// The '}' token, signaling the end of a JSON object.
-export type objend = void;
-
-// The ':' token.
-export type colon = void;
-
-// The ',' token.
-export type comma = void;
-
-// All tokens which can be returned from the JSON tokenizer.
-export type token = (arraystart | arrayend | objstart |
-	objend | colon | comma | str | f64 | bool | _null);
-
-// Converts an [[error]] into a human-friendly string.
-export fn strerror(err: error) const str = {
-	static let buf: [53]u8 = [0...];
-	match (err) {
-	case let err: invalid =>
-		return fmt::bsprintf(buf,
-			"{}:{}: Invalid JSON token encountered", err.0, err.1);
-	case let err: io::error =>
-		return io::strerror(err);
-	};
-};
diff --git a/encoding/json/value.ha b/encoding/json/value.ha
@@ -1,193 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use hash::fnv;
-use strings;
-
-// TODO: Resize table as appropriate
-export def OBJECT_BUCKETS: size = 32;
-
-export type object = struct {
-	buckets: [OBJECT_BUCKETS][](str, value),
-};
-
-// A JSON value.
-export type value = (f64 | str | bool | _null | []value | object);
-
-// Initializes a new (empty) JSON object. Call [[finish]] to free associated
-// resources when you're done using it.
-export fn newobject() object = {
-	return object { ... };
-};
-
-// Gets a value from a JSON object. The return value is borrowed from the
-// object.
-export fn get(obj: *object, key: str) (*value | void) = {
-	const hash = fnv::string(key);
-	const bucket = &obj.buckets[hash % len(obj.buckets)];
-	for (let i = 0z; i < len(bucket); i += 1) {
-		if (bucket[i].0 == key) {
-			return &bucket[i].1;
-		};
-	};
-};
-
-// Sets a value in a JSON object. The key and value will be duplicated.
-export fn set(obj: *object, key: const str, val: const value) void = {
-	const hash = fnv::string(key);
-	const bucket = &obj.buckets[hash % len(obj.buckets)];
-	for (let i = 0z; i < len(bucket); i += 1) {
-		if (bucket[i].0 == key) {
-			finish(bucket[i].1);
-			bucket[i].1 = dup(val);
-			return;
-		};
-	};
-	append(bucket, (strings::dup(key), dup(val)));
-};
-
-// Deletes values from a JSON object, if they are present.
-export fn del(obj: *object, keys: const str...) void = {
-	for (let i = 0z; i < len(keys); i += 1) {
-		const key = keys[i];
-		const hash = fnv::string(key);
-		const bucket = &obj.buckets[hash % len(obj.buckets)];
-		for (let i = 0z; i < len(bucket); i += 1) {
-			if (bucket[i].0 == key) {
-				free(bucket[i].0);
-				finish(bucket[i].1);
-				delete(bucket[i]);
-				break;
-			};
-		};
-	};
-};
-
-// Clears all values from a JSON object, leaving it empty.
-export fn reset(obj: *object) void = {
-	let it = iter(obj);
-	for (true) match (next(&it)) {
-	case void =>
-		break;
-	case let v: (const str, const *value) =>
-		del(obj, v.0);
-	};
-};
-
-export type iterator = struct {
-	obj: *object,
-	i: size,
-	j: size,
-};
-
-// Creates an iterator that enumerates over the key/value pairs in an
-// [[object]].
-export fn iter(obj: *object) iterator = {
-	return iterator { obj = obj, ... };
-};
-
-// Returns the next key/value pair from this iterator, or void if none remain.
-export fn next(iter: *iterator) ((const str, const *value) | void) = {
-	for (iter.i < len(iter.obj.buckets); iter.i += 1) {
-		const bucket = &iter.obj.buckets[iter.i];
-		for (iter.j < len(bucket)) {
-			const key = bucket[iter.j].0;
-			const val = &bucket[iter.j].1;
-			iter.j += 1;
-			return (key, val);
-		};
-		iter.j = 0;
-	};
-};
-
-// Duplicates a JSON value. The caller must pass the return value to [[finish]]
-// to free associated resources when they're done using it.
-export fn dup(val: value) value = {
-	match (val) {
-	case let s: str =>
-		return strings::dup(s);
-	case let v: []value =>
-		let new: []value = alloc([], len(v));
-		for (let i = 0z; i < len(v); i += 1) {
-			append(new, dup(v[i]));
-		};
-		return new;
-	case let o: object =>
-		let new = newobject();
-		const i = iter(&o);
-		for (true) {
-			const pair = match (next(&i)) {
-			case void =>
-				break;
-			case let pair: (const str, const *value) =>
-				yield pair;
-			};
-			set(&new, pair.0, *pair.1);
-		};
-		return new;
-	case =>
-		return val;
-	};
-};
-
-// Checks two JSON values for equality.
-export fn equal(a: value, b: value) bool = {
-	match (a) {
-	case _null =>
-		return b is _null;
-	case let a: bool =>
-		return b is bool && a == b as bool;
-	case let a: f64 =>
-		return b is f64 && a == b as f64;
-	case let a: str =>
-		return b is str && a == b as str;
-	case let a: []value =>
-		if (!(b is []value)) return false;
-		const b = b as []value;
-		if (len(a) != len(b)) return false;
-		for (let i = 0z; i < len(a); i += 1) {
-			if (!equal(a[i], b[i])) {
-				return false;
-			};
-		};
-		return true;
-	case let a: object =>
-		if (!(b is object)) return false;
-		let a = iter(&a), b = iter(&(b as object));
-		for (true) match (next(&a)) {
-		case let a: (const str, const *value) =>
-			match (next(&b)) {
-			case let b: (const str, const *value) =>
-				if (a.0 != b.0 || !equal(*a.1, *b.1)) {
-					return false;
-				};
-			};
-		case void =>
-			return next(&b) is void;
-		};
-		return true;
-	};
-};
-
-// Frees state associated with a JSON value.
-export fn finish(val: value) void = {
-	match (val) {
-	case let s: str =>
-		free(s);
-	case let v: []value =>
-		for (let i = 0z; i < len(v); i += 1) {
-			finish(v[i]);
-		};
-		free(v);
-	case let o: object =>
-		for (let i = 0z; i < len(o.buckets); i += 1) {
-			const bucket = &o.buckets[i];
-			for (let j = 0z; j < len(bucket); j += 1) {
-				free(bucket[j].0);
-				finish(bucket[j].1);
-			};
-			// TODO: https://todo.sr.ht/~sircmpwn/hare/690
-			//free(bucket);
-		};
-	case => void;
-	};
-};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -545,30 +545,6 @@ encoding_hex() {
 	gen_ssa encoding::hex ascii bytes fmt io strconv strio strings
 }
 
-encoding_json() {
-	if [ $testing -eq 0 ]
-	then
-		gen_srcs encoding::json \
-			types.ha \
-			lex.ha \
-			load.ha \
-			dump.ha \
-			value.ha
-	else
-		gen_srcs encoding::json \
-			types.ha \
-			lex.ha \
-			load.ha \
-			dump.ha \
-			value.ha \
-			+test/lexer.ha \
-			+test/load.ha \
-			+test/value.ha
-	fi
-	gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \
-		strconv hash::fnv fmt types
-}
-
 encoding_pem() {
 	if [ $testing -eq 0 ]
 	then
 		gen_srcs encoding::pem \
 			pem.ha
 	else
 		gen_srcs encoding::pem \
 			pem.ha \
 			+test.ha
 	fi
 	gen_ssa encoding::pem ascii errors fmt io strings strio
@@ -1478,7 +1454,6 @@ dirs
 encoding::base64
 encoding::base32
 encoding::hex
-encoding::json
 encoding::pem
 encoding::utf8
 endian
diff --git a/stdlib.mk b/stdlib.mk
@@ -320,12 +320,6 @@ stdlib_deps_any += $(stdlib_encoding_hex_any)
 stdlib_encoding_hex_linux = $(stdlib_encoding_hex_any)
 stdlib_encoding_hex_freebsd = $(stdlib_encoding_hex_any)
 
-# gen_lib encoding::json (any)
-stdlib_encoding_json_any = $(HARECACHE)/encoding/json/encoding_json-any.o
-stdlib_deps_any += $(stdlib_encoding_json_any)
-stdlib_encoding_json_linux = $(stdlib_encoding_json_any)
-stdlib_encoding_json_freebsd = $(stdlib_encoding_json_any)
-
 # gen_lib encoding::pem (any)
 stdlib_encoding_pem_any = $(HARECACHE)/encoding/pem/encoding_pem-any.o
 stdlib_deps_any += $(stdlib_encoding_pem_any)
@@ -1120,20 +1114,6 @@ $(HARECACHE)/encoding/hex/encoding_hex-any.ssa: $(stdlib_encoding_hex_any_srcs)
 	@printf 'HAREC \t$@\n'
 	@mkdir -p $(HARECACHE)/encoding/hex
 	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::hex \
 		-t$(HARECACHE)/encoding/hex/encoding_hex.td $(stdlib_encoding_hex_any_srcs)
 
-# encoding::json (+any)
-stdlib_encoding_json_any_srcs = \
-	$(STDLIB)/encoding/json/types.ha \
-	$(STDLIB)/encoding/json/lex.ha \
-	$(STDLIB)/encoding/json/load.ha \
-	$(STDLIB)/encoding/json/dump.ha \
-	$(STDLIB)/encoding/json/value.ha
-
-$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
-	@printf 'HAREC \t$@\n'
-	@mkdir -p $(HARECACHE)/encoding/json
-	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::json \
-		-t$(HARECACHE)/encoding/json/encoding_json.td $(stdlib_encoding_json_any_srcs)
-
 # encoding::pem (+any)
 stdlib_encoding_pem_any_srcs = \
 	$(STDLIB)/encoding/pem/pem.ha
@@ -2520,12 +2500,6 @@ testlib_deps_any += $(testlib_encoding_hex_any)
 testlib_encoding_hex_linux = $(testlib_encoding_hex_any)
 testlib_encoding_hex_freebsd = $(testlib_encoding_hex_any)
 
-# gen_lib encoding::json (any)
-testlib_encoding_json_any = $(TESTCACHE)/encoding/json/encoding_json-any.o
-testlib_deps_any += $(testlib_encoding_json_any)
-testlib_encoding_json_linux = $(testlib_encoding_json_any)
-testlib_encoding_json_freebsd = $(testlib_encoding_json_any)
-
 # gen_lib encoding::pem (any)
 testlib_encoding_pem_any = $(TESTCACHE)/encoding/pem/encoding_pem-any.o
 testlib_deps_any += $(testlib_encoding_pem_any)
@@ -3346,23 +3320,6 @@ $(TESTCACHE)/encoding/hex/encoding_hex-any.ssa: $(testlib_encoding_hex_any_srcs)
 	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::hex \
 		-t$(TESTCACHE)/encoding/hex/encoding_hex.td $(testlib_encoding_hex_any_srcs)
 
-# encoding::json (+any)
-testlib_encoding_json_any_srcs = \
-	$(STDLIB)/encoding/json/types.ha \
-	$(STDLIB)/encoding/json/lex.ha \
-	$(STDLIB)/encoding/json/load.ha \
-	$(STDLIB)/encoding/json/dump.ha \
-	$(STDLIB)/encoding/json/value.ha \
-	$(STDLIB)/encoding/json/+test/lexer.ha \
-	$(STDLIB)/encoding/json/+test/load.ha \
-	$(STDLIB)/encoding/json/+test/value.ha
-
-$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_types_$(PLATFORM))
-	@printf 'HAREC \t$@\n'
-	@mkdir -p $(TESTCACHE)/encoding/json
-	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::json \
-		-t$(TESTCACHE)/encoding/json/encoding_json.td $(testlib_encoding_json_any_srcs)
-
 # encoding::pem (+any)
 testlib_encoding_pem_any_srcs = \
 	$(STDLIB)/encoding/pem/pem.ha \