commit 9f175d0bc9a389f3ae358e256a248d5858d2bb4d
parent b00d4a6f83424b1c5558a37e9c5a6bfd3801701c
Author: Drew DeVault <sir@cmpwn.com>
Date: Tue, 25 Oct 2022 13:13:19 +0200
encoding::json: move to extlib
Its new home is here:
https://sr.ht/~sircmpwn/hare-json/
Diffstat:
11 files changed, 0 insertions(+), 1196 deletions(-)
diff --git a/encoding/json/+test/lexer.ha b/encoding/json/+test/lexer.ha
@@ -1,62 +0,0 @@
-use bufio;
-use strings;
-use io;
-
-// Runs each sample document through the lexer and checks that it produces
-// exactly the expected token sequence, followed by EOF.
-@test fn lex() void = {
-	const table: [_](str, []token) = [
-		("true", [true]),
-		("false", [false]),
-		("null", [_null]),
-		("1234", [1234.0]),
-		("12.34", [12.34]),
-		("12.34e5", [12.34e5]),
-		("12.34E5", [12.34e5]),
-		("12.34e+5", [12.34e5]),
-		("12.34e-5", [12.34e-5]),
-		("12e5", [12.0e5]),
-		("-1234", [-1234.0]),
-		(`"hello world"`, ["hello world"]),
-		(`"\"\\\/\b\f\n\r\t\u0020"`, ["\"\\/\b\f\n\r\t\u0020"]),
-		("[ null, null ]", [arraystart, _null, comma, _null, arrayend]),
-	];
-
-	for (let n = 0z; n < len(table); n += 1) {
-		const expected = table[n].1;
-		const bytes = strings::toutf8(table[n].0);
-		const stream = bufio::fixed(bytes, io::mode::READ);
-		const lexer = newlexer(&stream);
-		defer close(&lexer);
-
-		for (let k = 0z; k < len(expected); k += 1) {
-			const got = lex(&lexer)! as token;
-			assert(tokeq(expected[k], got));
-		};
-
-		// Nothing may be left over once the expected tokens are read.
-		assert(lex(&lexer) is io::EOF);
-	};
-};
-
-// Reports whether two tokens have the same type and, for the tokens that carry
-// a payload (bool, f64, str), the same payload. A type mismatch yields false
-// rather than tripping a type assertion, in the same way as [[equal]].
-fn tokeq(want: token, have: token) bool = {
-	match (want) {
-	case _null =>
-		return have is _null;
-	case comma =>
-		return have is comma;
-	case colon =>
-		return have is colon;
-	case arraystart =>
-		return have is arraystart;
-	case arrayend =>
-		return have is arrayend;
-	case objstart =>
-		return have is objstart;
-	case objend =>
-		return have is objend;
-	case let b: bool =>
-		return have is bool && have as bool == b;
-	case let f: f64 =>
-		return have is f64 && have as f64 == f;
-	case let s: str =>
-		return have is str && have as str == s;
-	};
-};
diff --git a/encoding/json/+test/load.ha b/encoding/json/+test/load.ha
@@ -1,164 +0,0 @@
-use fmt;
-
-// Parses input and checks that it equals expected, then serializes the parsed
-// value with [[dumpstr]], parses that output again and checks that the result
-// still equals expected.
-fn roundtrip(input: str, expected: value) void = {
-	const val = loadstr(input)!;
-	defer finish(val);
-	assert(equal(val, expected));
-	const s = dumpstr(val);
-	defer free(s);
-	// Re-parse the dumped text, not the original input; otherwise the
-	// output of dump is never checked.
-	const val = loadstr(s)!;
-	defer finish(val);
-	assert(equal(val, expected));
-};
-
-// Asserts that loading input fails with [[invalid]] at expected_loc (line,
-// column). On a mismatch, both locations are printed before aborting.
-fn errassert(input: str, expected_loc: (uint, uint)) void = {
-	const got = loadstr(input) as invalid;
-	if (got.0 == expected_loc.0 && got.1 == expected_loc.1) {
-		return;
-	};
-	fmt::errorfln("=== JSON:\n{}", input)!;
-	fmt::errorfln("=== expected error location:\n({}, {})",
-		expected_loc.0, expected_loc.1)!;
-	fmt::errorfln("=== actual error location:\n({}, {})",
-		got.0, got.1)!;
-	abort();
-};
-
-// Exercises the loader: a series of valid documents that must load to the
-// expected value, followed by malformed documents that must be rejected at a
-// specific (line, column).
-@test fn load() void = {
- // Scratch objects, refilled with set() and emptied with reset() between
- // cases.
- let obj = newobject();
- defer finish(obj);
- let obj2 = newobject();
- defer finish(obj2);
-
- // Scalars, arrays and objects.
- roundtrip(`1234`, 1234.0);
- roundtrip(`[]`, []);
- roundtrip(`[1, 2, 3, null]`, [1.0, 2.0, 3.0, _null]);
- roundtrip(`{}`, obj);
- set(&obj, "hello", "world");
- set(&obj, "answer", 42.0);
- roundtrip(`{ "hello": "world", "answer": 42 }`, obj);
- reset(&obj);
- roundtrip(`[[] ]`, [[]]);
- roundtrip(`[""]`, [""]);
- roundtrip(`["a"]`, ["a"]);
- roundtrip(`[false]`, [false]);
- roundtrip(`[null, 1, "1", {}]`, [_null, 1.0, "1", obj]);
- roundtrip(`[null]`, [_null]);
- roundtrip("[1\n]", [1.0]);
- roundtrip(`[1,null,null,null,2]`, [1.0, _null, _null, _null, 2.0]);
- set(&obj, "", 0.0);
- roundtrip(`{"":0}`, obj);
- reset(&obj);
- set(&obj, "foo\0bar", 42.0);
- roundtrip(`{"foo\u0000bar": 42}`, obj);
- reset(&obj);
- set(&obj, "min", -1.0e+28);
- set(&obj, "max", +1.0e+28);
- roundtrip(`{"min": -1.0e+28, "max": 1.0e+28}`, obj);
- reset(&obj);
- set(&obj, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
- set(&obj2, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
- set(&obj, "x", [obj2]);
- roundtrip(`{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}`, obj);
- reset(&obj);
- reset(&obj2);
- set(&obj, "a", []);
- roundtrip(`{"a":[]}`, obj);
- roundtrip("{\n" `"a": []` "\n}", obj);
- reset(&obj);
- // Strings and escape sequences.
- roundtrip(`"\u0060\u012a\u12AB"`, "\u0060\u012a\u12AB");
- roundtrip(`"\"\\\/\b\f\n\r\t"`, "\"\\/\b\f\n\r\t");
- roundtrip(`"\\u0000"`, `\u0000`);
- roundtrip(`"\""`, `"`);
- roundtrip(`"a/*b*/c/*d//e"`, "a/*b*/c/*d//e");
- roundtrip(`"\\a"`, `\a`);
- roundtrip(`"\\n"`, `\n`);
- roundtrip(`"\u0012"`, "\u0012");
- roundtrip(`[ "asd"]`, ["asd"]);
- roundtrip(`"new\u000Aline"`, "new\nline");
- roundtrip(`"\u0000"`, "\0");
- roundtrip(`"\u002c"`, "\u002c");
- roundtrip(`"asd "`, "asd ");
- roundtrip(`" "`, " ");
- roundtrip(`"\u0821"`, "\u0821");
- roundtrip(`"\u0123"`, "\u0123");
- roundtrip(`"\u0061\u30af\u30EA\u30b9"`, "\u0061\u30af\u30EA\u30b9");
- roundtrip(`"\uA66D"`, "\uA66D");
- roundtrip(`"\u005C"`, `\`);
- roundtrip(`"\u0022"`, `"`);
- roundtrip(`""`, "");
- roundtrip(` [] `, []);
-
- // Malformed arrays.
- errassert(`[1,,]`, (1, 4));
- errassert(`[1 true]`, (1, 7));
- errassert(`["": 1]`, (1, 4));
- errassert(`[,1]`, (1, 2));
- errassert(`[1,,2]`, (1, 4));
- errassert(`["",]`, (1, 5));
- errassert(`["x"`, (1, 5));
- errassert(`[x`, (1, 2));
- errassert(`[3[4]]`, (1, 3));
- errassert(`[1:2]`, (1, 3));
- errassert(`[,]`, (1, 2));
- errassert(`[-]`, (1, 3));
- errassert(`[ , ""]`, (1, 5));
- errassert("[\"a\",\n4\n,1,", (3, 4));
- errassert(`[1,]`, (1, 4));
- errassert("[\"\va\"\\f", (1, 3));
- errassert(`[*]`, (1, 2));
- errassert(`[1,`, (1, 4));
- errassert("[1,\n1\n,1", (3, 3));
- errassert(`[{}`, (1, 4));
- errassert(`["x", truth]`, (1, 11));
- // Malformed objects.
- errassert(`{[: "x"}`, (1, 2));
- errassert(`{"x", null}`, (1, 5));
- errassert(`{"x"::"b"}`, (1, 6));
- errassert(`{"a":"a" 123}`, (1, 12));
- errassert(`{"a" b}`, (1, 6));
- errassert(`{:"b"}`, (1, 2));
- errassert(`{"a" "b"}`, (1, 8));
- errassert(`{"a":`, (1, 6));
- errassert(`{"a"`, (1, 5));
- errassert(`{1:1}`, (1, 2));
- errassert(`{9999E9999:1}`, (1, 10));
- errassert(`{null:null,null:null}`, (1, 5));
- errassert(`{"id":0,,,,,}`, (1, 9));
- errassert(`{'a':0}`, (1, 2));
- errassert(`{"id":0,}`, (1, 9));
- errassert(`{"a":"b",,"c":"d"}`, (1, 10));
- errassert(`{true: false}`, (1, 5));
- errassert(`{"a":"a`, (1, 8));
- errassert(`{ "foo" : "bar", "a" }`, (1, 22));
- // Empty, truncated or otherwise unparseable documents.
- errassert(` `, (1, 2));
- errassert(`<null>`, (1, 1));
- errassert(`["asd]`, (1, 7));
- errassert(`True`, (1, 4));
- errassert(`]`, (1, 1));
- errassert(`}`, (1, 1));
- errassert(`{"x": true,`, (1, 12));
- errassert(`[`, (1, 2));
- errassert(`{`, (1, 2));
- errassert(``, (1, 1));
- errassert("\0", (1, 1));
- errassert(`{"":`, (1, 5));
- errassert(`['`, (1, 2));
- errassert(`["`, (1, 3));
- errassert(`[,`, (1, 2));
- errassert(`[{`, (1, 3));
- errassert(`{[`, (1, 2));
- errassert(`{]`, (1, 2));
- errassert(`[}`, (1, 2));
- errassert(`{'`, (1, 2));
- errassert(`{"`, (1, 3));
- errassert(`{,`, (1, 2));
- errassert(`["\{["\{["\{["\{`, (1, 4));
- errassert(`*`, (1, 1));
- errassert(`\u000A""`, (1, 1));
- errassert("\f", (1, 1));
-};
-
-// The sample document opens six nested containers, so a limit of 6 must be
-// accepted and a limit of 5 must be rejected with [[limitreached]].
-@test fn nestlimit() void = {
-	const doc = `{ "foo": [[[{"bar": ["baz"]}]]] }`;
-
-	const loaded = loadstr(doc, 6: nestlimit)!;
-	finish(loaded);
-
-	const rejected = loadstr(doc, 5: nestlimit);
-	assert(rejected is limitreached);
-};
diff --git a/encoding/json/+test/value.ha b/encoding/json/+test/value.ha
@@ -1,35 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-
-// Checks set/get/del on an object: stored values can be read back, unknown
-// keys yield void, and deleted keys are gone.
-@test fn object() void = {
-	let obj = newobject();
-	defer finish(obj);
-
-	set(&obj, "hello", "world");
-	set(&obj, "foo", "bar");
-	set(&obj, "the answer", 42.0);
-
-	// XXX: Match overhaul?
-	const hello = get(&obj, "hello") as *value;
-	assert(*hello as str == "world");
-	const foo = get(&obj, "foo") as *value;
-	assert(*foo as str == "bar");
-	const answer = get(&obj, "the answer") as *value;
-	assert(*answer as f64 == 42.0);
-	assert(get(&obj, "nonexistent") is void);
-
-	del(&obj, "hello");
-	assert(get(&obj, "hello") is void);
-};
-
-// An object holding three keys must yield exactly three pairs from its
-// iterator, and void after that.
-@test fn iterator() void = {
-	let obj = newobject();
-	defer finish(obj);
-
-	set(&obj, "hello", "world");
-	set(&obj, "foo", "bar");
-	set(&obj, "the answer", 42.0);
-
-	let it = iter(&obj);
-	for (let seen = 0z; seen < 3; seen += 1) {
-		assert(next(&it) is (const str, const *value));
-	};
-	assert(next(&it) is void);
-};
diff --git a/encoding/json/README b/encoding/json/README
@@ -1,15 +0,0 @@
-This module provides an implementation of the JavaScript Object Notation (JSON)
-format, as defined by RFC 8259. Note that several other, incompatible
-specifications exist. This implementation does not include any extensions; only
-features which are strictly required by the spec are implemented.
-
-A lexer for JSON values is provided, which may be initialized with [[newlexer]]
-and provides tokens via [[lex]]. It uses a relatively small amount of memory and
-provides relatively few guarantees regarding the compliance of the input with
-the JSON grammar.
-
-Additionally, the [[value]] type is provided to store any JSON value, as
-well as helpers like [[newobject]], [[get]], and [[set]]. One can load a JSON
-value from an input stream into a heap-allocated [[value]] via [[load]], which
-enforces all of JSON's grammar constraints and returns an object which must be
-freed with [[finish]].
diff --git a/encoding/json/dump.ha b/encoding/json/dump.ha
@@ -1,84 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Sebastian <sebastian@sebsite.pw>
-use fmt;
-use io;
-use strings;
-use strio;
-
-// Writes the JSON text for a [[value]] to an [[io::handle]], without any
-// additional formatting. Returns the sum of the sizes reported by the
-// underlying writes.
-export fn dump(out: io::handle, val: value) (size | io::error) = {
-	let written = 0z;
-	match (val) {
-	case let v: (f64 | bool) =>
-		written += fmt::fprint(out, v)?;
-	case let s: str =>
-		written += fmt::fprint(out, `"`)?;
-		let runes = strings::iter(s);
-		for (true) {
-			const r = match (strings::next(&runes)) {
-			case void =>
-				break;
-			case let r: rune =>
-				yield r;
-			};
-			// Runes with a dedicated two-character escape.
-			const escape = switch (r) {
-			case '\b' =>
-				yield `\b`;
-			case '\f' =>
-				yield `\f`;
-			case '\n' =>
-				yield `\n`;
-			case '\r' =>
-				yield `\r`;
-			case '\t' =>
-				yield `\t`;
-			case '\"' =>
-				yield `\"`;
-			case '\\' =>
-				yield `\\`;
-			case =>
-				yield "";
-			};
-			if (len(escape) != 0) {
-				written += fmt::fprint(out, escape)?;
-			} else if (iscntrl(r)) {
-				// Other control characters need \uXXXX.
-				written += fmt::fprintf(out, `\u{:04x}`,
-					r: u32)?;
-			} else {
-				written += fmt::fprint(out, r)?;
-			};
-		};
-		written += fmt::fprint(out, `"`)?;
-	case _null =>
-		written += fmt::fprint(out, "null")?;
-	case let items: []value =>
-		written += fmt::fprint(out, "[")?;
-		for (let n = 0z; n < len(items); n += 1) {
-			if (n != 0) {
-				written += fmt::fprint(out, ",")?;
-			};
-			written += dump(out, items[n])?;
-		};
-		written += fmt::fprint(out, "]")?;
-	case let o: object =>
-		written += fmt::fprint(out, "{")?;
-		let first = true;
-		let members = iter(&o);
-		for (true) match (next(&members)) {
-		case void =>
-			break;
-		case let member: (const str, const *value) =>
-			if (!first) {
-				written += fmt::fprint(out, ",")?;
-			};
-			first = false;
-			written += dump(out, member.0)?;
-			written += fmt::fprint(out, ":")?;
-			written += dump(out, *member.1)?;
-		};
-		written += fmt::fprint(out, "}")?;
-	};
-	return written;
-};
-
-// Like [[dump]], but collects the output in a newly allocated string, which
-// the caller must free.
-export fn dumpstr(val: value) str = {
-	let sink = strio::dynamic();
-	dump(&sink, val)!;
-	return strio::string(&sink);
-};
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
@@ -1,383 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use ascii;
-use bufio;
-use encoding::utf8;
-use io;
-use os;
-use strconv;
-use strings;
-use strio;
-
-// State of a JSON lexer. Create one with [[newlexer]] and release it with
-// [[close]].
-export type lexer = struct {
- // Handle the input runes are read from.
- src: io::handle,
- // Allocated by newlexer and freed by close.
- buffer: []u8,
- // Scratch stream in which the text of the current word, number or
- // string is accumulated; strings returned from lex point into it.
- strbuf: strio::stream,
- // Token pushed back by unlex, if any.
- un: (token | void),
- // Rune pushed back by unget, if any.
- rb: (rune | void),
- // Current (line, column). Starts at (1, 0); the column is advanced for
- // each rune read and set back to 0 when a newline is read.
- loc: (uint, uint),
- // Value of loc when lex was last entered; restored by unlex.
- prevloc: (uint, uint),
- // Value of loc when unlex was called; restored when lex hands the
- // pushed-back token out again.
- nextloc: (uint, uint),
- // Value of loc before the most recent rune was read; restored by unget.
- prevrloc: (uint, uint),
-};
-
-// Sets up a JSON lexer that reads from src. Tokens are obtained with [[lex]];
-// pass the lexer to [[close]] once you are done with it.
-export fn newlexer(src: io::handle) lexer = {
-	let scratch: []u8 = alloc([0...], os::BUFSIZ);
-	const fresh = lexer {
-		src = src,
-		buffer = scratch,
-		strbuf = strio::dynamic(),
-		un = void,
-		rb = void,
-		// Line 1, no runes consumed yet.
-		loc = (1, 0),
-		...
-	};
-	return fresh;
-};
-
-// Frees state associated with a JSON lexer. Strings previously returned by
-// [[lex]] are borrowed from the lexer and must not be used afterwards.
-export fn close(lex: *lexer) void = {
-	// strbuf is a strio::dynamic stream; closing it releases the text
-	// buffer that the scan_* functions grow.
-	io::close(&lex.strbuf)!;
-	free(lex.buffer);
-};
-
-// Returns the next token from a JSON lexer. The return value is borrowed from
-// the lexer and will be overwritten on subsequent calls.
-//
-// Returns io::EOF once only whitespace remains, and [[invalid]] carrying the
-// current location when the input does not start a valid token.
-export fn lex(lex: *lexer) (token | io::EOF | error) = {
- // A token pushed back with unlex takes priority over reading new input.
- match (lex.un) {
- case void =>
- lex.prevloc = lex.loc;
- case let tok: token =>
- lex.un = void;
- lex.prevloc = lex.loc;
- // Jump forward to the location that was saved by unlex.
- lex.loc = lex.nextloc;
- return tok;
- };
-
- // First rune of the token, with leading whitespace skipped.
- const rn = match (nextrunews(lex)?) {
- case io::EOF =>
- return io::EOF;
- case let rn: rune =>
- yield rn;
- };
-
- // Single-rune tokens, and strings (identified by their opening quote).
- switch (rn) {
- case '[' =>
- return arraystart;
- case ']' =>
- return arrayend;
- case '{' =>
- return objstart;
- case '}' =>
- return objend;
- case ',' =>
- return comma;
- case ':' =>
- return colon;
- case '"' =>
- return scan_str(lex)?;
- case =>
- yield;
- };
-
- // Numbers: give the first rune back so that scan_number sees all of it.
- if (ascii::isdigit(rn) || rn == '-') {
- unget(lex, rn);
- return scan_number(lex)?;
- };
-
- // Anything else has to be one of the keywords true, false or null.
- if (!ascii::isalpha(rn)) {
- return lex.loc: invalid;
- };
-
- unget(lex, rn);
- const word = scan_word(lex)?;
- switch (word) {
- case "true" =>
- return true;
- case "false" =>
- return false;
- case "null" =>
- return _null;
- case =>
- return lex.loc: invalid;
- };
-};
-
-// Pushes a token back into the lexer so that the next call to [[lex]] returns
-// it again. At most one token may be pending at a time; pushing back a second
-// one aborts the program.
-export fn unlex(lex: *lexer, tok: token) void = {
-	assert(lex.un is void, "encoding::json::unlex called twice in a row");
-	// Save the current location for when the token is handed out again,
-	// then step back to where the lexer was before it was read.
-	lex.nextloc = lex.loc;
-	lex.loc = lex.prevloc;
-	lex.un = tok;
-};
-
-// Collects consecutive ASCII letters into the lexer's string buffer and
-// returns them. The first non-letter is pushed back; EOF also ends the word.
-fn scan_word(lex: *lexer) (str | error) = {
-	strio::reset(&lex.strbuf);
-
-	for (true) match (nextrune(lex)?) {
-	case io::EOF =>
-		break;
-	case let rn: rune =>
-		if (ascii::isalpha(rn)) {
-			strio::appendrune(&lex.strbuf, rn)!;
-		} else {
-			unget(lex, rn);
-			break;
-		};
-	};
-
-	return strio::string(&lex.strbuf);
-};
-
-// States of the number scanner in scan_number.
-type numstate = enum {
- // Initial state: an optional leading '-'.
- SIGN,
- // First digit of the integer part.
- START,
- // The integer part is a single '0'; no further digits may follow.
- ZERO,
- // Within the digits of the integer part.
- INTEGER,
- // Just after '.': at least one digit is required.
- FRACSTART,
- // Within the digits of the fraction.
- FRACTION,
- // Just after 'e' or 'E': an optional '+' or '-'.
- EXPSIGN,
- // First digit of the exponent: at least one digit is required.
- EXPSTART,
- // Within the digits of the exponent.
- EXPONENT,
-};
-
-// Scans a JSON number with a small state machine (see numstate), collecting
-// its text in the lexer's string buffer and converting it with
-// strconv::stof64. The rune that terminates the number is pushed back.
-// Returns [[invalid]] if the text does not follow the JSON number grammar or
-// cannot be converted.
-fn scan_number(lex: *lexer) (token | error) = {
-	strio::reset(&lex.strbuf);
-
-	let state = numstate::SIGN;
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case let rn: rune =>
-			yield rn;
-		case io::EOF =>
-			break;
-		};
-
-		switch (state) {
-		case numstate::SIGN =>
-			state = numstate::START;
-			if (rn != '-') {
-				// No sign: re-read this rune as the first digit.
-				unget(lex, rn);
-				continue;
-			};
-		case numstate::START =>
-			switch (rn) {
-			case '0' =>
-				state = numstate::ZERO;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					return lex.loc: invalid;
-				};
-				state = numstate::INTEGER;
-			};
-		case numstate::ZERO =>
-			switch (rn) {
-			case '.' =>
-				state = numstate::FRACSTART;
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				// Leading zeroes are not allowed.
-				if (ascii::isdigit(rn)) {
-					return lex.loc: invalid;
-				};
-				unget(lex, rn);
-				break;
-			};
-		case numstate::INTEGER =>
-			switch (rn) {
-			case '.' =>
-				state = numstate::FRACSTART;
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					unget(lex, rn);
-					break;
-				};
-			};
-		case numstate::FRACSTART =>
-			if (!ascii::isdigit(rn)) {
-				return lex.loc: invalid;
-			};
-			state = numstate::FRACTION;
-		case numstate::FRACTION =>
-			switch (rn) {
-			case 'e', 'E' =>
-				state = numstate::EXPSIGN;
-			case =>
-				if (!ascii::isdigit(rn)) {
-					unget(lex, rn);
-					break;
-				};
-			};
-		case numstate::EXPSIGN =>
-			state = numstate::EXPSTART;
-			if (rn != '+' && rn != '-') {
-				// No sign: re-read this rune as the first
-				// exponent digit.
-				unget(lex, rn);
-				continue;
-			};
-		case numstate::EXPSTART =>
-			if (!ascii::isdigit(rn)) {
-				return lex.loc: invalid;
-			};
-			state = numstate::EXPONENT;
-		case numstate::EXPONENT =>
-			if (!ascii::isdigit(rn)) {
-				unget(lex, rn);
-				break;
-			};
-		};
-
-		strio::appendrune(&lex.strbuf, rn)!;
-	};
-
-	// The loop only rejects a malformed number when it sees the offending
-	// rune. If the input simply ended (e.g. "-", "1." or "1e"), the number
-	// is incomplete unless we stopped in a state where all required digits
-	// have been read; do not rely on strconv to notice.
-	switch (state) {
-	case numstate::ZERO, numstate::INTEGER,
-		numstate::FRACTION, numstate::EXPONENT =>
-		yield;
-	case =>
-		return lex.loc: invalid;
-	};
-
-	match (strconv::stof64(strio::string(&lex.strbuf))) {
-	case let f: f64 =>
-		return f;
-	case =>
-		return lex.loc: invalid;
-	};
-};
-
-// Scans the remainder of a string whose opening quote has already been
-// consumed. Escape sequences are decoded and the text is collected in the
-// lexer's string buffer. Unescaped control characters and EOF before the
-// closing quote are reported as [[invalid]].
-fn scan_str(lex: *lexer) (token | error) = {
-	strio::reset(&lex.strbuf);
-
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case io::EOF =>
-			// Report the position just past the last rune read.
-			lex.loc.1 += 1;
-			return lex.loc: invalid;
-		case let rn: rune =>
-			yield rn;
-		};
-
-		if (rn == '"') {
-			break;
-		};
-		if (rn == '\\') {
-			const decoded = scan_escape(lex)?;
-			strio::appendrune(&lex.strbuf, decoded)!;
-			continue;
-		};
-		if (iscntrl(rn)) {
-			return lex.loc: invalid;
-		};
-		strio::appendrune(&lex.strbuf, rn)!;
-	};
-
-	return strio::string(&lex.strbuf);
-};
-
-// Decodes one escape sequence inside a string; the backslash has already been
-// consumed by the caller. Returns the rune the escape stands for, or
-// [[invalid]] for an unknown escape or if the input ends early.
-fn scan_escape(lex: *lexer) (rune | error) = {
- const rn = match (nextrune(lex)?) {
- case let rn: rune =>
- yield rn;
- case io::EOF =>
- return lex.loc: invalid;
- };
-
- switch (rn) {
- case '\"' =>
- return '\"';
- case '\\' =>
- return '\\';
- case '/' =>
- return '/';
- case 'b' =>
- return '\b';
- case 'f' =>
- return '\f';
- case 'n' =>
- return '\n';
- case 'r' =>
- return '\r';
- case 't' =>
- return '\t';
- case 'u' =>
- // \uXXXX: the four digits are read straight from the source handle
- // rather than through nextrune, so the column is advanced by hand
- // below once they have been converted.
- let buf: [4]u8 = [0...];
- match (io::readall(lex.src, buf)?) {
- case io::EOF =>
- return lex.loc: invalid;
- case size =>
- yield;
- };
- const s = match (strings::fromutf8(buf)) {
- case let s: str =>
- yield s;
- case =>
- return lex.loc: invalid;
- };
- // NOTE(review): which strings count as four hex digits is decided by
- // strconv::stou32b; confirm it rejects everything JSON does not allow.
- // NOTE(review): each \uXXXX is converted to a rune on its own; UTF-16
- // surrogate pairs do not appear to be combined here - confirm.
- match (strconv::stou32b(s, strconv::base::HEX)) {
- case let u: u32 =>
- lex.loc.1 += 4;
- return u: rune;
- case =>
- return lex.loc: invalid;
- };
- case =>
- return lex.loc: invalid;
- };
-};
-
-// Reads the next rune, taking a rune pushed back with unget first if there is
-// one, and updates the lexer's location to account for it. The previous
-// location is saved in prevrloc so that unget can undo the step.
-fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
-	const rn = match (lex.rb) {
-	case let pending: rune =>
-		lex.rb = void;
-		yield pending;
-	case void =>
-		yield match (bufio::scanrune(lex.src)) {
-		case let err: io::error =>
-			return err;
-		case utf8::invalid =>
-			return lex.loc: invalid;
-		case io::EOF =>
-			return io::EOF;
-		case let scanned: rune =>
-			yield scanned;
-		};
-	};
-
-	lex.prevrloc = lex.loc;
-	if (rn == '\n') {
-		// New line: next line number, column back to zero.
-		lex.loc = (lex.loc.0 + 1, 0);
-	} else {
-		lex.loc.1 += 1;
-	};
-	return rn;
-};
-
-// Returns the next rune which is not whitespace (as defined by isspace), or
-// io::EOF if the input ends first.
-fn nextrunews(lex: *lexer) (rune | io::EOF | error) = {
-	for (true) {
-		const rn = match (nextrune(lex)?) {
-		case io::EOF =>
-			return io::EOF;
-		case let rn: rune =>
-			yield rn;
-		};
-		if (!isspace(rn)) {
-			return rn;
-		};
-	};
-	abort(); // Unreachable
-};
-
-// Pushes a rune back so that the next call to nextrune returns it again, and
-// rewinds the location to where it was before that rune was read. Only one
-// rune may be pending at a time.
-fn unget(lex: *lexer, r: rune) void = {
-	assert(lex.rb is void);
-	lex.loc = lex.prevrloc;
-	lex.rb = r;
-};
-
-// Reports whether a rune is a control character which must be escaped inside
-// a JSON string, i.e. whether its code point is below U+0020.
-fn iscntrl(r: rune) bool = {
-	const codepoint = r: u32;
-	return codepoint < 0x20;
-};
-
-// Reports whether a rune is insignificant whitespace between JSON tokens.
-// RFC 8259 allows only space, horizontal tab, line feed and carriage return
-// here; other characters which ASCII classifies as whitespace, such as
-// vertical tab and form feed, are not accepted.
-fn isspace(r: rune) bool =
-	r == ' ' || r == '\t' || r == '\n' || r == '\r';
diff --git a/encoding/json/load.ha b/encoding/json/load.ha
@@ -1,142 +0,0 @@
-use bufio;
-use io;
-use strings;
-use types;
-
-// Options for [[load]]. Currently the only option is [[nestlimit]].
-export type load_option = nestlimit;
-
-// The maximum number of nested objects or arrays that can be entered before
-// erroring out. When the limit is hit, [[load]] returns [[limitreached]].
-export type nestlimit = uint;
-
-// Reads one JSON value from an [[io::handle]] and returns it, or an error.
-// The result lives on the heap; release it with [[finish]] when you no longer
-// need it.
-//
-// Unless told otherwise, this function trusts its input: neither the nesting
-// depth nor the memory used is bounded. For input you do not control, pass a
-// [[nestlimit]], wrap the source in an [[io::limitreader]] or similar, or
-// work with the JSON lexer ([[lex]]) directly.
-export fn load(src: io::handle, opts: load_option...) (value | error) = {
-	// If several limits are given, the last one is used.
-	let maxdepth = types::UINT_MAX;
-	if (len(opts) != 0) {
-		maxdepth = opts[len(opts) - 1]: nestlimit: uint;
-	};
-	const lexer = newlexer(src);
-	defer close(&lexer);
-	return _load(&lexer, 0, maxdepth);
-};
-
-// Reads one JSON value from a string and returns it, or an error. The result
-// lives on the heap; release it with [[finish]] when you no longer need it.
-//
-// The notes on potentially malicious inputs in the documentation of [[load]]
-// apply here as well.
-export fn loadstr(input: str, opts: load_option...) (value | error) = {
-	const bytes = strings::toutf8(input);
-	let reader = bufio::fixed(bytes, io::mode::READ);
-	return load(&reader, opts...);
-};
-
-// Parses a single value starting at the next token. level is the number of
-// containers currently open; opening another one when level equals limit
-// fails with [[limitreached]].
-fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	match (mustscan(lexer)?) {
-	case _null =>
-		return _null;
-	case let b: bool =>
-		return b;
-	case let f: f64 =>
-		return f;
-	case let s: str =>
-		// The token is borrowed from the lexer, so take a copy.
-		return strings::dup(s);
-	case arraystart =>
-		if (level != limit) {
-			return _load_array(lexer, level + 1, limit);
-		};
-		return limitreached;
-	case objstart =>
-		if (level != limit) {
-			return _load_obj(lexer, level + 1, limit);
-		};
-		return limitreached;
-	case (arrayend | objend | colon | comma) =>
-		// A value cannot start with any of these.
-		return lexer.loc: invalid;
-	};
-};
-
-// Parses the elements of an array whose '[' has already been consumed, up to
-// and including the closing ']'. If an error occurs part-way through, the
-// elements loaded so far are freed before the error is returned.
-fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	let success = false;
-	let array: []value = [];
-	// Every error return below would otherwise leak the partial array.
-	defer if (!success) finish(array);
-
-	let tok = mustscan(lexer)?;
-	match (tok) {
-	case arrayend =>
-		success = true;
-		return array;
-	case =>
-		// Not empty: put the token back so _load sees it.
-		unlex(lexer, tok);
-	};
-
-	for (true) {
-		append(array, _load(lexer, level, limit)?);
-
-		tok = mustscan(lexer)?;
-		match (tok) {
-		case comma => void;
-		case arrayend => break;
-		case =>
-			return lexer.loc: invalid;
-		};
-	};
-	success = true;
-	return array;
-};
-
-// Parses the members of an object whose '{' has already been consumed, up to
-// and including the closing '}'. If an error occurs part-way through, the
-// members loaded so far are freed before the error is returned.
-fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
-	let success = false;
-	let obj = newobject();
-	// Every error return below would otherwise leak the partial object.
-	defer if (!success) finish(obj);
-
-	let tok = mustscan(lexer)?;
-	match (tok) {
-	case objend =>
-		success = true;
-		return obj;
-	case =>
-		// Not empty: put the token back so the loop sees the key.
-		unlex(lexer, tok);
-	};
-
-	for (true) {
-		let tok = mustscan(lexer)?;
-		const key = match (tok) {
-		case let s: str =>
-			// Borrowed from the lexer and overwritten by the next
-			// token, so take a copy.
-			yield strings::dup(s);
-		case =>
-			return lexer.loc: invalid;
-		};
-		defer free(key);
-
-		tok = mustscan(lexer)?;
-		if (!(tok is colon)) {
-			return lexer.loc: invalid;
-		};
-
-		// set() stores its own copies of key and val, so ours are
-		// released at the end of each iteration.
-		const val = _load(lexer, level, limit)?;
-		defer finish(val);
-		set(&obj, key, val);
-
-		tok = mustscan(lexer)?;
-		match (tok) {
-		case comma => void;
-		case objend => break;
-		case =>
-			return lexer.loc: invalid;
-		};
-	};
-
-	success = true;
-	return obj;
-};
-
-// Like [[lex]], but for places where another token is required: reaching EOF
-// is reported as [[invalid]], one column past the current location.
-fn mustscan(lexer: *lexer) (token | error) = {
-	match (lex(lexer)?) {
-	case let tok: token =>
-		return tok;
-	case io::EOF =>
-		lexer.loc.1 += 1;
-		return lexer.loc: invalid;
-	};
-};
diff --git a/encoding/json/types.ha b/encoding/json/types.ha
@@ -1,50 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use fmt;
-use io;
-
-// An invalid JSON token was encountered at this location (line, column).
-export type invalid = !(uint, uint);
-
-// The maximum nesting limit was reached.
-export type limitreached = !void;
-
-// A tagged union of all possible errors returned from this module.
-export type error = !(invalid | limitreached | io::error);
-
-// The token types below are all aliases of void: they carry no data and are
-// told apart only by their tag within [[token]].
-
-// The JSON null value.
-export type _null = void;
-
-// The '[' token, signaling the start of a JSON array.
-export type arraystart = void;
-
-// The ']' token, signaling the end of a JSON array.
-export type arrayend = void;
-
-// The '{' token, signaling the start of a JSON object.
-export type objstart = void;
-
-// The '}' token, signaling the end of a JSON object.
-export type objend = void;
-
-// The ':' token.
-export type colon = void;
-
-// The ',' token.
-export type comma = void;
-
-// All tokens which can be returned from the JSON tokenizer. Strings, numbers
-// and booleans are represented by str, f64 and bool respectively.
-export type token = (arraystart | arrayend | objstart |
- objend | colon | comma | str | f64 | bool | _null);
-
-// Converts an [[error]] into a human-friendly string. The result may be
-// stored in a static buffer which is overwritten by subsequent calls.
-export fn strerror(err: error) const str = {
-	// Large enough for two 10-digit numbers plus the fixed message text.
-	static let buf: [53]u8 = [0...];
-	match (err) {
-	case let err: invalid =>
-		return fmt::bsprintf(buf,
-			"{}:{}: Invalid JSON token encountered", err.0, err.1);
-	case limitreached =>
-		return "Maximum nesting limit reached";
-	case let err: io::error =>
-		return io::strerror(err);
-	};
-};
diff --git a/encoding/json/value.ha b/encoding/json/value.ha
@@ -1,193 +0,0 @@
-// License: MPL-2.0
-// (c) 2022 Drew DeVault <sir@cmpwn.com>
-use hash::fnv;
-use strings;
-
-// The number of hash buckets in every [[object]].
-// TODO: Resize table as appropriate
-export def OBJECT_BUCKETS: size = 32;
-
-// A JSON object: a hash table with a fixed number of buckets, each of which is
-// a slice of (key, value) pairs. Use [[get]], [[set]], [[del]] and [[iter]] to
-// access it.
-export type object = struct {
- buckets: [OBJECT_BUCKETS][](str, value),
-};
-
-// A JSON value. Numbers are f64, arrays are []value and null is [[_null]].
-export type value = (f64 | str | bool | _null | []value | object);
-
-// Returns a new JSON object with no members. Pass it to [[finish]] to free
-// associated resources once you are done with it.
-export fn newobject() object = object { ... };
-
-// Looks up a key in a JSON object. Returns a pointer to the stored value,
-// which is borrowed from the object, or void if the key is not present.
-export fn get(obj: *object, key: str) (*value | void) = {
-	const slot = fnv::string(key) % len(obj.buckets);
-	const bucket = &obj.buckets[slot];
-	for (let n = 0z; n < len(bucket); n += 1) {
-		if (bucket[n].0 != key) {
-			continue;
-		};
-		return &bucket[n].1;
-	};
-};
-
-// Stores a value under a key in a JSON object, replacing any value already
-// stored under that key. The object keeps its own copies of the key and the
-// value.
-export fn set(obj: *object, key: const str, val: const value) void = {
-	const slot = fnv::string(key) % len(obj.buckets);
-	const bucket = &obj.buckets[slot];
-	for (let n = 0z; n < len(bucket); n += 1) {
-		if (bucket[n].0 != key) {
-			continue;
-		};
-		// Known key: release the old value and store a copy of the new.
-		finish(bucket[n].1);
-		bucket[n].1 = dup(val);
-		return;
-	};
-	append(bucket, (strings::dup(key), dup(val)));
-};
-
-// Removes the given keys from a JSON object, freeing each stored key and
-// value. Keys which are not present are ignored.
-export fn del(obj: *object, keys: const str...) void = {
-	for (let k = 0z; k < len(keys); k += 1) {
-		const key = keys[k];
-		const slot = fnv::string(key) % len(obj.buckets);
-		const bucket = &obj.buckets[slot];
-		for (let n = 0z; n < len(bucket); n += 1) {
-			if (bucket[n].0 != key) {
-				continue;
-			};
-			free(bucket[n].0);
-			finish(bucket[n].1);
-			delete(bucket[n]);
-			break;
-		};
-	};
-};
-
-// Clears all values from a JSON object, leaving it empty. The stored keys and
-// values are freed.
-export fn reset(obj: *object) void = {
-	// Empty each bucket directly, from the back. Deleting through an
-	// iterator would shift the remaining entries of a bucket underneath
-	// the iterator's index and skip every other entry of that bucket.
-	for (let i = 0z; i < len(obj.buckets); i += 1) {
-		const bucket = &obj.buckets[i];
-		for (len(bucket) != 0) {
-			const last = len(bucket) - 1;
-			free(bucket[last].0);
-			finish(bucket[last].1);
-			delete(bucket[last]);
-		};
-	};
-};
-
-// Iteration state for [[next]]; create one with [[iter]].
-export type iterator = struct {
- // The object being iterated over.
- obj: *object,
- // Index of the bucket currently being visited.
- i: size,
- // Index, within that bucket, of the pair which is returned next.
- j: size,
-};
-
-// Returns an iterator over the key/value pairs of an [[object]], positioned
-// before the first pair. Use [[next]] to advance it.
-export fn iter(obj: *object) iterator = iterator {
-	obj = obj,
-	i = 0,
-	j = 0,
-};
-
-// Advances the iterator and returns the key/value pair it passed over, or
-// void once every bucket has been exhausted. Both the key and the value are
-// borrowed from the object.
-export fn next(iter: *iterator) ((const str, const *value) | void) = {
-	for (iter.i < len(iter.obj.buckets)) {
-		const bucket = &iter.obj.buckets[iter.i];
-		if (iter.j < len(bucket)) {
-			const at = iter.j;
-			iter.j += 1;
-			return (bucket[at].0, &bucket[at].1);
-		};
-		// This bucket is done; continue at the start of the next one.
-		iter.j = 0;
-		iter.i += 1;
-	};
-};
-
-// Makes a deep copy of a JSON value: strings, array elements and object
-// members are all copied. Pass the result to [[finish]] to free associated
-// resources once you are done with it.
-export fn dup(val: value) value = {
-	match (val) {
-	case let s: str =>
-		return strings::dup(s);
-	case let items: []value =>
-		let copy: []value = alloc([], len(items));
-		for (let n = 0z; n < len(items); n += 1) {
-			append(copy, dup(items[n]));
-		};
-		return copy;
-	case let o: object =>
-		let copy = newobject();
-		let members = iter(&o);
-		for (true) match (next(&members)) {
-		case void =>
-			break;
-		case let member: (const str, const *value) =>
-			// set() copies both the key and the value.
-			set(&copy, member.0, *member.1);
-		};
-		return copy;
-	case =>
-		// Numbers, booleans and null own no memory.
-		return val;
-	};
-};
-
-// Checks two JSON values for equality. Arrays are equal if they have the same
-// length and equal elements in the same order. Objects are equal if they have
-// the same set of keys and equal values under each key, regardless of the
-// order in which the members were added.
-export fn equal(a: value, b: value) bool = {
-	match (a) {
-	case _null =>
-		return b is _null;
-	case let a: bool =>
-		return b is bool && a == b as bool;
-	case let a: f64 =>
-		return b is f64 && a == b as f64;
-	case let a: str =>
-		return b is str && a == b as str;
-	case let a: []value =>
-		if (!(b is []value)) return false;
-		const b = b as []value;
-		if (len(a) != len(b)) return false;
-		for (let i = 0z; i < len(a); i += 1) {
-			if (!equal(a[i], b[i])) {
-				return false;
-			};
-		};
-		return true;
-	case let a: object =>
-		if (!(b is object)) return false;
-		let b = b as object;
-
-		// Every member of a must be present in b with an equal value.
-		let na = 0z;
-		let ait = iter(&a);
-		for (true) match (next(&ait)) {
-		case void =>
-			break;
-		case let pair: (const str, const *value) =>
-			match (get(&b, pair.0)) {
-			case void =>
-				return false;
-			case let other: *value =>
-				if (!equal(*pair.1, *other)) {
-					return false;
-				};
-			};
-			na += 1;
-		};
-
-		// Keys are unique within an object, so if b has the same number
-		// of members it cannot have any which a lacks.
-		let nb = 0z;
-		let bit = iter(&b);
-		for (true) match (next(&bit)) {
-		case void =>
-			break;
-		case (const str, const *value) =>
-			nb += 1;
-		};
-		return na == nb;
-	};
-};
-
-// Releases the memory owned by a JSON value, including that of any values
-// nested within it. Numbers, booleans and null own nothing.
-export fn finish(val: value) void = {
-	match (val) {
-	case let s: str =>
-		free(s);
-	case let items: []value =>
-		for (let n = 0z; n < len(items); n += 1) {
-			finish(items[n]);
-		};
-		free(items);
-	case let o: object =>
-		for (let b = 0z; b < len(o.buckets); b += 1) {
-			const bucket = &o.buckets[b];
-			for (let n = 0z; n < len(bucket); n += 1) {
-				free(bucket[n].0);
-				finish(bucket[n].1);
-			};
-			// TODO: https://todo.sr.ht/~sircmpwn/hare/690
-			//free(bucket);
-		};
-	case => void;
-	};
-};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -545,30 +545,6 @@ encoding_hex() {
gen_ssa encoding::hex ascii bytes fmt io strconv strio strings
}
-# Describes the encoding::json module: passes its source files to gen_srcs
-# (adding the +test files when $testing is non-zero) and the modules it
-# depends on to gen_ssa.
-encoding_json() {
- if [ $testing -eq 0 ]
- then
- gen_srcs encoding::json \
- types.ha \
- lex.ha \
- load.ha \
- dump.ha \
- value.ha
- else
- gen_srcs encoding::json \
- types.ha \
- lex.ha \
- load.ha \
- dump.ha \
- value.ha \
- +test/lexer.ha \
- +test/load.ha \
- +test/value.ha
- fi
- gen_ssa encoding::json ascii bufio io strio os encoding::utf8 strings \
- strconv hash::fnv fmt types
-}
-
encoding_pem() {
if [ $testing -eq 0 ]
then
@@ -1478,7 +1454,6 @@ dirs
encoding::base64
encoding::base32
encoding::hex
-encoding::json
encoding::pem
encoding::utf8
endian
diff --git a/stdlib.mk b/stdlib.mk
@@ -320,12 +320,6 @@ stdlib_deps_any += $(stdlib_encoding_hex_any)
stdlib_encoding_hex_linux = $(stdlib_encoding_hex_any)
stdlib_encoding_hex_freebsd = $(stdlib_encoding_hex_any)
-# gen_lib encoding::json (any)
-stdlib_encoding_json_any = $(HARECACHE)/encoding/json/encoding_json-any.o
-stdlib_deps_any += $(stdlib_encoding_json_any)
-stdlib_encoding_json_linux = $(stdlib_encoding_json_any)
-stdlib_encoding_json_freebsd = $(stdlib_encoding_json_any)
-
# gen_lib encoding::pem (any)
stdlib_encoding_pem_any = $(HARECACHE)/encoding/pem/encoding_pem-any.o
stdlib_deps_any += $(stdlib_encoding_pem_any)
@@ -1120,20 +1114,6 @@ $(HARECACHE)/encoding/hex/encoding_hex-any.ssa: $(stdlib_encoding_hex_any_srcs)
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::hex \
-t$(HARECACHE)/encoding/hex/encoding_hex.td $(stdlib_encoding_hex_any_srcs)
-# encoding::json (+any)
-stdlib_encoding_json_any_srcs = \
- $(STDLIB)/encoding/json/types.ha \
- $(STDLIB)/encoding/json/lex.ha \
- $(STDLIB)/encoding/json/load.ha \
- $(STDLIB)/encoding/json/dump.ha \
- $(STDLIB)/encoding/json/value.ha
-
-$(HARECACHE)/encoding/json/encoding_json-any.ssa: $(stdlib_encoding_json_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_hash_fnv_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
- @printf 'HAREC \t$@\n'
- @mkdir -p $(HARECACHE)/encoding/json
- @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::json \
- -t$(HARECACHE)/encoding/json/encoding_json.td $(stdlib_encoding_json_any_srcs)
-
# encoding::pem (+any)
stdlib_encoding_pem_any_srcs = \
$(STDLIB)/encoding/pem/pem.ha
@@ -2520,12 +2500,6 @@ testlib_deps_any += $(testlib_encoding_hex_any)
testlib_encoding_hex_linux = $(testlib_encoding_hex_any)
testlib_encoding_hex_freebsd = $(testlib_encoding_hex_any)
-# gen_lib encoding::json (any)
-testlib_encoding_json_any = $(TESTCACHE)/encoding/json/encoding_json-any.o
-testlib_deps_any += $(testlib_encoding_json_any)
-testlib_encoding_json_linux = $(testlib_encoding_json_any)
-testlib_encoding_json_freebsd = $(testlib_encoding_json_any)
-
# gen_lib encoding::pem (any)
testlib_encoding_pem_any = $(TESTCACHE)/encoding/pem/encoding_pem-any.o
testlib_deps_any += $(testlib_encoding_pem_any)
@@ -3346,23 +3320,6 @@ $(TESTCACHE)/encoding/hex/encoding_hex-any.ssa: $(testlib_encoding_hex_any_srcs)
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::hex \
-t$(TESTCACHE)/encoding/hex/encoding_hex.td $(testlib_encoding_hex_any_srcs)
-# encoding::json (+any)
-testlib_encoding_json_any_srcs = \
- $(STDLIB)/encoding/json/types.ha \
- $(STDLIB)/encoding/json/lex.ha \
- $(STDLIB)/encoding/json/load.ha \
- $(STDLIB)/encoding/json/dump.ha \
- $(STDLIB)/encoding/json/value.ha \
- $(STDLIB)/encoding/json/+test/lexer.ha \
- $(STDLIB)/encoding/json/+test/load.ha \
- $(STDLIB)/encoding/json/+test/value.ha
-
-$(TESTCACHE)/encoding/json/encoding_json-any.ssa: $(testlib_encoding_json_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_hash_fnv_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_types_$(PLATFORM))
- @printf 'HAREC \t$@\n'
- @mkdir -p $(TESTCACHE)/encoding/json
- @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::json \
- -t$(TESTCACHE)/encoding/json/encoding_json.td $(testlib_encoding_json_any_srcs)
-
# encoding::pem (+any)
testlib_encoding_pem_any_srcs = \
$(STDLIB)/encoding/pem/pem.ha \