hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 893a7416aab3af31cee4271f5c2a8a9ce44cfd89
parent 0c101ac5dddc96c2a80b7fcc9651cfa9a4a1eb76
Author: Sebastian <sebastian@sebsite.pw>
Date:   Sat,  9 Apr 2022 14:26:39 -0400

format::xml: return line number of syntax error

This also expands the tests to ensure that the line number is accurate.

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M format/xml/+test.ha | 28 +++++++++++++++++-----------
M format/xml/parser.ha | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M format/xml/types.ha | 21 +++++++++++++--------
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
5 files changed, 107 insertions(+), 62 deletions(-)

diff --git a/format/xml/+test.ha b/format/xml/+test.ha @@ -5,6 +5,7 @@ // (c) 2021 Eyal Sawady <ecs@d2evs.net> use io; use bufio; +use fmt; use strings; @test fn basic() void = { @@ -25,7 +26,7 @@ use strings; "foobar": elementend, "\n": text, "root": elementend, - ], false); + ], void); }; @test fn comments() void = { @@ -42,7 +43,7 @@ use strings; "hello": elementend, "\n": text, "root": elementend, - ], false); + ], void); }; @test fn entities() void = { @@ -58,7 +59,7 @@ use strings; "hello": elementend, "\n": text, "root": elementend, - ], false); + ], void); }; @test fn cdata() void = { @@ -71,11 +72,15 @@ use strings; "Hello world &foo <bar>": text, "\n": text, "root": elementend, - ], false); + ], void); }; @test fn errors() void = { xmltest("<?xml version='1.0' encoding='utf-8' ?> +<!-- +comment which spans +multiple lines +--> <root> <hello name='foobar'></world> </root>", [ @@ -83,10 +88,10 @@ use strings; "\n\t": text, "hello": elementstart, ("name", "foobar"): attribute, - ], true); + ], 7); }; -fn xmltest(input: str, expected: []token, err: bool) void = { +fn xmltest(input: str, expected: []token, err: (void | size)) void = { let in = bufio::fixed(strings::toutf8(input), io::mode::READ); let parser = parse(&in) as *parser; for (let i = 0z; i < len(expected); i += 1) { @@ -95,8 +100,8 @@ fn xmltest(input: str, expected: []token, err: bool) void = { yield tok; case void => abort("Expected token, got void"); - case syntaxerr => - abort("Expected token, got syntax error"); + case let err: syntaxerr => + fmt::fatal("{}", strerror(err)); }; match (tok) { case let el: elementstart => @@ -117,9 +122,10 @@ fn xmltest(input: str, expected: []token, err: bool) void = { assert(el == ex); }; }; - if (err) { - assert(scan(parser) is error); - } else { + match (err) { + case void => assert(scan(parser) is void); + case let z: size => + assert(scan(parser) as syntaxerr: size == z); }; }; diff --git a/format/xml/parser.ha b/format/xml/parser.ha @@ -33,6 +33,7 @@ 
export fn parse(in: io::handle) (*parser | error) = { namebuf = strio::dynamic(), entbuf = strio::dynamic(), textbuf = strio::dynamic(), + line = 1, ... }); if (bufio::isbuffered(in)) { @@ -77,7 +78,7 @@ export fn scan(par: *parser) (token | void | error) = { let rn: rune = match (bufio::scanrune(par.in)?) { case io::EOF => if (par.state == state::ROOT) { - return syntaxerr; + return par.line: syntaxerr; } else { return; }; @@ -90,7 +91,7 @@ export fn scan(par: *parser) (token | void | error) = { case '<' => const next = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => bufio::unreadrune(par.in, rn); yield rn; @@ -108,7 +109,7 @@ export fn scan(par: *parser) (token | void | error) = { return el; case => if (par.state == state::ROOT) { - return syntaxerr; + return par.line: syntaxerr; }; bufio::unreadrune(par.in, rn); return scan_content(par)?; @@ -122,7 +123,7 @@ export fn scan(par: *parser) (token | void | error) = { par.state = state::ELEMENT; return scan(par)?; } else if (!isnamestart(rn)) { - return syntaxerr; + return par.line: syntaxerr; }; bufio::unreadrune(par.in, rn); return scan_attr(par)?; @@ -131,13 +132,13 @@ export fn scan(par: *parser) (token | void | error) = { fn poptag(par: *parser, expect: str) (str | error) = { if (len(par.tags) == 0) { - return syntaxerr; + return par.line: syntaxerr; }; let pop = par.tags[len(par.tags) - 1]; delete(par.tags[len(par.tags) - 1]); defer free(pop); if (expect != "" && expect != pop) { - return syntaxerr; + return par.line: syntaxerr; }; strio::reset(&par.namebuf); strio::concat(&par.namebuf, pop)!; @@ -151,14 +152,17 @@ fn scan_attr(par: *parser) (token | error) = { strio::reset(&par.textbuf); for (true) match (bufio::scanrune(par.in)?) 
{ case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => rn = switch (rn) { case '<' => - return syntaxerr; + return par.line: syntaxerr; case '&' => bufio::unreadrune(par.in, rn); yield scan_entity(par)?; + case '\n' => + par.line += 1; + yield rn; case => yield rn; }; @@ -172,7 +176,7 @@ fn scan_comment(par: *parser) (token | void | error) = { want(par, "<!")?; match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => switch (rn) { case '-' => // Comments @@ -180,35 +184,43 @@ fn scan_comment(par: *parser) (token | void | error) = { case '[' => want(par, "CDATA[")?; if (par.state != state::ELEMENT) { - return syntaxerr; + return par.line: syntaxerr; }; return scan_cdata(par)?; case => - return syntaxerr; + return par.line: syntaxerr; }; }; for (true) { const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => + if (rn == '\n') par.line += 1; yield rn; }; if (rn != '-') continue; const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => + if (rn == '\n') par.line += 1; yield rn; }; if (rn != '-') continue; const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; - if (rn == '>') break; + switch (rn) { + case '>' => + break; + case '\n' => + par.line += 1; + case => void; + }; }; return scan(par); }; @@ -218,31 +230,39 @@ fn scan_cdata(par: *parser) (text | error) = { for (true) { const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; if (rn != ']') { + if (rn == '\n') par.line += 1; strio::appendrune(&par.textbuf, rn)!; continue; }; const rn = match (bufio::scanrune(par.in)?) 
{ case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; if (rn != ']') { + if (rn == '\n') par.line += 1; strio::appendrune(&par.textbuf, rn)!; continue; }; const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; - if (rn == '>') break; + switch (rn) { + case '>' => + break; + case '\n' => + par.line += 1; + case => void; + }; strio::appendrune(&par.textbuf, rn)!; }; return strio::string(&par.textbuf): text; @@ -261,6 +281,9 @@ fn scan_content(par: *parser) (text | error) = { case '&', '%' => bufio::unreadrune(par.in, rn); yield scan_entity(par)?; + case '\n' => + par.line += 1; + yield rn; case => yield rn; }; @@ -274,11 +297,14 @@ fn scan_element(par: *parser) (token | error) = { let close = false; match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => switch (rn) { case '/' => close = true; + case '\n' => + par.line += 1; + bufio::unreadrune(par.in, rn); case => bufio::unreadrune(par.in, rn); }; @@ -297,7 +323,7 @@ fn scan_entity(par: *parser) (rune | error) = { want(par, '&')?; let rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; @@ -305,7 +331,9 @@ fn scan_entity(par: *parser) (rune | error) = { case '#' => return scan_charref(par); case '%' => - return syntaxerr; // XXX: Deliberate omission: PEReference + return par.line: syntaxerr; // XXX: Deliberate omission: PEReference + case '\n' => + return par.line: syntaxerr; case => bufio::unreadrune(par.in, rn); return scan_namedent(par); @@ -316,7 +344,7 @@ fn scan_charref(par: *parser) (rune | error) = { let base = strconv::base::DEC; match (bufio::scanrune(par.in)?) 
{ case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => if (rn == 'x') { base = strconv::base::HEX; @@ -329,7 +357,7 @@ fn scan_charref(par: *parser) (rune | error) = { for (true) { let rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; @@ -338,17 +366,17 @@ fn scan_charref(par: *parser) (rune | error) = { } else if (rn == ';') { break; } else { - return syntaxerr; + return par.line: syntaxerr; }; }; if (len(strio::string(&par.entbuf)) == 0) { - return syntaxerr; + return par.line: syntaxerr; }; match (strconv::stou32b(strio::string(&par.entbuf), base)) { case let u: u32 => return u: rune; case (strconv::invalid | strconv::overflow) => - return syntaxerr; + return par.line: syntaxerr; }; }; @@ -369,7 +397,7 @@ fn scan_namedent(par: *parser) (rune | error) = { }; // XXX: Deliberate ommission: this only supports the pre-defined // entities as defined by XML 1.0 (Fifth Edition) section 4.6. - return syntaxerr; + return par.line: syntaxerr; }; fn scan_name(par: *parser, buf: *strio::dynamic_stream) (str | error) = { @@ -377,18 +405,18 @@ fn scan_name(par: *parser, buf: *strio::dynamic_stream) (str | error) = { const rn = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; if (!isnamestart(rn)) { - return syntaxerr; + return par.line: syntaxerr; }; strio::appendrune(buf, rn)!; for (true) match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => if (isname(rn)) { strio::appendrune(buf, rn)!; @@ -436,7 +464,7 @@ fn prolog(par: *parser) (void | error) = { if (encoding) { let attr = scan_attr(par)? 
as attribute; if (attr.0 != "encoding") { - return syntaxerr; + return par.line: syntaxerr; }; // XXX: Deliberate omission: all values other than utf-8 match (ascii::strcasecmp(attr.1, "utf-8")) { @@ -460,15 +488,15 @@ fn prolog(par: *parser) (void | error) = { if (standalone) { let attr = scan_attr(par)? as attribute; if (attr.0 != "standalone") { - return syntaxerr; + return par.line: syntaxerr; }; // XXX: Deliberate omission: non-standalone documents match (ascii::strcasecmp(attr.1, "yes")) { case void => - return syntaxerr; + return par.line: syntaxerr; case let n: int => if (n != 0) { - return syntaxerr; + return par.line: syntaxerr; }; }; }; @@ -490,10 +518,10 @@ fn quote(par: *parser) (rune | error) = { case '"', '\'' => return rn; case => - return syntaxerr; + return par.line: syntaxerr; }; case => - return syntaxerr; + return par.line: syntaxerr; }; }; @@ -503,12 +531,15 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) (bool | error) = { case let x: rune => let have = match (bufio::scanrune(par.in)?) { case io::EOF => - return syntaxerr; + return par.line: syntaxerr; case let rn: rune => yield rn; }; if (have != x) { - return syntaxerr; + return par.line: syntaxerr; + }; + if (x == '\n') { + par.line += 1; }; case let x: str => let iter = strings::iter(x); @@ -528,9 +559,12 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) 
(bool | error) = { bufio::unreadrune(par.in, rn); break; }; + if (rn == '\n') { + par.line += 1; + }; }; if (ws && n < 1) { - return syntaxerr; + return par.line: syntaxerr; }; hadws = n >= 1; }; diff --git a/format/xml/types.ha b/format/xml/types.ha @@ -4,6 +4,7 @@ // (c) 2021 Eyal Sawady <ecs@d2evs.net> use bufio; use encoding::utf8; +use fmt; use io; use os; use strio; @@ -14,6 +15,7 @@ export type parser = struct { close: bool, state: state, tags: []str, + line: size, // strio buffers: namebuf: strio::dynamic_stream, @@ -43,17 +45,20 @@ export type text = str; export type token = (elementstart | elementend | attribute | text); // A syntax error was encountered in the document. -export type syntaxerr = !void; // TODO: Add line number? +export type syntaxerr = !size; // Any error which can occur during XML parsing. export type error = !(syntaxerr | utf8::invalid | io::error); // Converts an [[error]] to a user-friendly string representation. -export fn strerror(err: error) const str = match (err) { -case syntaxerr => - yield "Syntax error"; -case utf8::invalid => - yield "Document is not valid UTF-8"; -case let err: io::error => - yield io::strerror(err); +export fn strerror(err: error) const str = { + static let buf: [2048]u8 = [0...]; + match (err) { + case let err: syntaxerr => + return fmt::bsprintf(buf, "Syntax error on line {}", err: size); + case utf8::invalid => + return "Document is not valid UTF-8"; + case let err: io::error => + return io::strerror(err); + }; }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -520,7 +520,7 @@ format_xml() { else gensrcs_format_xml +test.ha fi - gen_ssa format::xml io bufio strings ascii strio os + gen_ssa format::xml fmt io bufio strings ascii strio os } fs() { diff --git a/stdlib.mk b/stdlib.mk @@ -1035,7 +1035,7 @@ stdlib_format_xml_any_srcs= \ $(STDLIB)/format/xml/parser.ha \ $(STDLIB)/format/xml/chars.ha -$(HARECACHE)/format/xml/format_xml-any.ssa: $(stdlib_format_xml_any_srcs) $(stdlib_rt) 
$(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_ascii_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) +$(HARECACHE)/format/xml/format_xml-any.ssa: $(stdlib_format_xml_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_ascii_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/format/xml @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::xml \ @@ -2954,7 +2954,7 @@ testlib_format_xml_any_srcs= \ $(STDLIB)/format/xml/chars.ha \ $(STDLIB)/format/xml/+test.ha -$(TESTCACHE)/format/xml/format_xml-any.ssa: $(testlib_format_xml_any_srcs) $(testlib_rt) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_ascii_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) +$(TESTCACHE)/format/xml/format_xml-any.ssa: $(testlib_format_xml_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_ascii_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/format/xml @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::xml \