commit 893a7416aab3af31cee4271f5c2a8a9ce44cfd89
parent 0c101ac5dddc96c2a80b7fcc9651cfa9a4a1eb76
Author: Sebastian <sebastian@sebsite.pw>
Date: Sat, 9 Apr 2022 14:26:39 -0400
format::xml: return line number of syntax error
This also expands the tests to ensure that the line number is accurate.
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
5 files changed, 107 insertions(+), 62 deletions(-)
diff --git a/format/xml/+test.ha b/format/xml/+test.ha
@@ -5,6 +5,7 @@
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use io;
use bufio;
+use fmt;
use strings;
@test fn basic() void = {
@@ -25,7 +26,7 @@ use strings;
"foobar": elementend,
"\n": text,
"root": elementend,
- ], false);
+ ], void);
};
@test fn comments() void = {
@@ -42,7 +43,7 @@ use strings;
"hello": elementend,
"\n": text,
"root": elementend,
- ], false);
+ ], void);
};
@test fn entities() void = {
@@ -58,7 +59,7 @@ use strings;
"hello": elementend,
"\n": text,
"root": elementend,
- ], false);
+ ], void);
};
@test fn cdata() void = {
@@ -71,11 +72,15 @@ use strings;
"Hello world &foo <bar>": text,
"\n": text,
"root": elementend,
- ], false);
+ ], void);
};
@test fn errors() void = {
xmltest("<?xml version='1.0' encoding='utf-8' ?>
+<!--
+comment which spans
+multiple lines
+-->
<root>
<hello name='foobar'></world>
</root>", [
@@ -83,10 +88,10 @@ use strings;
"\n\t": text,
"hello": elementstart,
("name", "foobar"): attribute,
- ], true);
+ ], 7);
};
-fn xmltest(input: str, expected: []token, err: bool) void = {
+fn xmltest(input: str, expected: []token, err: (void | size)) void = {
let in = bufio::fixed(strings::toutf8(input), io::mode::READ);
let parser = parse(&in) as *parser;
for (let i = 0z; i < len(expected); i += 1) {
@@ -95,8 +100,8 @@ fn xmltest(input: str, expected: []token, err: bool) void = {
yield tok;
case void =>
abort("Expected token, got void");
- case syntaxerr =>
- abort("Expected token, got syntax error");
+ case let err: syntaxerr =>
+ fmt::fatal("{}", strerror(err));
};
match (tok) {
case let el: elementstart =>
@@ -117,9 +122,10 @@ fn xmltest(input: str, expected: []token, err: bool) void = {
assert(el == ex);
};
};
- if (err) {
- assert(scan(parser) is error);
- } else {
+ match (err) {
+ case void =>
assert(scan(parser) is void);
+ case let z: size =>
+ assert(scan(parser) as syntaxerr: size == z);
};
};
diff --git a/format/xml/parser.ha b/format/xml/parser.ha
@@ -33,6 +33,7 @@ export fn parse(in: io::handle) (*parser | error) = {
namebuf = strio::dynamic(),
entbuf = strio::dynamic(),
textbuf = strio::dynamic(),
+ line = 1,
...
});
if (bufio::isbuffered(in)) {
@@ -77,7 +78,7 @@ export fn scan(par: *parser) (token | void | error) = {
let rn: rune = match (bufio::scanrune(par.in)?) {
case io::EOF =>
if (par.state == state::ROOT) {
- return syntaxerr;
+ return par.line: syntaxerr;
} else {
return;
};
@@ -90,7 +91,7 @@ export fn scan(par: *parser) (token | void | error) = {
case '<' =>
const next = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
bufio::unreadrune(par.in, rn);
yield rn;
@@ -108,7 +109,7 @@ export fn scan(par: *parser) (token | void | error) = {
return el;
case =>
if (par.state == state::ROOT) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
bufio::unreadrune(par.in, rn);
return scan_content(par)?;
@@ -122,7 +123,7 @@ export fn scan(par: *parser) (token | void | error) = {
par.state = state::ELEMENT;
return scan(par)?;
} else if (!isnamestart(rn)) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
bufio::unreadrune(par.in, rn);
return scan_attr(par)?;
@@ -131,13 +132,13 @@ export fn scan(par: *parser) (token | void | error) = {
fn poptag(par: *parser, expect: str) (str | error) = {
if (len(par.tags) == 0) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
let pop = par.tags[len(par.tags) - 1];
delete(par.tags[len(par.tags) - 1]);
defer free(pop);
if (expect != "" && expect != pop) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
strio::reset(&par.namebuf);
strio::concat(&par.namebuf, pop)!;
@@ -151,14 +152,17 @@ fn scan_attr(par: *parser) (token | error) = {
strio::reset(&par.textbuf);
for (true) match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
rn = switch (rn) {
case '<' =>
- return syntaxerr;
+ return par.line: syntaxerr;
case '&' =>
bufio::unreadrune(par.in, rn);
yield scan_entity(par)?;
+ case '\n' =>
+ par.line += 1;
+ yield rn;
case =>
yield rn;
};
@@ -172,7 +176,7 @@ fn scan_comment(par: *parser) (token | void | error) = {
want(par, "<!")?;
match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
switch (rn) {
case '-' => // Comments
@@ -180,35 +184,43 @@ fn scan_comment(par: *parser) (token | void | error) = {
case '[' =>
want(par, "CDATA[")?;
if (par.state != state::ELEMENT) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
return scan_cdata(par)?;
case =>
- return syntaxerr;
+ return par.line: syntaxerr;
};
};
for (true) {
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
+ if (rn == '\n') par.line += 1;
yield rn;
};
if (rn != '-') continue;
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
+ if (rn == '\n') par.line += 1;
yield rn;
};
if (rn != '-') continue;
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
- if (rn == '>') break;
+ switch (rn) {
+ case '>' =>
+ break;
+ case '\n' =>
+ par.line += 1;
+ case => void;
+ };
};
return scan(par);
};
@@ -218,31 +230,39 @@ fn scan_cdata(par: *parser) (text | error) = {
for (true) {
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
if (rn != ']') {
+ if (rn == '\n') par.line += 1;
strio::appendrune(&par.textbuf, rn)!;
continue;
};
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
if (rn != ']') {
+ if (rn == '\n') par.line += 1;
strio::appendrune(&par.textbuf, rn)!;
continue;
};
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
- if (rn == '>') break;
+ switch (rn) {
+ case '>' =>
+ break;
+ case '\n' =>
+ par.line += 1;
+ case => void;
+ };
strio::appendrune(&par.textbuf, rn)!;
};
return strio::string(&par.textbuf): text;
@@ -261,6 +281,9 @@ fn scan_content(par: *parser) (text | error) = {
case '&', '%' =>
bufio::unreadrune(par.in, rn);
yield scan_entity(par)?;
+ case '\n' =>
+ par.line += 1;
+ yield rn;
case =>
yield rn;
};
@@ -274,11 +297,14 @@ fn scan_element(par: *parser) (token | error) = {
let close = false;
match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
switch (rn) {
case '/' =>
close = true;
+ case '\n' =>
+ par.line += 1;
+ bufio::unreadrune(par.in, rn);
case =>
bufio::unreadrune(par.in, rn);
};
@@ -297,7 +323,7 @@ fn scan_entity(par: *parser) (rune | error) = {
want(par, '&')?;
let rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
@@ -305,7 +331,9 @@ fn scan_entity(par: *parser) (rune | error) = {
case '#' =>
return scan_charref(par);
case '%' =>
- return syntaxerr; // XXX: Deliberate omission: PEReference
+ return par.line: syntaxerr; // XXX: Deliberate omission: PEReference
+ case '\n' =>
+ return par.line: syntaxerr;
case =>
bufio::unreadrune(par.in, rn);
return scan_namedent(par);
@@ -316,7 +344,7 @@ fn scan_charref(par: *parser) (rune | error) = {
let base = strconv::base::DEC;
match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
if (rn == 'x') {
base = strconv::base::HEX;
@@ -329,7 +357,7 @@ fn scan_charref(par: *parser) (rune | error) = {
for (true) {
let rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
@@ -338,17 +366,17 @@ fn scan_charref(par: *parser) (rune | error) = {
} else if (rn == ';') {
break;
} else {
- return syntaxerr;
+ return par.line: syntaxerr;
};
};
if (len(strio::string(&par.entbuf)) == 0) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
match (strconv::stou32b(strio::string(&par.entbuf), base)) {
case let u: u32 =>
return u: rune;
case (strconv::invalid | strconv::overflow) =>
- return syntaxerr;
+ return par.line: syntaxerr;
};
};
@@ -369,7 +397,7 @@ fn scan_namedent(par: *parser) (rune | error) = {
};
// XXX: Deliberate omission: this only supports the pre-defined
// entities as defined by XML 1.0 (Fifth Edition) section 4.6.
- return syntaxerr;
+ return par.line: syntaxerr;
};
fn scan_name(par: *parser, buf: *strio::dynamic_stream) (str | error) = {
@@ -377,18 +405,18 @@ fn scan_name(par: *parser, buf: *strio::dynamic_stream) (str | error) = {
const rn = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
if (!isnamestart(rn)) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
strio::appendrune(buf, rn)!;
for (true) match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
if (isname(rn)) {
strio::appendrune(buf, rn)!;
@@ -436,7 +464,7 @@ fn prolog(par: *parser) (void | error) = {
if (encoding) {
let attr = scan_attr(par)? as attribute;
if (attr.0 != "encoding") {
- return syntaxerr;
+ return par.line: syntaxerr;
};
// XXX: Deliberate omission: all values other than utf-8
match (ascii::strcasecmp(attr.1, "utf-8")) {
@@ -460,15 +488,15 @@ fn prolog(par: *parser) (void | error) = {
if (standalone) {
let attr = scan_attr(par)? as attribute;
if (attr.0 != "standalone") {
- return syntaxerr;
+ return par.line: syntaxerr;
};
// XXX: Deliberate omission: non-standalone documents
match (ascii::strcasecmp(attr.1, "yes")) {
case void =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let n: int =>
if (n != 0) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
};
};
@@ -490,10 +518,10 @@ fn quote(par: *parser) (rune | error) = {
case '"', '\'' =>
return rn;
case =>
- return syntaxerr;
+ return par.line: syntaxerr;
};
case =>
- return syntaxerr;
+ return par.line: syntaxerr;
};
};
@@ -503,12 +531,15 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) (bool | error) = {
case let x: rune =>
let have = match (bufio::scanrune(par.in)?) {
case io::EOF =>
- return syntaxerr;
+ return par.line: syntaxerr;
case let rn: rune =>
yield rn;
};
if (have != x) {
- return syntaxerr;
+ return par.line: syntaxerr;
+ };
+ if (x == '\n') {
+ par.line += 1;
};
case let x: str =>
let iter = strings::iter(x);
@@ -528,9 +559,12 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) (bool | error) = {
bufio::unreadrune(par.in, rn);
break;
};
+ if (rn == '\n') {
+ par.line += 1;
+ };
};
if (ws && n < 1) {
- return syntaxerr;
+ return par.line: syntaxerr;
};
hadws = n >= 1;
};
diff --git a/format/xml/types.ha b/format/xml/types.ha
@@ -4,6 +4,7 @@
// (c) 2021 Eyal Sawady <ecs@d2evs.net>
use bufio;
use encoding::utf8;
+use fmt;
use io;
use os;
use strio;
@@ -14,6 +15,7 @@ export type parser = struct {
close: bool,
state: state,
tags: []str,
+ line: size,
// strio buffers:
namebuf: strio::dynamic_stream,
@@ -43,17 +45,20 @@ export type text = str;
export type token = (elementstart | elementend | attribute | text);
// A syntax error was encountered in the document.
-export type syntaxerr = !void; // TODO: Add line number?
+export type syntaxerr = !size;
// Any error which can occur during XML parsing.
export type error = !(syntaxerr | utf8::invalid | io::error);
// Converts an [[error]] to a user-friendly string representation.
-export fn strerror(err: error) const str = match (err) {
-case syntaxerr =>
- yield "Syntax error";
-case utf8::invalid =>
- yield "Document is not valid UTF-8";
-case let err: io::error =>
- yield io::strerror(err);
+export fn strerror(err: error) const str = {
+ static let buf: [2048]u8 = [0...]; // static storage, so the message formatted into it can be returned from this call
+ match (err) {
+ case let err: syntaxerr =>
+ return fmt::bsprintf(buf, "Syntax error on line {}", err: size); // syntaxerr carries the line number; result presumably borrows from buf, so a later syntaxerr call overwrites it
+ case utf8::invalid =>
+ return "Document is not valid UTF-8";
+ case let err: io::error =>
+ return io::strerror(err);
+ };
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -520,7 +520,7 @@ format_xml() {
else
gensrcs_format_xml +test.ha
fi
- gen_ssa format::xml io bufio strings ascii strio os
+ gen_ssa format::xml fmt io bufio strings ascii strio os
}
fs() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -1035,7 +1035,7 @@ stdlib_format_xml_any_srcs= \
$(STDLIB)/format/xml/parser.ha \
$(STDLIB)/format/xml/chars.ha
-$(HARECACHE)/format/xml/format_xml-any.ssa: $(stdlib_format_xml_any_srcs) $(stdlib_rt) $(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_ascii_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM))
+$(HARECACHE)/format/xml/format_xml-any.ssa: $(stdlib_format_xml_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_ascii_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_os_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/format/xml
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::xml \
@@ -2954,7 +2954,7 @@ testlib_format_xml_any_srcs= \
$(STDLIB)/format/xml/chars.ha \
$(STDLIB)/format/xml/+test.ha
-$(TESTCACHE)/format/xml/format_xml-any.ssa: $(testlib_format_xml_any_srcs) $(testlib_rt) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_ascii_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM))
+$(TESTCACHE)/format/xml/format_xml-any.ssa: $(testlib_format_xml_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_ascii_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_os_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/format/xml
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::xml \