commit 204ab8c007de98c676fe6cb1a8dec8ea44875223
parent 8369d72918381cb6243e67bcd05f3ae4a85992df
Author: Drew DeVault <sir@cmpwn.com>
Date: Fri, 9 Apr 2021 13:57:34 -0400
encoding::xml: add tests
Diffstat:
4 files changed, 79 insertions(+), 19 deletions(-)
diff --git a/encoding/xml/+test.ha b/encoding/xml/+test.ha
@@ -0,0 +1,42 @@
+use io;
+use bufio;
+use strings;
+
+// Checks that scan yields the expected token sequence for a small document.
+@test fn basic() void = {
+	const input = "<?xml version='1.0' encoding='utf-8' ?>\n<root>\n\t<hello name='foobar'>world</hello>\n</root>";
+	const expected: []token = [
+		"root": elementstart,
+		"\n\t": text, // the escaped whitespace between <root> and <hello>
+		"hello": elementstart,
+		("name", "foobar"): attribute,
+		"world": text,
+		elementend,
+		"\n": text, // the escaped newline between </hello> and </root>
+		elementend,
+	];
+	let in = bufio::fixed(strings::toutf8(input), io::mode::READ);
+	defer io::close(in);
+	let parser = parse(in) as *parser;
+	defer parser_free(parser); // release the parser when the test finishes
+	for (let i = 0z; i < len(expected); i += 1) {
+		let tok = scan(parser) as token;
+		match (tok) {
+			el: elementstart => {
+				let ex = expected[i] as elementstart;
+				assert(el == ex);
+			},
+			at: attribute => {
+				let ex = expected[i] as attribute;
+				assert(at.0 == ex.0 && at.1 == ex.1);
+			},
+			tx: text => {
+				let ex = expected[i] as text;
+				assert(tx == ex);
+			},
+			elementend => assert(expected[i] is elementend),
+		};
+		token_free(tok); // scan's contract: each returned token goes to token_free
+	};
+	assert(scan(parser) is void); // no tokens may remain after the root closes
+};
diff --git a/encoding/xml/parser.ha b/encoding/xml/parser.ha
@@ -53,6 +53,10 @@ export fn parser_free(par: *parser) void = {
// Scans for and returns the next [token]. The caller must pass the returned
// token to [token_free] when they're done with it.
export fn scan(par: *parser) (token | void | error) = {
+ switch (par.state) {
+ state::ROOT, state::ATTRS => want(par, OPTWS)?,
+ * => void,
+ };
let rn: rune = match (bufio::scanrune(par.in)?) {
io::EOF => if (par.state == state::ROOT) {
return syntaxerr;
@@ -68,7 +72,7 @@ export fn scan(par: *parser) (token | void | error) = {
state::ROOT => switch (rn) {
'<' => {
bufio::unreadrune(par.in, rn);
- let el = scan_element(par);
+ let el = scan_element(par)?;
par.state = state::ATTRS;
el;
},
@@ -114,7 +118,6 @@ fn scan_element(par: *parser) (token | error) = {
},
};
let name = scan_name(par)?;
- want(par, OPTWS)?;
if (close) {
free(name);
return elementend;
@@ -140,7 +143,6 @@ fn scan_attr(par: *parser) (token | error) = {
strio::appendrune(val, rn);
},
};
- want(par, OPTWS)?;
return (name, strio::finish(val)): attribute;
};
@@ -209,7 +211,6 @@ fn prolog(par: *parser) (void | error) = {
};
want(par, quot)?;
- // TODO: Replace this with attribute() when it's written
let hadws = want(par, OPTWS)?;
let encoding = match (bufio::scanrune(par.in)) {
io::EOF => false,
@@ -219,14 +220,15 @@ fn prolog(par: *parser) (void | error) = {
},
};
if (encoding) {
- want(par, "encoding", OPTWS, '=', OPTWS)?;
- let quot = quote(par)?;
- match (want(par, "UTF-8")) {
- syntaxerr => return utf8::invalid,
- err: error => return err,
- bool => void,
+ let attr = scan_attr(par)? as attribute;
+ defer token_free(attr);
+ if (attr.0 != "encoding") {
+ return syntaxerr;
+ };
+ match (ascii::strcasecmp(attr.1, "utf-8")) {
+ void => return utf8::invalid,
+ n: int => if (n != 0) return utf8::invalid,
};
- want(par, quot)?;
};
let hadws = want(par, OPTWS)?;
@@ -238,10 +240,15 @@ fn prolog(par: *parser) (void | error) = {
},
};
if (standalone) {
- want(par, "standalone", OPTWS, '=', OPTWS)?;
- let quot = quote(par)?;
- // TODO: Should we support standalone="no"?
- want(par, "yes", quot)?;
+ let attr = scan_attr(par)? as attribute;
+ defer token_free(attr);
+ if (attr.0 != "standalone") {
+ return syntaxerr;
+ };
+ match (ascii::strcasecmp(attr.1, "yes")) {
+ void => return syntaxerr,
+ n: int => if (n != 0) return syntaxerr, // TODO?
+ };
};
want(par, OPTWS, "?>", OPTWS)?;
@@ -269,7 +276,7 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) (bool | error) = {
for (let i = 0z; i < len(tok); i += 1) match (tok[i]) {
x: rune => {
let have = match (bufio::scanrune(par.in)?) {
- * => return syntaxerr,
+ io::EOF => return syntaxerr,
rn: rune => rn,
};
if (have != x) {
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -233,11 +233,21 @@ encoding_utf8() {
gen_ssa encoding::utf8 types
}
-encoding_xml() {
+gensrcs_encoding_xml() { # passes the encoding::xml source list to gen_srcs
gen_srcs encoding::xml \
types.ha \
parser.ha \
- chars.ha
+ chars.ha \
+ $* # any extra source files supplied by the caller
+}
+
+encoding_xml() { # selects the source list by $testing, then calls gen_ssa
+ if [ $testing -eq 0 ] # $testing is set outside this hunk
+ then
+ gensrcs_encoding_xml
+ else
+ gensrcs_encoding_xml +test.ha # non-zero $testing: also list +test.ha
+ fi
gen_ssa encoding::xml io bufio strings ascii strio
}
diff --git a/stdlib.mk b/stdlib.mk
@@ -1222,7 +1222,8 @@ $(TESTCACHE)/encoding/utf8/encoding_utf8.ssa: $(testlib_encoding_utf8_srcs) $(te
testlib_encoding_xml_srcs= \
$(STDLIB)/encoding/xml/types.ha \
$(STDLIB)/encoding/xml/parser.ha \
- $(STDLIB)/encoding/xml/chars.ha
+ $(STDLIB)/encoding/xml/chars.ha \
+ $(STDLIB)/encoding/xml/+test.ha
$(TESTCACHE)/encoding/xml/encoding_xml.ssa: $(testlib_encoding_xml_srcs) $(testlib_rt) $(testlib_io) $(testlib_bufio) $(testlib_strings) $(testlib_ascii) $(testlib_strio)
@printf 'HAREC \t$@\n'