commit a0fdca9f8135c0f7e3f1d266385af10b31f04660
parent daa4a2f9178dfb64ff5e431c37914923850d3064
Author: Drew DeVault <sir@cmpwn.com>
Date: Sun, 11 Apr 2021 09:20:56 -0400
format::xml: validate closing tags
Diffstat:
3 files changed, 67 insertions(+), 17 deletions(-)
diff --git a/format/xml/+test.ha b/format/xml/+test.ha
@@ -6,16 +6,21 @@ use strings;
xmltest("<?xml version='1.0' encoding='utf-8' ?>
<root>
<hello name='foobar'>world</hello>
+ <foobar bar='foobaz' />
</root>", [
"root": elementstart,
"\n\t": text,
"hello": elementstart,
("name", "foobar"): attribute,
"world": text,
- elementend,
+ "hello": elementend,
+ "\n\t": text,
+ "foobar": elementstart,
+ ("bar", "foobaz"): attribute,
+ "foobar": elementend,
"\n": text,
- elementend,
- ]);
+ "root": elementend,
+ ], false);
};
@test fn comments() void = {
@@ -29,10 +34,10 @@ use strings;
"hello": elementstart,
("name", "foobar"): attribute,
"world": text,
- elementend,
+ "hello": elementend,
"\n": text,
- elementend,
- ]);
+ "root": elementend,
+ ], false);
};
@test fn entities() void = {
@@ -45,10 +50,10 @@ use strings;
"hello": elementstart,
("name", "foobar &!!"): attribute,
"<world> \"'": text,
- elementend,
+ "hello": elementend,
"\n": text,
- elementend,
- ]);
+ "root": elementend,
+ ], false);
};
@test fn cdata() void = {
@@ -60,11 +65,23 @@ use strings;
"\n\t": text,
"Hello world &foo <bar>": text,
"\n": text,
- elementend,
- ]);
+ "root": elementend,
+ ], false);
};
-fn xmltest(input: str, expected: []token) void = {
+@test fn errors() void = { // Checks that a closing tag which does not match the open element is reported as an error.
+ xmltest("<?xml version='1.0' encoding='utf-8' ?>
+<root>
+ <hello name='foobar'></world>
+</root>", [
+ "root": elementstart, // Tokens expected before the mismatched </world> is reached.
+ "\n\t": text,
+ "hello": elementstart,
+ ("name", "foobar"): attribute,
+], true); // true: xmltest asserts that the scan() call after these tokens returns an error.
+};
+
+fn xmltest(input: str, expected: []token, error: bool) void = {
let in = bufio::fixed(strings::toutf8(input), io::mode::READ);
defer io::close(in);
let parser = parse(in) as *parser;
@@ -87,8 +104,15 @@ fn xmltest(input: str, expected: []token) void = {
let ex = expected[i] as text;
assert(tx == ex);
},
- elementend => assert(expected[i] is elementend),
+ el: elementend => {
+ let ex = expected[i] as elementend;
+ assert(el == ex);
+ },
};
};
- assert(scan(parser) is void);
+ if (error) {
+ assert(scan(parser) is error);
+ } else {
+ assert(scan(parser) is void);
+ };
};
diff --git a/format/xml/parser.ha b/format/xml/parser.ha
@@ -59,6 +59,10 @@ export fn parser_free(par: *parser) void = {
io::close(par.namebuf);
io::close(par.entbuf);
io::close(par.textbuf);
+ for (let i = 0z; i < len(par.tags); i += 1) {
+ free(par.tags[i]);
+ };
+ free(par.tags);
free(par);
};
@@ -108,7 +112,7 @@ export fn scan(par: *parser) (token | void | error) = {
if (rn == '/') {
want(par, '>')?;
par.state = state::ELEMENT;
- return elementend;
+ return poptag(par, "")?: elementend;
} else if (rn == '>') {
par.state = state::ELEMENT;
return scan(par)?;
@@ -121,6 +125,21 @@ export fn scan(par: *parser) (token | void | error) = {
};
};
+fn poptag(par: *parser, expect: str) (str | error) = { // Pops the last entry of par.tags and returns its name; an empty expect skips the name check.
+ if (len(par.tags) == 0) { // Nothing is open, so there is no element to close.
+ return syntaxerr;
+ };
+ let pop = par.tags[len(par.tags) - 1];
+ delete(par.tags[len(par.tags) - 1]);
+ defer free(pop); // The popped name is freed when this function exits, including on the error return below.
+ if (expect != "" && expect != pop) { // Closing name differs from the open tag, e.g. <hello></world>.
+ return syntaxerr;
+ };
+ strio::reset(par.namebuf); // Copy the name into namebuf, since pop itself is freed on return.
+ strio::concat(par.namebuf, pop);
+ return strio::string(par.namebuf); // NOTE(review): presumably borrowed from namebuf and valid only until it is next reset -- confirm.
+};
+
fn scan_attr(par: *parser) (token | error) = {
let name = scan_name(par, par.namebuf)?;
want(par, OPTWS, '=', OPTWS);
@@ -243,7 +262,13 @@ fn scan_element(par: *parser) (token | error) = {
},
};
let name = scan_name(par, par.namebuf)?;
- return if (close) elementend else name: elementstart;
+ if (close) {
+ poptag(par, name)?;
+ return name: elementend;
+ } else {
+ append(par.tags, strings::dup(name));
+ return name: elementstart;
+ };
};
fn scan_entity(par: *parser) (rune | error) = {
diff --git a/format/xml/types.ha b/format/xml/types.ha
@@ -8,6 +8,7 @@ export type parser = struct {
in: *io::stream,
buf: [os::BUFSIZ]u8,
state: state,
+ tags: []str,
// strio buffers:
namebuf: *io::stream,
entbuf: *io::stream,
@@ -24,7 +25,7 @@ export type state = enum {
export type elementstart = str;
// The end of an XML element, e.g. /> or </example>
-export type elementend = void;
+export type elementend = str;
// An attribute of an XML element, e.g. foo="bar"
export type attribute = (str, str);