hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 204ab8c007de98c676fe6cb1a8dec8ea44875223
parent 8369d72918381cb6243e67bcd05f3ae4a85992df
Author: Drew DeVault <sir@cmpwn.com>
Date:   Fri,  9 Apr 2021 13:57:34 -0400

encoding::xml: add tests

Diffstat:
A encoding/xml/+test.ha | 42 ++++++++++++++++++++++++++++++++++++++++++
M encoding/xml/parser.ha | 39 +++++++++++++++++++++++----------------
M scripts/gen-stdlib | 14 ++++++++++++--
M stdlib.mk | 3 ++-
4 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/encoding/xml/+test.ha b/encoding/xml/+test.ha @@ -0,0 +1,42 @@ +use io; +use bufio; +use strings; + +@test fn basic() void = { + const input = "<?xml version='1.0' encoding='utf-8' ?> +<root> + <hello name='foobar'>world</hello> +</root>"; + const expected: []token = [ + "root": elementstart, + "\n\t": text, + "hello": elementstart, + ("name", "foobar"): attribute, + "world": text, + elementend, + "\n": text, + elementend, + ]; + let in = bufio::fixed(strings::toutf8(input), io::mode::READ); + defer io::close(in); + let parser = parse(in) as *parser; + for (let i = 0z; i < len(expected); i += 1) { + let tok = scan(parser) as token; + match (tok) { + el: elementstart => { + let ex = expected[i] as elementstart; + assert(el == ex); + }, + at: attribute => { + let ex = expected[i] as attribute; + assert(at.0 == ex.0 && at.1 == ex.1); + }, + tx: text => { + let ex = expected[i] as text; + assert(tx == ex); + }, + elementend => assert(expected[i] is elementend), + }; + }; + assert(scan(parser) is void); +}; diff --git a/encoding/xml/parser.ha b/encoding/xml/parser.ha @@ -53,6 +53,10 @@ export fn parser_free(par: *parser) void = { // Scans for and returns the next [token]. The caller must pass the returned // token to [token_free] when they're done with it. export fn scan(par: *parser) (token | void | error) = { + switch (par.state) { + state::ROOT, state::ATTRS => want(par, OPTWS)?, + * => void, + }; let rn: rune = match (bufio::scanrune(par.in)?) 
{ io::EOF => if (par.state == state::ROOT) { return syntaxerr; @@ -68,7 +72,7 @@ export fn scan(par: *parser) (token | void | error) = { state::ROOT => switch (rn) { '<' => { bufio::unreadrune(par.in, rn); - let el = scan_element(par); + let el = scan_element(par)?; par.state = state::ATTRS; el; }, @@ -114,7 +118,6 @@ fn scan_element(par: *parser) (token | error) = { }, }; let name = scan_name(par)?; - want(par, OPTWS)?; if (close) { free(name); return elementend; @@ -140,7 +143,6 @@ fn scan_attr(par: *parser) (token | error) = { strio::appendrune(val, rn); }, }; - want(par, OPTWS)?; return (name, strio::finish(val)): attribute; }; @@ -209,7 +211,6 @@ fn prolog(par: *parser) (void | error) = { }; want(par, quot)?; - // TODO: Replace this with attribute() when it's written let hadws = want(par, OPTWS)?; let encoding = match (bufio::scanrune(par.in)) { io::EOF => false, @@ -219,14 +220,15 @@ fn prolog(par: *parser) (void | error) = { }, }; if (encoding) { - want(par, "encoding", OPTWS, '=', OPTWS)?; - let quot = quote(par)?; - match (want(par, "UTF-8")) { - syntaxerr => return utf8::invalid, - err: error => return err, - bool => void, + let attr = scan_attr(par)? as attribute; + defer token_free(attr); + if (attr.0 != "encoding") { + return syntaxerr; + }; + match (ascii::strcasecmp(attr.1, "utf-8")) { + void => return utf8::invalid, + n: int => if (n != 0) return utf8::invalid, }; - want(par, quot)?; }; let hadws = want(par, OPTWS)?; @@ -238,10 +240,15 @@ fn prolog(par: *parser) (void | error) = { }, }; if (standalone) { - want(par, "standalone", OPTWS, '=', OPTWS)?; - let quot = quote(par)?; - // TODO: Should we support standalone="no"? - want(par, "yes", quot)?; + let attr = scan_attr(par)? as attribute; + defer token_free(attr); + if (attr.0 != "standalone") { + return syntaxerr; + }; + match (ascii::strcasecmp(attr.1, "yes")) { + void => return syntaxerr, + n: int => if (n != 0) return syntaxerr, // TODO? 
+ }; }; want(par, OPTWS, "?>", OPTWS)?; @@ -269,7 +276,7 @@ fn want(par: *parser, tok: (rune | str | whitespace)...) (bool | error) = { for (let i = 0z; i < len(tok); i += 1) match (tok[i]) { x: rune => { let have = match (bufio::scanrune(par.in)?) { - * => return syntaxerr, + io::EOF => return syntaxerr, rn: rune => rn, }; if (have != x) { diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -233,11 +233,21 @@ encoding_utf8() { gen_ssa encoding::utf8 types } -encoding_xml() { +gensrcs_encoding_xml() { gen_srcs encoding::xml \ types.ha \ parser.ha \ - chars.ha + chars.ha \ + $* +} + +encoding_xml() { + if [ $testing -eq 0 ] + then + gensrcs_encoding_xml + else + gensrcs_encoding_xml +test.ha + fi gen_ssa encoding::xml io bufio strings ascii strio } diff --git a/stdlib.mk b/stdlib.mk @@ -1222,7 +1222,8 @@ $(TESTCACHE)/encoding/utf8/encoding_utf8.ssa: $(testlib_encoding_utf8_srcs) $(te testlib_encoding_xml_srcs= \ $(STDLIB)/encoding/xml/types.ha \ $(STDLIB)/encoding/xml/parser.ha \ - $(STDLIB)/encoding/xml/chars.ha + $(STDLIB)/encoding/xml/chars.ha \ + $(STDLIB)/encoding/xml/+test.ha $(TESTCACHE)/encoding/xml/encoding_xml.ssa: $(testlib_encoding_xml_srcs) $(testlib_rt) $(testlib_io) $(testlib_bufio) $(testlib_strings) $(testlib_ascii) $(testlib_strio) @printf 'HAREC \t$@\n'