hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit aade19988b7f9c74ef4643b254b8752d692b4b9c
parent 27587d0fe37b83184f2b911d6ba2dfbd6ae5b756
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat, 10 Apr 2021 10:23:42 -0400

format::xml: implement XML entities

Diffstat:
M format/xml/+test.ha | 16 ++++++++++++++++
M format/xml/parser.ha | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 88 insertions(+), 10 deletions(-)

diff --git a/format/xml/+test.ha b/format/xml/+test.ha @@ -35,6 +35,22 @@ use strings; ]); }; +@test fn entities() void = { + xmltest("<?xml version='1.0' encoding='utf-8' ?> +<root> + <hello name='foobar &amp;&#33;&#x21;'>&lt;world&gt; &quot;&apos;</hello> +</root>", [ + "root": elementstart, + "\n\t": text, + "hello": elementstart, + ("name", "foobar &!!"): attribute, + "<world> \"'": text, + elementend, + "\n": text, + elementend, + ]); +}; + fn xmltest(input: str, expected: []token) void = { let in = bufio::fixed(strings::toutf8(input), io::mode::READ); defer io::close(in); diff --git a/format/xml/parser.ha b/format/xml/parser.ha @@ -15,8 +15,12 @@ use ascii; use bufio; use encoding::utf8; use io; +use strconv; use strings; use strio; + +// Note: +// // Are you an intrepid programmer seeking to fork this module to create a more // sophisticated XML parser supporting a broader set of features? Good news: all // of the features you need to implement are annotated throughout with @@ -175,20 +179,14 @@ fn scan_content(par: *parser) (text | error) = { for (true) match (bufio::scanrune(par.in)?) { io::EOF => break, rn: rune => { - switch (rn) { + rn = switch (rn) { '<' => { bufio::unreadrune(par.in, rn); break; }, - '&' => { - bufio::unreadrune(par.in, rn); - scan_entity(par); - }, - '%' => { + '&', '%' => { bufio::unreadrune(par.in, rn); - // XXX: Deliberate omission: parameter - // entities - return syntaxerr; + scan_entity(par)?; }, }; strio::appendrune(content, rn); @@ -217,9 +215,73 @@ fn scan_element(par: *parser) (token | error) = { }; fn scan_entity(par: *parser) (rune | error) = { + want(par, '&')?; + let rn = match (bufio::scanrune(par.in)?) 
{ + io::EOF => return syntaxerr, + rn: rune => rn, + }; + return switch (rn) { + '#' => scan_charref(par), + '%' => syntaxerr, // XXX: Deliberate omission: PEReference + * => { + bufio::unreadrune(par.in, rn); + scan_namedent(par); + }, + }; +}; + +fn scan_charref(par: *parser) (rune | error) = { + let base = strconv::base::DEC; + match (bufio::scanrune(par.in)?) { + io::EOF => return syntaxerr, + rn: rune => if (rn == 'x') { + base = strconv::base::HEX; + } else bufio::unreadrune(par.in, rn), + }; + + let buf = strio::dynamic(); + defer io::close(buf); + for (true) { + let rn = match (bufio::scanrune(par.in)?) { + io::EOF => return syntaxerr, + rn: rune => rn, + }; + if (ascii::isdigit(rn)) { + strio::appendrune(buf, rn); + } else if (rn == ';') { + break; + } else { + return syntaxerr; + }; + }; + if (len(strio::string(buf)) == 0) { + return syntaxerr; + }; + return match (strconv::stou32b(strio::string(buf), base)) { + u: u32 => u: rune, + (strconv::invalid | strconv::overflow) => syntaxerr, + }; +}; + +fn scan_namedent(par: *parser) (rune | error) = { + let name = scan_name(par)?; + defer free(name); + want(par, ';')?; + let map = [ + ("lt", '<'), + ("gt", '>'), + ("amp", '&'), + ("apos", '\''), + ("quot", '"'), + ]; + for (let i = 0z; i < len(map); i += 1) { + if (map[i].0 == name) { + return map[i].1; + }; + }; // XXX: Deliberate ommission: this only supports the pre-defined // entities as defined by XML 1.0 (Fifth Edition) section 4.6. - abort(); // TODO + return syntaxerr; }; fn scan_name(par: *parser) (str | error) = {