hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 59ca2ea3a89c775932adf3267ae06d5012c78774
parent 5a6d7fdb581259bf7c69f1e67c52dd6430405e8f
Author: Drew DeVault <sir@cmpwn.com>
Date:   Fri,  9 Apr 2021 13:06:14 -0400

encoding::xml: scan content

Diffstat:
M encoding/xml/parser.ha | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M encoding/xml/types.ha | 1 +
2 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/encoding/xml/parser.ha b/encoding/xml/parser.ha @@ -40,12 +40,27 @@ export fn parser_free(par: *parser) void = { // Scans for and returns the next [token]. The caller must pass the returned // token to [token_free] when they're done with it. export fn scan(par: *parser) (token | void | error) = { - want(par, OPTWS)?; let rn: rune = match (bufio::scanrune(par.in)?) { - io::EOF => return void, + io::EOF => if (par.state == state::ROOT) { + return syntaxerr; + } else return void, rn: rune => rn, }; + // TODO: + // - Comments + // - Entities/references + // - CDATA + // - Processing Instructions return switch (par.state) { + state::ROOT => switch (rn) { + '<' => { + bufio::unreadrune(par.in, rn); + let el = scan_element(par); + par.state = state::ATTRS; + el; + }, + * => return syntaxerr, + }, state::ELEMENT => switch (rn) { '<' => { bufio::unreadrune(par.in, rn); @@ -53,7 +68,10 @@ export fn scan(par: *parser) (token | void | error) = { par.state = state::ATTRS; el; }, - * => abort(), + * => { + bufio::unreadrune(par.in, rn); + scan_content(par)?; + }, }, state::ATTRS => { if (rn == '/') { @@ -74,7 +92,22 @@ export fn scan(par: *parser) (token | void | error) = { fn scan_element(par: *parser) (token | error) = { want(par, '<')?; - return scan_name(par)?: elementstart; + let close = false; + match (bufio::scanrune(par.in)?) 
{ + io::EOF => return syntaxerr, + rn: rune => switch (rn) { + '/' => close = true, + * => bufio::unreadrune(par.in, rn), + }, + }; + let name = scan_name(par)?; + want(par, OPTWS)?; + if (close) { + free(name); + return elementend; + } else { + return name: elementstart; + }; }; fn scan_attr(par: *parser) (token | error) = { @@ -94,6 +127,7 @@ fn scan_attr(par: *parser) (token | error) = { strio::appendrune(val, rn); }, }; + want(par, OPTWS)?; return (name, strio::finish(val)): attribute; }; @@ -122,6 +156,31 @@ fn scan_name(par: *parser) (str | error) = { return strio::finish(buf); }; +fn scan_content(par: *parser) (text | error) = { + let content = strio::dynamic(); + for (true) match (bufio::scanrune(par.in)?) { + io::EOF => break, + rn: rune => { + switch (rn) { + '<' => { + bufio::unreadrune(par.in, rn); + break; + }, + '&' => { + bufio::unreadrune(par.in, rn); + abort(); // TODO + }, + '%' => { + bufio::unreadrune(par.in, rn); + abort(); // TODO + }, + }; + strio::appendrune(content, rn); + }, + }; + return strio::finish(content); +}; + fn prolog(par: *parser) (void | error) = { want(par, "<?xml", WS)?; @@ -172,7 +231,7 @@ fn prolog(par: *parser) (void | error) = { want(par, "yes", quot)?; }; - want(par, OPTWS, "?>")?; + want(par, OPTWS, "?>", OPTWS)?; // TODO: Parse doctypedecl & misc return; }; diff --git a/encoding/xml/types.ha b/encoding/xml/types.ha @@ -10,6 +10,7 @@ export type parser = struct { }; export type state = enum { + ROOT, ELEMENT, ATTRS, };