hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit fb804916039bfde1628b08ea7a8bc835e7216f06
parent aade19988b7f9c74ef4643b254b8752d692b4b9c
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat, 10 Apr 2021 11:00:49 -0400

format::xml: reuse strio buffers

This reduces the number of allocations required to parse a document,
improving performance.

Diffstat:
M format/xml/+test.ha  |  4 ++++
M format/xml/parser.ha | 54 ++++++++++++++++++++++++++----------------------------
M format/xml/types.ha  | 17 +++++------------
3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/format/xml/+test.ha b/format/xml/+test.ha @@ -2,6 +2,8 @@ use io; use bufio; use strings; +use fmt; + @test fn basic() void = { xmltest("<?xml version='1.0' encoding='utf-8' ?> <root> @@ -68,6 +70,8 @@ fn xmltest(input: str, expected: []token) void = { }, at: attribute => { let ex = expected[i] as attribute; + fmt::errorfln("want: '{}'='{}', got: '{}'='{}'", + ex.0, ex.1, at.0, at.1); assert(at.0 == ex.0 && at.1 == ex.1); }, tx: text => { diff --git a/format/xml/parser.ha b/format/xml/parser.ha @@ -40,6 +40,9 @@ export fn parse(in: *io::stream) (*parser | error) = { let par = alloc(parser { orig = in, in = in, + namebuf = strio::dynamic(), + entbuf = strio::dynamic(), + textbuf = strio::dynamic(), ... }); if (!bufio::isbuffered(in)) { @@ -55,11 +58,15 @@ export fn parser_free(par: *parser) void = { if (par.in != par.orig) { io::close(par.in); }; + io::close(par.namebuf); + io::close(par.entbuf); + io::close(par.textbuf); free(par); }; -// Scans for and returns the next [token]. The caller must pass the returned -// token to [token_free] when they're done with it. +// Scans for and returns the next [token]. Tokens are borrowed from the parser +// and are not valid on subsequent calls to [scan]; use [token_dup] to extend +// their lifetime. export fn scan(par: *parser) (token | void | error) = { switch (par.state) { state::ROOT, state::ATTRS => want(par, OPTWS)?, @@ -117,10 +124,10 @@ export fn scan(par: *parser) (token | void | error) = { }; fn scan_attr(par: *parser) (token | error) = { - let name = scan_name(par)?; + let name = scan_name(par, par.namebuf)?; want(par, OPTWS, '=', OPTWS); let quot = quote(par)?; - let val = strio::dynamic(); + strio::reset(par.textbuf); for (true) match (bufio::scanrune(par.in)?) 
{ io::EOF => return syntaxerr, rn: rune => { @@ -133,10 +140,10 @@ fn scan_attr(par: *parser) (token | error) = { * => rn, }; if (rn == quot) break; - strio::appendrune(val, rn); + strio::appendrune(par.textbuf, rn); }, }; - return (name, strio::finish(val)): attribute; + return (name, strio::string(par.textbuf)): attribute; }; fn scan_comment(par: *parser) (token | void | error) = { @@ -175,7 +182,7 @@ fn scan_comment(par: *parser) (token | void | error) = { }; fn scan_content(par: *parser) (text | error) = { - let content = strio::dynamic(); + strio::reset(par.textbuf); for (true) match (bufio::scanrune(par.in)?) { io::EOF => break, rn: rune => { @@ -189,10 +196,10 @@ fn scan_content(par: *parser) (text | error) = { scan_entity(par)?; }, }; - strio::appendrune(content, rn); + strio::appendrune(par.textbuf, rn); }, }; - return strio::finish(content); + return strio::string(par.textbuf); }; fn scan_element(par: *parser) (token | error) = { @@ -205,13 +212,8 @@ fn scan_element(par: *parser) (token | error) = { * => bufio::unreadrune(par.in, rn), }, }; - let name = scan_name(par)?; - if (close) { - free(name); - return elementend; - } else { - return name: elementstart; - }; + let name = scan_name(par, par.namebuf)?; + return if (close) elementend else name: elementstart; }; fn scan_entity(par: *parser) (rune | error) = { @@ -239,33 +241,31 @@ fn scan_charref(par: *parser) (rune | error) = { } else bufio::unreadrune(par.in, rn), }; - let buf = strio::dynamic(); - defer io::close(buf); + strio::reset(par.entbuf); for (true) { let rn = match (bufio::scanrune(par.in)?) 
{ io::EOF => return syntaxerr, rn: rune => rn, }; if (ascii::isdigit(rn)) { - strio::appendrune(buf, rn); + strio::appendrune(par.entbuf, rn); } else if (rn == ';') { break; } else { return syntaxerr; }; }; - if (len(strio::string(buf)) == 0) { + if (len(strio::string(par.entbuf)) == 0) { return syntaxerr; }; - return match (strconv::stou32b(strio::string(buf), base)) { + return match (strconv::stou32b(strio::string(par.entbuf), base)) { u: u32 => u: rune, (strconv::invalid | strconv::overflow) => syntaxerr, }; }; fn scan_namedent(par: *parser) (rune | error) = { - let name = scan_name(par)?; - defer free(name); + let name = scan_name(par, par.entbuf)?; want(par, ';')?; let map = [ ("lt", '<'), @@ -284,8 +284,8 @@ fn scan_namedent(par: *parser) (rune | error) = { return syntaxerr; }; -fn scan_name(par: *parser) (str | error) = { - let buf = strio::dynamic(); +fn scan_name(par: *parser, buf: *io::stream) (str | error) = { + strio::reset(buf); const rn = match (bufio::scanrune(par.in)?) { io::EOF => return syntaxerr, @@ -306,7 +306,7 @@ fn scan_name(par: *parser) (str | error) = { }, }; - return strio::finish(buf); + return strio::string(buf); }; fn scan_pi(par: *parser) (void | error) = { @@ -341,7 +341,6 @@ fn prolog(par: *parser) (void | error) = { }; if (encoding) { let attr = scan_attr(par)? as attribute; - defer token_free(attr); if (attr.0 != "encoding") { return syntaxerr; }; @@ -362,7 +361,6 @@ fn prolog(par: *parser) (void | error) = { }; if (standalone) { let attr = scan_attr(par)? as attribute; - defer token_free(attr); if (attr.0 != "standalone") { return syntaxerr; }; diff --git a/format/xml/types.ha b/format/xml/types.ha @@ -7,6 +7,10 @@ export type parser = struct { in: *io::stream, buf: [4096]u8, state: state, + // strio buffers: + namebuf: *io::stream, + entbuf: *io::stream, + textbuf: *io::stream, }; export type state = enum { @@ -45,15 +49,4 @@ export fn strerror(err: error) const str = { }; }; -// Frees resources associated with a [token]. 
-export fn token_free(tok: token) void = { - match (tok) { - el: elementstart => free(el), - attr: attribute => { - free(attr.0); - free(attr.1); - }, - tx: text => free(tx), - elementend => void, - }; -}; +export fn token_dup(tok: token) void = abort(); // TODO