commit 59ca2ea3a89c775932adf3267ae06d5012c78774
parent 5a6d7fdb581259bf7c69f1e67c52dd6430405e8f
Author: Drew DeVault <sir@cmpwn.com>
Date: Fri, 9 Apr 2021 13:06:14 -0400
encoding::xml: scan content
Diffstat:
2 files changed, 65 insertions(+), 5 deletions(-)
diff --git a/encoding/xml/parser.ha b/encoding/xml/parser.ha
@@ -40,12 +40,27 @@ export fn parser_free(par: *parser) void = {
// Scans for and returns the next [token]. The caller must pass the returned
// token to [token_free] when they're done with it.
export fn scan(par: *parser) (token | void | error) = {
- want(par, OPTWS)?;
let rn: rune = match (bufio::scanrune(par.in)?) {
- io::EOF => return void,
+ io::EOF => if (par.state == state::ROOT) {
+ return syntaxerr;
+ } else return void,
rn: rune => rn,
};
+ // TODO:
+ // - Comments
+ // - Entities/references
+ // - CDATA
+ // - Processing Instructions
return switch (par.state) {
+ state::ROOT => switch (rn) {
+ '<' => {
+ bufio::unreadrune(par.in, rn);
+ let el = scan_element(par);
+ par.state = state::ATTRS;
+ el;
+ },
+ * => return syntaxerr,
+ },
state::ELEMENT => switch (rn) {
'<' => {
bufio::unreadrune(par.in, rn);
@@ -53,7 +68,10 @@ export fn scan(par: *parser) (token | void | error) = {
par.state = state::ATTRS;
el;
},
- * => abort(),
+ * => {
+ bufio::unreadrune(par.in, rn);
+ scan_content(par)?;
+ },
},
state::ATTRS => {
if (rn == '/') {
@@ -74,7 +92,22 @@ export fn scan(par: *parser) (token | void | error) = {
fn scan_element(par: *parser) (token | error) = {
want(par, '<')?;
- return scan_name(par)?: elementstart;
+ let close = false;
+ match (bufio::scanrune(par.in)?) {
+ io::EOF => return syntaxerr,
+ rn: rune => switch (rn) {
+ '/' => close = true,
+ * => bufio::unreadrune(par.in, rn),
+ },
+ };
+ let name = scan_name(par)?;
+ want(par, OPTWS)?;
+ if (close) {
+ free(name);
+ return elementend;
+ } else {
+ return name: elementstart;
+ };
};
fn scan_attr(par: *parser) (token | error) = {
@@ -94,6 +127,7 @@ fn scan_attr(par: *parser) (token | error) = {
strio::appendrune(val, rn);
},
};
+ want(par, OPTWS)?;
return (name, strio::finish(val)): attribute;
};
@@ -122,6 +156,31 @@ fn scan_name(par: *parser) (str | error) = {
return strio::finish(buf);
};
+fn scan_content(par: *parser) (text | error) = {
+ let content = strio::dynamic();
+ for (true) match (bufio::scanrune(par.in)?) {
+ io::EOF => break,
+ rn: rune => {
+ switch (rn) {
+ '<' => {
+ bufio::unreadrune(par.in, rn);
+ break;
+ },
+ '&' => {
+ bufio::unreadrune(par.in, rn);
+ abort(); // TODO: parse entity and character references (&name; / &#N;)
+ },
+ '%' => {
+ bufio::unreadrune(par.in, rn);
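+ abort(); // TODO: confirm '%' needs special handling here; XML only recognizes parameter-entity references inside the DTD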
+ },
+ };
+ strio::appendrune(content, rn);
+ },
+ };
+ return strio::finish(content);
+};
+
fn prolog(par: *parser) (void | error) = {
want(par, "<?xml", WS)?;
@@ -172,7 +231,7 @@ fn prolog(par: *parser) (void | error) = {
want(par, "yes", quot)?;
};
- want(par, OPTWS, "?>")?;
+ want(par, OPTWS, "?>", OPTWS)?;
// TODO: Parse doctypedecl & misc
return;
};
diff --git a/encoding/xml/types.ha b/encoding/xml/types.ha
@@ -10,6 +10,7 @@ export type parser = struct {
};
export type state = enum {
+ ROOT,
ELEMENT,
ATTRS,
};