commit 27587d0fe37b83184f2b911d6ba2dfbd6ae5b756
parent bcb025aee2b0a2957f715b3727a8ad86fbaf0b7a
Author: Drew DeVault <sir@cmpwn.com>
Date: Sat, 10 Apr 2021 10:07:45 -0400
format::xml: leave comments at deliberate omissions
Diffstat:
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/format/xml/parser.ha b/format/xml/parser.ha
@@ -17,6 +17,10 @@ use encoding::utf8;
use io;
use strings;
use strio;
+// Are you an intrepid programmer seeking to fork this module to create a more
+// sophisticated XML parser supporting a broader set of features? Good news: all
+// of the features you need to implement are annotated throughout with
+// "XXX: Deliberate omission" comments.
// Returns an XML parser which reads from a stream. The caller must call
// [parser_free] when they are finished with it.
@@ -116,10 +120,13 @@ fn scan_attr(par: *parser) (token | error) = {
for (true) match (bufio::scanrune(par.in)?) {
io::EOF => return syntaxerr,
rn: rune => {
- switch (rn) {
+ rn = switch (rn) {
'<' => return syntaxerr,
- '&' => abort(), // TODO: Entities
- * => void,
+ '&' => {
+ bufio::unreadrune(par.in, rn);
+ scan_entity(par)?;
+ },
+ * => rn,
};
if (rn == quot) break;
strio::appendrune(val, rn);
@@ -175,11 +182,13 @@ fn scan_content(par: *parser) (text | error) = {
},
'&' => {
bufio::unreadrune(par.in, rn);
- abort(); // TODO: Entities
+ scan_entity(par);
},
'%' => {
bufio::unreadrune(par.in, rn);
- abort(); // TODO: Parameter entities
+ // XXX: Deliberate omission: parameter
+ // entities
+ return syntaxerr;
},
};
strio::appendrune(content, rn);
@@ -207,6 +216,12 @@ fn scan_element(par: *parser) (token | error) = {
};
};
+// Intended to scan an entity reference from the parser's input and yield the
+// rune it stands for (or an error). This is currently a stub: the body
+// unconditionally aborts, so no entity is actually parsed yet.
+fn scan_entity(par: *parser) (rune | error) = {
+ // XXX: Deliberate omission: this only supports the pre-defined
+ // entities as defined by XML 1.0 (Fifth Edition) section 4.6.
+ abort(); // TODO: not implemented yet; every call aborts
+};
+
fn scan_name(par: *parser) (str | error) = {
let buf = strio::dynamic();
@@ -237,6 +252,9 @@ fn scan_pi(par: *parser) (void | error) = {
};
fn prolog(par: *parser) (void | error) = {
+ // XXX: Deliberate omission(s):
+ // - UTF-8 BOM detection
+ // - UTF-16 support
want(par, "<?xml", WS)?;
want(par, "version", OPTWS, '=', OPTWS)?;
@@ -265,6 +283,7 @@ fn prolog(par: *parser) (void | error) = {
if (attr.0 != "encoding") {
return syntaxerr;
};
+ // XXX: Deliberate omission: all values other than utf-8
match (ascii::strcasecmp(attr.1, "utf-8")) {
void => return utf8::invalid,
n: int => if (n != 0) return utf8::invalid,
@@ -285,9 +304,10 @@ fn prolog(par: *parser) (void | error) = {
if (attr.0 != "standalone") {
return syntaxerr;
};
+ // XXX: Deliberate omission: non-standalone documents
match (ascii::strcasecmp(attr.1, "yes")) {
void => return syntaxerr,
- n: int => if (n != 0) return syntaxerr, // TODO?
+ n: int => if (n != 0) return syntaxerr,
};
};