commit 9e45fd5b1b66f6a8043ff88ee70f789b0250e037
parent 9f396d7ce8f03206f90e6c8f610e9f3218838924
Author: Drew DeVault <sir@cmpwn.com>
Date: Fri, 28 Jan 2022 10:55:32 +0100
mime: slightly improved implementation of quoted-string
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
2 files changed, 47 insertions(+), 25 deletions(-)
diff --git a/mime/README b/mime/README
@@ -1,3 +1,9 @@
The mime package implements a subset of RFC 2045, namely the subset necessary to
handle parsing and encoding Media Types (formerly "MIME types"), and to map them
with file extensions.
+
+This module implements a "reasonable subset" of the specification which aims to
+address contemporary use-cases of Media Types outside of the broader context of
+the Content-Type header as it's used by emails. The implementation should not
+parse any Media Types which are not conformant, but may not parse all conformant
+Media Types.
diff --git a/mime/parse.ha b/mime/parse.ha
@@ -1,27 +1,11 @@
use ascii;
use errors;
use strings;
+use io; // XXX: TEMP
const tspecial: str = "()<>@,;:\\/[]?=";
export type type_params = strings::tokenizer;
-fn typevalid(in: str) (void | errors::invalid) = {
- const miter = strings::iter(in);
- for (true) {
- const rn = match (strings::next(&miter)) {
- case let rn: rune =>
- yield rn;
- case void =>
- break;
- };
- if (!ascii::isascii(rn) || rn == ' '
- || ascii::iscntrl(rn)
- || strings::contains(tspecial, rn)) {
- return errors::invalid;
- };
- };
-};
-
// Parses a Media Type, returning a tuple of the content type (e.g.
// "text/plain") and a parameter parser object, or [[errors::invalid]] if the
// input cannot be parsed.
@@ -65,17 +49,49 @@ export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
items.1 = strings::trim(items.1);
if (strings::hasprefix(items.1, "\"")) {
- // So you handed mime::next_param a seemingly valid parameter
- // and ended up getting this error instead. Good. That's means
- // you're motivated to deal with this! YOU read the RFC's ABNF
- // grammar and figure out how this should be implemented. I sure
- // as hell don't have the patience for this shitty RFC.
- return errors::invalid;
+ items.1 = quoted(items.1)?;
};
return (items.0, items.1);
};
+// Extracts the contents of a quoted-string parameter value. "in" must begin
+// with the opening double quote (next_param only calls this after checking for
+// that prefix). Returns the text between the quotes without allocating, or
+// [[errors::invalid]] if the closing quote is missing, if anything follows the
+// closing quote, or if the value contains a backslash, CR, or LF.
+fn quoted(in: str) (str | errors::invalid) = {
+	// We have only a basic implementation of quoted-string. It has a couple
+	// of problems:
+	//
+	// 1. The RFC does not define it very well
+	// 2. The parts of the RFC which are ill-defined are rarely used
+	// 3. Implementing quoted-pair would require allocating a new string
+	//
+	// This implementation should handle most Media Types seen in practice
+	// unless they're doing something weird and ill-advised with them.
+
+	// Drop the opening quote.
+	in = strings::sub(in, 1, strings::end);
+
+	// quoted-pair (backslash escapes) and line breaks are not supported.
+	if (strings::contains(in, "\\")
+			|| strings::contains(in, "\r")
+			|| strings::contains(in, "\n")) {
+		return errors::invalid;
+	};
+
+	// An unterminated quoted-string is not conformant. strings::cut would
+	// hand back the whole input in that case, so check for the closing
+	// quote before cutting.
+	if (!strings::contains(in, "\"")) {
+		return errors::invalid;
+	};
+
+	// The first quote found must also be the last character; otherwise
+	// whatever trails it would be silently thrown away.
+	const items = strings::cut(in, "\"");
+	if (len(items.1) != 0) {
+		return errors::invalid;
+	};
+	return items.0;
+};
+
+// Checks that every rune of "in" is permitted in an RFC 2045 token: US-ASCII,
+// and not a space, a control character, or one of the tspecials. Returns
+// [[errors::invalid]] at the first offending rune. An empty string passes,
+// because the loop body never runs.
+fn typevalid(in: str) (void | errors::invalid) = {
+	const miter = strings::iter(in);
+	for (true) {
+		const rn = match (strings::next(&miter)) {
+		case let rn: rune =>
+			yield rn;
+		case void =>
+			break;
+		};
+		// RFC 2045 counts the double quote among the tspecials, but the
+		// tspecial constant in this file does not list it, so it is
+		// tested explicitly here.
+		if (!ascii::isascii(rn) || rn == ' ' || rn == '"'
+				|| ascii::iscntrl(rn)
+				|| strings::contains(tspecial, rn)) {
+			return errors::invalid;
+		};
+	};
+};
+
@test fn parse() void = {
const res = parse("text/plain")!;
assert(res.0 == "text/plain");
@@ -83,12 +99,12 @@ export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
const res = parse("image/png")!;
assert(res.0 == "image/png");
- const res = parse("application/svg+xml; charset=utf-8; foo=bar")!;
+ const res = parse("application/svg+xml; charset=utf-8; foo=\"bar baz\"")!;
assert(res.0 == "application/svg+xml");
const params = res.1;
const param = next_param(¶ms)! as (str, str);
assert(param.0 == "charset" && param.1 == "utf-8");
const param = next_param(¶ms)! as (str, str);
- assert(param.0 == "foo" && param.1 == "bar");
+ assert(param.0 == "foo" && param.1 == "bar baz");
assert(next_param(¶ms) is void);
};