commit 435b45c377229d95100b58392cdbb1930f64db81
parent 57a723a2d911043b76171ff3712528be806aaa07
Author: Drew DeVault <sir@cmpwn.com>
Date: Thu, 13 Jan 2022 16:16:05 +0100
mime: new module
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
4 files changed, 169 insertions(+), 33 deletions(-)
diff --git a/mime/README b/mime/README
@@ -0,0 +1,3 @@
+The mime package implements a subset of RFC 2045, namely the subset necessary to
+handle parsing and encoding Media Types (formerly "MIME types"), and to map them
+to file extensions.
diff --git a/mime/parse.ha b/mime/parse.ha
@@ -0,0 +1,94 @@
+use ascii;
+use errors;
+use strings;
+
+const tspecial: str = "()<>@,;:\\\"/[]?="; // tspecials, RFC 2045 section 5.1
+export type type_params = strings::tokenizer; // read with [[next_param]]
+
+// Verifies that "in" is made up solely of ASCII characters, none of which is
+// a space, a control character, or one of the characters listed in tspecial.
+// Returns [[errors::invalid]] on the first rune which breaks these rules.
+fn typevalid(in: str) (void | errors::invalid) = {
+	const runes = strings::iter(in);
+	for (true) {
+		const r = match (strings::next(&runes)) {
+		case void => break;
+		case let r: rune => yield r;
+		};
+		const ok = ascii::isascii(r) && r != ' ' && !ascii::iscntrl(r);
+		if (!ok || strings::contains(tspecial, r)) {
+			return errors::invalid;
+		};
+	};
+};
+
+// Parses a Media Type such as "text/plain; charset=utf-8". On success, returns
+// a tuple of the type proper (e.g. "text/plain") and a [[type_params]] object
+// for reading its parameters; returns [[errors::invalid]] if the type or
+// subtype is missing or contains a character which is not allowed.
+//
+// Pass the type_params object to [[next_param]] to read each parameter in
+// turn, or simply discard it if the parameters are not of interest. Anything
+// after the first ";" is not examined by this function: malformed parameters
+// are only reported by [[next_param]], once it reaches them.
+export fn parse(in: str) ((str, type_params) | errors::invalid) = {
+	const halves = strings::cut(in, ";");
+	const mtype = halves.0;
+	const names = strings::cut(mtype, "/");
+	if (len(names.0) == 0 || len(names.1) == 0) {
+		return errors::invalid;
+	};
+	typevalid(names.0)?;
+	typevalid(names.1)?;
+	return (mtype, strings::tokenize(halves.1, ";"));
+};
+
+// Returns the next parameter as a (key, value) tuple from a [[type_params]]
+// object that was prepared via [[parse]], void if there are no remaining
+// parameters, and [[errors::invalid]] if a syntax error was encountered.
+// Whitespace around the key and value is discarded. A parameter with no key
+// is a syntax error; quoted values are unsupported and rejected likewise.
+export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
+	const tok = match (strings::next_token(in)) {
+	case let s: str =>
+		yield s;
+	case void =>
+		return;
+	};
+
+	// The RFC does not permit whitespace around the name or the value, but
+	// it is common in the wild, so it is trimmed rather than rejected.
+	const items = strings::cut(tok, "=");
+	const key = strings::trim(items.0);
+	const value = strings::trim(items.1);
+
+	if (key == "") {
+		// Empty parameter (";;" or "; ;"), or a value without a name.
+		return errors::invalid;
+	};
+
+	if (strings::hasprefix(value, "\"")) {
+		// TODO: quoted-string values (RFC 2045 section 5.1) are not
+		// implemented yet, so they are rejected outright.
+		return errors::invalid;
+	};
+
+	return (key, value);
+};
+
+@test fn parse() void = {
+	const res = parse("text/plain")!;
+	assert(res.0 == "text/plain");
+
+	const res = parse("image/png")!;
+	assert(res.0 == "image/png");
+	// Parameters are read back in order, with surrounding whitespace trimmed
+	const res = parse("application/svg+xml; charset=utf-8; foo=bar")!;
+	assert(res.0 == "application/svg+xml");
+	const params = res.1;
+	const param = next_param(&params)! as (str, str);
+	assert(param.0 == "charset" && param.1 == "utf-8");
+	const param = next_param(&params)! as (str, str);
+	assert(param.0 == "foo" && param.1 == "bar");
+	assert(next_param(&params) is void);
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -781,6 +781,12 @@ math() {
gen_ssa math types
}
+mime() {
+	gen_srcs mime \
+		parse.ha
+	gen_ssa mime ascii errors strings
+}
+
net() {
gen_srcs -plinux net \
+linux.ha \
@@ -1194,6 +1200,8 @@ linux::signalfd linux
linux::io_uring linux
linux::vdso linux
math
+math::random
+mime
net linux freebsd
net::dial
net::dns
@@ -1201,7 +1209,6 @@ net::ip linux freebsd
net::tcp linux freebsd
net::udp linux freebsd
net::unix linux freebsd
-math::random
os linux freebsd
os::exec linux freebsd
path
diff --git a/stdlib.mk b/stdlib.mk
@@ -432,6 +432,18 @@ stdlib_deps_any+=$(stdlib_math_any)
stdlib_math_linux=$(stdlib_math_any)
stdlib_math_freebsd=$(stdlib_math_any)
+# gen_lib math::random (any)
+stdlib_math_random_any=$(HARECACHE)/math/random/math_random-any.o
+stdlib_deps_any+=$(stdlib_math_random_any)
+stdlib_math_random_linux=$(stdlib_math_random_any)
+stdlib_math_random_freebsd=$(stdlib_math_random_any)
+
+# gen_lib mime (any)
+stdlib_mime_any=$(HARECACHE)/mime/mime-any.o
+stdlib_deps_any+=$(stdlib_mime_any)
+stdlib_mime_linux=$(stdlib_mime_any)
+stdlib_mime_freebsd=$(stdlib_mime_any)
+
# gen_lib net (linux)
stdlib_net_linux=$(HARECACHE)/net/net-linux.o
stdlib_deps_linux+=$(stdlib_net_linux)
@@ -484,12 +496,6 @@ stdlib_deps_linux+=$(stdlib_net_unix_linux)
stdlib_net_unix_freebsd=$(HARECACHE)/net/unix/net_unix-freebsd.o
stdlib_deps_freebsd+=$(stdlib_net_unix_freebsd)
-# gen_lib math::random (any)
-stdlib_math_random_any=$(HARECACHE)/math/random/math_random-any.o
-stdlib_deps_any+=$(stdlib_math_random_any)
-stdlib_math_random_linux=$(stdlib_math_random_any)
-stdlib_math_random_freebsd=$(stdlib_math_random_any)
-
# gen_lib os (linux)
stdlib_os_linux=$(HARECACHE)/os/os-linux.o
stdlib_deps_linux+=$(stdlib_os_linux)
@@ -1285,6 +1291,26 @@ $(HARECACHE)/math/math-any.ssa: $(stdlib_math_any_srcs) $(stdlib_rt) $(stdlib_ty
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nmath \
-t$(HARECACHE)/math/math.td $(stdlib_math_any_srcs)
+# math::random (+any)
+stdlib_math_random_any_srcs= \
+ $(STDLIB)/math/random/random.ha
+
+$(HARECACHE)/math/random/math_random-any.ssa: $(stdlib_math_random_any_srcs) $(stdlib_rt)
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(HARECACHE)/math/random
+ @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nmath::random \
+ -t$(HARECACHE)/math/random/math_random.td $(stdlib_math_random_any_srcs)
+
+# mime (+any)
+stdlib_mime_any_srcs= \
+	$(STDLIB)/mime/parse.ha
+
+$(HARECACHE)/mime/mime-any.ssa: $(stdlib_mime_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_strings_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(HARECACHE)/mime
+	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nmime \
+		-t$(HARECACHE)/mime/mime.td $(stdlib_mime_any_srcs)
+
# net (+linux)
stdlib_net_linux_srcs= \
$(STDLIB)/net/+linux.ha \
@@ -1438,16 +1464,6 @@ $(HARECACHE)/net/unix/net_unix-freebsd.ssa: $(stdlib_net_unix_freebsd_srcs) $(st
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::unix \
-t$(HARECACHE)/net/unix/net_unix.td $(stdlib_net_unix_freebsd_srcs)
-# math::random (+any)
-stdlib_math_random_any_srcs= \
- $(STDLIB)/math/random/random.ha
-
-$(HARECACHE)/math/random/math_random-any.ssa: $(stdlib_math_random_any_srcs) $(stdlib_rt)
- @printf 'HAREC \t$@\n'
- @mkdir -p $(HARECACHE)/math/random
- @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nmath::random \
- -t$(HARECACHE)/math/random/math_random.td $(stdlib_math_random_any_srcs)
-
# os (+linux)
stdlib_os_linux_srcs= \
$(STDLIB)/os/+linux/environ.ha \
@@ -2215,6 +2231,18 @@ testlib_deps_any+=$(testlib_math_any)
testlib_math_linux=$(testlib_math_any)
testlib_math_freebsd=$(testlib_math_any)
+# gen_lib math::random (any)
+testlib_math_random_any=$(TESTCACHE)/math/random/math_random-any.o
+testlib_deps_any+=$(testlib_math_random_any)
+testlib_math_random_linux=$(testlib_math_random_any)
+testlib_math_random_freebsd=$(testlib_math_random_any)
+
+# gen_lib mime (any)
+testlib_mime_any=$(TESTCACHE)/mime/mime-any.o
+testlib_deps_any+=$(testlib_mime_any)
+testlib_mime_linux=$(testlib_mime_any)
+testlib_mime_freebsd=$(testlib_mime_any)
+
# gen_lib net (linux)
testlib_net_linux=$(TESTCACHE)/net/net-linux.o
testlib_deps_linux+=$(testlib_net_linux)
@@ -2267,12 +2295,6 @@ testlib_deps_linux+=$(testlib_net_unix_linux)
testlib_net_unix_freebsd=$(TESTCACHE)/net/unix/net_unix-freebsd.o
testlib_deps_freebsd+=$(testlib_net_unix_freebsd)
-# gen_lib math::random (any)
-testlib_math_random_any=$(TESTCACHE)/math/random/math_random-any.o
-testlib_deps_any+=$(testlib_math_random_any)
-testlib_math_random_linux=$(testlib_math_random_any)
-testlib_math_random_freebsd=$(testlib_math_random_any)
-
# gen_lib os (linux)
testlib_os_linux=$(TESTCACHE)/os/os-linux.o
testlib_deps_linux+=$(testlib_os_linux)
@@ -3103,6 +3125,26 @@ $(TESTCACHE)/math/math-any.ssa: $(testlib_math_any_srcs) $(testlib_rt) $(testlib
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmath \
-t$(TESTCACHE)/math/math.td $(testlib_math_any_srcs)
+# math::random (+any)
+testlib_math_random_any_srcs= \
+ $(STDLIB)/math/random/random.ha
+
+$(TESTCACHE)/math/random/math_random-any.ssa: $(testlib_math_random_any_srcs) $(testlib_rt)
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(TESTCACHE)/math/random
+ @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmath::random \
+ -t$(TESTCACHE)/math/random/math_random.td $(testlib_math_random_any_srcs)
+
+# mime (+any)
+testlib_mime_any_srcs= \
+	$(STDLIB)/mime/parse.ha
+
+$(TESTCACHE)/mime/mime-any.ssa: $(testlib_mime_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strings_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(TESTCACHE)/mime
+	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmime \
+		-t$(TESTCACHE)/mime/mime.td $(testlib_mime_any_srcs)
+
# net (+linux)
testlib_net_linux_srcs= \
$(STDLIB)/net/+linux.ha \
@@ -3258,16 +3300,6 @@ $(TESTCACHE)/net/unix/net_unix-freebsd.ssa: $(testlib_net_unix_freebsd_srcs) $(t
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::unix \
-t$(TESTCACHE)/net/unix/net_unix.td $(testlib_net_unix_freebsd_srcs)
-# math::random (+any)
-testlib_math_random_any_srcs= \
- $(STDLIB)/math/random/random.ha
-
-$(TESTCACHE)/math/random/math_random-any.ssa: $(testlib_math_random_any_srcs) $(testlib_rt)
- @printf 'HAREC \t$@\n'
- @mkdir -p $(TESTCACHE)/math/random
- @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmath::random \
- -t$(TESTCACHE)/math/random/math_random.td $(testlib_math_random_any_srcs)
-
# os (+linux)
testlib_os_linux_srcs= \
$(STDLIB)/os/+linux/environ.ha \