hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e7752e7e1996c6b387a19a3b989976ab3fd76f75
parent eb4fced7dfc80539b1c04a1cbf95210754850328
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat, 29 Jan 2022 11:22:37 +0100

mime: load system database

This also corrects a use-after-free error with the storage tables by
switching to slices of pointers rather than slices of values.

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
M mime/database.ha | 49 ++++++++++++++++++++++++++-----------------------
M mime/lookup.ha | 1 +
A mime/system.ha | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M os/+linux/fs.ha | 1 +
M scripts/gen-stdlib | 9 ++++++---
M stdlib.mk | 36 ------------------------------------
6 files changed, 110 insertions(+), 62 deletions(-)

diff --git a/mime/database.ha b/mime/database.ha @@ -10,11 +10,11 @@ export type mimetype = struct { // List of media types with statically allocated fields (though the list itself // is dynamically allocated). -let static_db: []mimetype = []; +let static_db: []*mimetype = []; // List of media types with heap-allocated fields, used when loading mime types // from the system database. -let heap_db: []mimetype = []; +let heap_db: []*mimetype = []; def MIME_BUCKETS: size = 256; @@ -25,42 +25,45 @@ let exttable: [MIME_BUCKETS][]*mimetype = [[]...]; // Registers a Media Type and its extensions in the internal MIME database. This // function is designed to be used by @init functions for modules which // implement new Media Types. -export fn register(mime: mimetype...) void = { +export fn register(mime: *mimetype...) void = { let i = len(static_db); append(static_db, mime...); for (i < len(static_db); i += 1) { - const item = &static_db[i]; - const hash = fnv::string(item.mime); - let bucket = &mimetable[hash % len(mimetable)]; - append(bucket, item); + hashtable_insert(static_db[i]); + }; +}; + +fn hashtable_insert(item: *mimetype) void = { + const hash = fnv::string(item.mime); + let bucket = &mimetable[hash % len(mimetable)]; + append(bucket, item); - for (let i = 0z; i < len(item.exts); i += 1) { - const hash = fnv::string(item.exts[i]); - let bucket = &exttable[hash % len(exttable)]; - append(bucket, item); - }; + for (let i = 0z; i < len(item.exts); i += 1) { + const hash = fnv::string(item.exts[i]); + let bucket = &exttable[hash % len(exttable)]; + append(bucket, item); }; }; -const builtins: [_]mimetype = [ - mimetype { - mime = "text/plain", - exts = ["txt"], - }, - mimetype { - mime = "text/x-hare", - exts = ["ha"], - }, -]; +const text_plain: mimetype = mimetype { + mime = "text/plain", + exts = ["txt"], +}; + +const text_hare: mimetype = mimetype { + mime = "text/x-hare", + exts = ["ha"], +}; @init fn init() void = { - register(builtins...); + 
register(&text_plain, &text_hare); }; @fini fn fini() void = { for (let i = 0z; i < len(heap_db); i += 1) { free(heap_db[i].mime); strings::freeall(heap_db[i].exts); + free(heap_db[i]); }; free(heap_db); free(static_db); diff --git a/mime/lookup.ha b/mime/lookup.ha @@ -1,5 +1,6 @@ use hash::fnv; use strings; +use io; // XXX: TEMP // Looks up a Media Type based on the mime type string, returning null if // unknown. diff --git a/mime/system.ha b/mime/system.ha @@ -0,0 +1,76 @@ +use bufio; +use errors; +use encoding::utf8; +use fs; +use io; +use os; +use strings; + +// Path to the system MIME database. +export def SYSTEM_DB: str = "/etc/mime.types"; + +@init fn init() void = { + // Done in a separate function so we can discard errors here + load_systemdb(): void; +}; + +fn load_systemdb() (void | fs::error | io::error) = { + const file = os::open(SYSTEM_DB)?; + defer io::close(file); + + let buf: [os::BUFSIZ]u8 = [0...]; + const file = bufio::buffered(file, buf, []); + defer io::close(&file); + + for (true) { + const line = match (bufio::scanline(&file)?) { + case let bytes: []u8 => + yield match (strings::try_fromutf8(bytes)) { + case utf8::invalid => + io::errorln("Warning: /etc/mime.types contains invalid UTF-8"); + return; + case let s: str => + yield s; + }; + case io::EOF => + break; + }; + defer free(line); + + const line = strings::trim(line); + if (strings::hasprefix(line, "#") || len(line) == 0) { + continue; + }; + + const items = strings::cut(line, "\t"); + const mime = strings::trim(items.0), + exts = strings::trim(items.1); + if (len(exts) == 0) { + continue; + }; + + const tok = strings::tokenize(exts, " "); + let entry = alloc(mimetype { + mime = strings::dup(mime), + exts = [], + }); + for (true) { + const ext = match (strings::next_token(&tok)) { + case let tok: str => + yield strings::trim(tok); + case void => + break; + }; + append(entry.exts, strings::dup(ext)); + }; + register_heap(entry); + }; +}; + +fn register_heap(mime: *mimetype...) 
void = { + let i = len(heap_db); + append(heap_db, mime...); + for (i < len(heap_db); i += 1) { + hashtable_insert(heap_db[i]); + }; +}; diff --git a/os/+linux/fs.ha b/os/+linux/fs.ha @@ -3,6 +3,7 @@ use fs; use path; use rt; use strings; +use io; @init fn init() void = { static let cwd_fs = os_filesystem { ... }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -794,11 +794,15 @@ math() { } mime() { + # This module is not built by default because gen-stdlib does not do a good + # job of resolving @init dependency ordering issues gen_srcs mime \ database.ha \ lookup.ha \ - parse.ha - gen_ssa mime ascii errors string + parse.ha \ + system.ha + gen_ssa mime ascii errors string hash::fnv encoding::utf8 bufio \ + errors fs io os } net() { @@ -1216,7 +1220,6 @@ linux::io_uring linux linux::vdso linux math math::random -mime net linux freebsd net::dial net::dns diff --git a/stdlib.mk b/stdlib.mk @@ -444,12 +444,6 @@ stdlib_deps_any+=$(stdlib_math_random_any) stdlib_math_random_linux=$(stdlib_math_random_any) stdlib_math_random_freebsd=$(stdlib_math_random_any) -# gen_lib mime (any) -stdlib_mime_any=$(HARECACHE)/mime/mime-any.o -stdlib_deps_any+=$(stdlib_mime_any) -stdlib_mime_linux=$(stdlib_mime_any) -stdlib_mime_freebsd=$(stdlib_mime_any) - # gen_lib net (linux) stdlib_net_linux=$(HARECACHE)/net/net-linux.o stdlib_deps_linux+=$(stdlib_net_linux) @@ -1318,18 +1312,6 @@ $(HARECACHE)/math/random/math_random-any.ssa: $(stdlib_math_random_any_srcs) $(s @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nmath::random \ -t$(HARECACHE)/math/random/math_random.td $(stdlib_math_random_any_srcs) -# mime (+any) -stdlib_mime_any_srcs= \ - $(STDLIB)/mime/database.ha \ - $(STDLIB)/mime/lookup.ha \ - $(STDLIB)/mime/parse.ha - -$(HARECACHE)/mime/mime-any.ssa: $(stdlib_mime_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_string_$(PLATFORM)) - @printf 'HAREC \t$@\n' - @mkdir -p $(HARECACHE)/mime - @HARECACHE=$(HARECACHE) $(HAREC) 
$(HAREFLAGS) -o $@ -Nmime \ - -t$(HARECACHE)/mime/mime.td $(stdlib_mime_any_srcs) - # net (+linux) stdlib_net_linux_srcs= \ $(STDLIB)/net/+linux.ha \ @@ -2262,12 +2244,6 @@ testlib_deps_any+=$(testlib_math_random_any) testlib_math_random_linux=$(testlib_math_random_any) testlib_math_random_freebsd=$(testlib_math_random_any) -# gen_lib mime (any) -testlib_mime_any=$(TESTCACHE)/mime/mime-any.o -testlib_deps_any+=$(testlib_mime_any) -testlib_mime_linux=$(testlib_mime_any) -testlib_mime_freebsd=$(testlib_mime_any) - # gen_lib net (linux) testlib_net_linux=$(TESTCACHE)/net/net-linux.o testlib_deps_linux+=$(testlib_net_linux) @@ -3172,18 +3148,6 @@ $(TESTCACHE)/math/random/math_random-any.ssa: $(testlib_math_random_any_srcs) $( @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmath::random \ -t$(TESTCACHE)/math/random/math_random.td $(testlib_math_random_any_srcs) -# mime (+any) -testlib_mime_any_srcs= \ - $(STDLIB)/mime/database.ha \ - $(STDLIB)/mime/lookup.ha \ - $(STDLIB)/mime/parse.ha - -$(TESTCACHE)/mime/mime-any.ssa: $(testlib_mime_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_string_$(PLATFORM)) - @printf 'HAREC \t$@\n' - @mkdir -p $(TESTCACHE)/mime - @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nmime \ - -t$(TESTCACHE)/mime/mime.td $(testlib_mime_any_srcs) - # net (+linux) testlib_net_linux_srcs= \ $(STDLIB)/net/+linux.ha \