hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit a335a556ee379d6a3237a6d81aa838522fa9cc66
parent b7158f2188b22737a0d6d347e2fb59a5389049bf
Author: Drew DeVault <sir@cmpwn.com>
Date:   Fri, 28 Jan 2022 10:04:24 +0100

mime: implement internal MIME database

Left some initial pieces in place to support extending this by reading
/etc/mime.types in the future.

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
A mime/database.ha | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A mime/lookup.ha | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 2 ++
M stdlib.mk | 4 ++++
4 files changed, 149 insertions(+), 0 deletions(-)

diff --git a/mime/database.ha b/mime/database.ha @@ -0,0 +1,68 @@ +use hash::fnv; +use strings; + +// A pair of a Media Type and a list of file extensions associated with it. The +// extension list does not include the leading '.' character. +export type mimetype = struct { + mime: str, + exts: []str, +}; + +// List of media types with statically allocated fields (though the list +// itself is dynamically allocated). +let static_db: []mimetype = []; + +// List of media types with heap-allocated fields, used when loading mime +// types from the system database. +let heap_db: []mimetype = []; + +def MIME_BUCKETS: size = 256; + +// Hash tables for efficient database lookup by mimetype or extension +let mimetable: [MIME_BUCKETS][]*mimetype = [[]...]; +let exttable: [MIME_BUCKETS][]*mimetype = [[]...]; + +// Registers a Media Type and its extensions in the internal MIME database. This +// function is designed to be used by @init functions for modules which +// implement new Media Types. +export fn register(mime: mimetype...) 
void = { + let i = len(static_db); + append(static_db, mime...); + for (i < len(static_db); i += 1) { + const item = &static_db[i]; + const hash = fnv::string(item.mime); + let bucket = &mimetable[hash % len(mimetable)]; + append(bucket, item); + + for (let i = 0z; i < len(item.exts); i += 1) { + const hash = fnv::string(item.exts[i]); + let bucket = &exttable[hash % len(exttable)]; + append(bucket, item); + assert(len(bucket) > 0); + }; + }; +}; + +const builtins: [_]mimetype = [ + mimetype { + mime = "text/plain", + exts = ["txt"], + }, + mimetype { + mime = "text/x-hare", + exts = ["ha"], + }, +]; + +@init fn init() void = { + register(builtins...); +}; + +@fini fn fini() void = { + for (let i = 0z; i < len(heap_db); i += 1) { + free(heap_db[i].mime); + strings::freeall(heap_db[i].exts); + }; + free(heap_db); + free(static_db); +}; diff --git a/mime/lookup.ha b/mime/lookup.ha @@ -0,0 +1,75 @@ +use hash::fnv; +use strings; + +// Looks up a Media Type based on the mime type string, returning null if +// unknown. +export fn lookup_mime(mime: str) const nullable *mimetype = { + const hash = fnv::string(mime); + const bucket = &mimetable[hash % len(mimetable)]; + for (let i = 0z; i < len(bucket); i += 1) { + const item = bucket[i]; + if (item.mime == mime) { + return item; + }; + }; + return null; +}; + +@test fn lookup_mime() void = { + assert(lookup_mime("foo/bar") == null); + + const result = lookup_mime("text/plain"); + assert(result != null); + const result = result: *mimetype; + assert(result.mime == "text/plain"); + assert(len(result.exts) == 1); + assert(result.exts[0] == "txt"); + + const result = lookup_mime("text/x-hare"); + assert(result != null); + const result = result: *mimetype; + assert(result.mime == "text/x-hare"); + assert(len(result.exts) == 1); + assert(result.exts[0] == "ha"); +}; + +// Looks up a Media Type based on a file extension, with or without the leading +// '.' character, returning null if unknown. 
+export fn lookup_ext(ext: str) const nullable *mimetype = { + ext = strings::ltrim(ext, '.'); + const hash = fnv::string(ext); + const bucket = &exttable[hash % len(exttable)]; + for (let i = 0z; i < len(bucket); i += 1) { + const item = bucket[i]; + for (let j = 0z; j < len(item.exts); j += 1) { + if (item.exts[j] == ext) { + return item; + }; + }; + }; + return null; +}; + +@test fn lookup_ext() void = { + assert(lookup_ext("foo") == null); + assert(lookup_ext(".foo") == null); + + const result = lookup_ext("txt"); + assert(result != null); + const result = result: *mimetype; + assert(result.mime == "text/plain"); + assert(len(result.exts) == 1); + assert(result.exts[0] == "txt"); + + const result = lookup_ext(".txt"); + assert(result != null); + const result = result: *mimetype; + assert(result.mime == "text/plain"); + + const result = lookup_ext("ha"); + assert(result != null); + const result = result: *mimetype; + assert(result.mime == "text/x-hare"); + assert(len(result.exts) == 1); + assert(result.exts[0] == "ha"); +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -795,6 +795,8 @@ math() { mime() { gen_srcs mime \ + database.ha \ + lookup.ha \ parse.ha gen_ssa mime ascii errors string } diff --git a/stdlib.mk b/stdlib.mk @@ -1320,6 +1320,8 @@ $(HARECACHE)/math/random/math_random-any.ssa: $(stdlib_math_random_any_srcs) $(s # mime (+any) stdlib_mime_any_srcs= \ + $(STDLIB)/mime/database.ha \ + $(STDLIB)/mime/lookup.ha \ $(STDLIB)/mime/parse.ha $(HARECACHE)/mime/mime-any.ssa: $(stdlib_mime_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_string_$(PLATFORM)) @@ -3172,6 +3174,8 @@ $(TESTCACHE)/math/random/math_random-any.ssa: $(testlib_math_random_any_srcs) $( # mime (+any) testlib_mime_any_srcs= \ + $(STDLIB)/mime/database.ha \ + $(STDLIB)/mime/lookup.ha \ $(STDLIB)/mime/parse.ha $(TESTCACHE)/mime/mime-any.ssa: $(testlib_mime_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) 
$(testlib_string_$(PLATFORM))