hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit baf3b6bdad164b9e05ced89eda3cdb9e8ff087b8
parent e15431c81b810cbf8040832fd43173585ec350cb
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat,  8 Jan 2022 11:00:10 +0100

pathbuffer: merge into path

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
M path/README | 25 +++++++++++++++++++++++++
A path/buffer.ha | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M path/join.ha | 105 +++++++++++++++++++++++++++++--------------------------------------------------
D pathbuf/README | 24 ------------------------
D pathbuf/buffer.ha | 127 -------------------------------------------------------------------------------
D pathbuf/ops.ha | 40 ----------------------------------------
M scripts/gen-stdlib | 9 +--------
M stdlib.mk | 36 ++----------------------------------
8 files changed, 168 insertions(+), 300 deletions(-)

diff --git a/path/README b/path/README @@ -3,3 +3,28 @@ The path module provides utilities for working with filesystem paths. Note that Hare expects paths to be valid UTF-8 strings. If you require the use of non-UTF-8 paths (ideally for only as long as it takes to delete or rename those files), see the low-level functions available from [[rt]]. + +Use of the [[buffer]] type is recommended for efficient and consistent +manipulation of filesystem paths. + + let buf = path::init(); + path::add(&buf, "/", "foo", "bar", "baz.txt"); + io::println(path::string(&buf)); // "/foo/bar/baz.txt" + + path::add(&buf, "../.././hello.txt"); + io::println(path::string(&buf)); // "/foo/hello.txt" + +The buffer object includes an array of length [[PATH_MAX]], which can be +somewhat large; on Linux it's 4096 bytes. You can allocate this on the stack in +most cases, but you may prefer to allocate it elsewhere depending on your needs. + + // Stack allocated + let buf = path::init(); + + // Statically allocated + static let buf = path::buffer { ... }; + pathbuf::reset(&buf); + + // Heap allocated + let buf = alloc(path::init()); + defer free(buf); diff --git a/path/buffer.ha b/path/buffer.ha @@ -0,0 +1,102 @@ +use bytes; +use errors; +use strings; + +export type buffer = struct { + buf: [PATH_MAX]u8, + cur: []u8, +}; + +// Initializes a new path buffer. +export fn init() buffer = { + let buf = buffer { ... }; + reset(&buf); + return buf; +}; + +// Resets a path buffer to its initial state. +export fn reset(buf: *buffer) void = { + buf.cur = buf.buf[..0]; +}; + +// Creates a copy of another path buffer, which can be modified without +// affecting the original. +export fn dup(buf: *buffer) buffer = { + let new = buffer { ... }; + new.buf[..] = buf.buf[..]; + new.cur = new.buf[..0]; + return new; +}; + +// Returns the current path stored in this buffer. The path will always be +// normalized, which is to say that it will not include any of the following: +// +// - "." 
components +// - Redundant ".." components +// - Repeated path separators +// +// "/usr//bin/../bin/./hare" becomes "/usr/bin/hare" and "../../foo/bar" is +// unchanged. +export fn string(buf: *buffer) str = { + return strings::fromutf8_unsafe(buf.cur); +}; + +// Normalizes and appends a path component to a buffer. +// +// Invariant: elem must either be equal to [PATHSEP], or contain no path +// separators. +fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = { + const elem = match (elem) { + case let elem: []u8 => + yield elem; + case let string: str => + yield strings::toutf8(string); + }; + if (len(elem) == 1 && elem[0] == PATHSEP) { + if (len(buf.cur) == 0) { + static append(buf.cur, PATHSEP); + return; + }; + return; + } else if (bytes::equal(elem, ['.': u8])) { + return; + } else if (bytes::equal(elem, ['.': u8, '.': u8])) { + abort(); // TODO + }; + if (len(buf.cur) + len(elem) + 1 >= PATH_MAX) { + return errors::overflow; + }; + if (len(buf.cur) > 1 && buf.cur[len(buf.cur) - 1] != PATHSEP) { + static append(buf.cur, PATHSEP); + }; + static append(buf.cur, elem...); +}; + +@test fn appendnorm() void = { + let buf = init(); + assert(string(&buf) == ""); + appendnorm(&buf, "foo")!; + appendnorm(&buf, "bar")!; + appendnorm(&buf, "baz")!; + assert(string(&buf) == "foo/bar/baz"); + appendnorm(&buf, ".")!; + appendnorm(&buf, "bad")!; + appendnorm(&buf, ".")!; + assert(string(&buf) == "foo/bar/baz/bad"); + + let buf = init(); + appendnorm(&buf, "/")!; + appendnorm(&buf, "foo")!; + appendnorm(&buf, "bar")!; + appendnorm(&buf, "baz")!; + assert(string(&buf) == "/foo/bar/baz"); + appendnorm(&buf, "/")!; + appendnorm(&buf, "/")!; + assert(string(&buf) == "/foo/bar/baz"); + + let buf = init(); + appendnorm(&buf, "/")!; + appendnorm(&buf, "/")!; + appendnorm(&buf, "/")!; + assert(string(&buf) == "/"); +}; diff --git a/path/join.ha b/path/join.ha @@ -1,78 +1,49 @@ use bytes; -use bufio; +use errors; use strings; -use io; -// Joins together several 
path components with the path separator. The caller -// must free the return value. -export fn join(paths: str...) str = { - // TODO: Normalize inputs so that if they end with a / we don't double - // up on delimiters - let sink = bufio::dynamic(io::mode::WRITE); - let utf8 = true; - for (let i = 0z; i < len(paths); i += 1) { - const buf = strings::toutf8(paths[i]); - if (len(buf) == 0) { - continue; // Empty path component, discard - } else if (len(buf) == 1 && buf[0] == PATHSEP && i == 0) { - // "/" as the first component - io::write(&sink, [PATHSEP])!; - continue; - }; - - // Trim away trailing PATHSEPs, if present - let l = len(buf); - for (l > 0 && buf[l - 1] == PATHSEP) { - l -= 1; - }; - - for (let q = 0z; q < l) { - let w = io::write(&sink, buf[q..l]) as size; - q += w; - }; - if (i + 1 < len(paths)) { - assert(io::write(&sink, [PATHSEP]) as size == 1); +// Joins several path elements together and appends them to a path buffer. +export fn add(buf: *buffer, items: str...) (void | errors::overflow) = { + for (let i = 0z; i < len(items); i += 1) { + const elem = strings::toutf8(items[i]); + const tok = bytes::tokenize(elem, pathsep); + for (let j = 0z; true; j += 1) { + const next = match (bytes::next_token(&tok)) { + case let tok: []u8 => + yield tok; + case void => + break; + }; + if (len(next) == 0 && j == 0) { + // Handles the add("/foo") case as + // add("/", "foo"); + appendnorm(buf, "/")?; + }; + appendnorm(buf, next)?; }; }; - - return strings::fromutf8_unsafe(bufio::buffer(&sink)); }; -@test fn join() void = { - assert(PATHSEP == '/': u32: u8); // TODO: meh - let i = join("foo"); - defer free(i); - assert(i == "foo"); +@test fn add() void = { + let buf = init(); + add(&buf, "foo", "bar", "baz")!; + assert(string(&buf) == "foo/bar/baz"); - let p = join(i, "bar", "baz"); - defer free(p); - assert(p == "foo/bar/baz"); + reset(&buf); + add(&buf, "/foo/bar", "baz")!; + assert(string(&buf) == "/foo/bar/baz"); - let q = join(p, "bat", "bad"); - defer 
free(q); - assert(q == "foo/bar/baz/bat/bad"); - - let r = join(p, q); - defer free(r); - assert(r == "foo/bar/baz/foo/bar/baz/bat/bad"); - - let p = join("foo/", "bar"); - defer free(p); - assert(p == "foo/bar"); - - let p = join("foo///", "bar"); - defer free(p); - assert(p == "foo/bar"); - - let p = join("foo", "", "bar"); - defer free(p); - assert(p == "foo/bar"); - - let p = join("/", "foo", "bar", "baz"); - defer free(p); - assert(p == "/foo/bar/baz"); + reset(&buf); + add(&buf, "/", "foo/bar", "baz")!; + assert(string(&buf) == "/foo/bar/baz"); +}; - let i = join("/"); - defer free(i); - assert(i == "/"); +// Joins a list of path components together, normalizes it, and returns the +// resulting string. The caller must free the return value. If the resulting +// path would exceed [[PATH_MAX]], the program aborts. +export fn join(items: str...) str = { + static let buf = buffer { ... }; + reset(&buf); + add(&buf, items...)!; + return strings::dup(string(&buf)); }; diff --git a/pathbuf/README b/pathbuf/README @@ -1,24 +0,0 @@ -The pathbuf module provides for the efficient and consistent manipulation of -filesystem paths through the [[buffer]] type. - - let buf = pathbuf::init(); - pathbuf::join(&buf, "/", "foo", "bar", "baz.txt"); - io::println(pathbuf::path(&buf)); // "/foo/bar/baz.txt" - - pathbuf::join(&buf, "../.././hello.txt"); - io::println(pathbuf::path(&buf)); // "/foo/hello.txt" - -The buffer object includes an array of length [[path::PATH_MAX]], which can be -somewhat large - on Linux it's 4096. You can allocate this on the stack in most -cases, but you may prefer to allocate it elsewhere depending on your needs. - - // Stack allocated - let buf = pathbuf::init(); - - // Statically allocated - static let buf = pathbuf::buffer { ... 
}; - pathbuf::init_static(&buf); - - // Heap allocated - let buf = alloc(pathbuf::init()); - defer free(buf); diff --git a/pathbuf/buffer.ha b/pathbuf/buffer.ha @@ -1,127 +0,0 @@ -use bytes; -use errors; -use path; -use strings; - -export type buffer = struct { - buf: [path::PATH_MAX]u8, - cur: []u8, -}; - -// Initializes a new path buffer. -export fn init() buffer = { - let buf = buffer { ... }; - reset(&buf); - return buf; -}; - -// Initializes a caller-allocated path buffer. -// -// let buf = pathbuf::buffer { ... }; -// pathbuf::init_static(&buf); -export fn init_static(buf: *buffer) void = { - reset(buf); -}; - -// Initializes a new path buffer and sets its initial value from a set of path -// components. -export fn initfrom(items: str...) (buffer | errors::overflow) = { - let buf = buffer { ... }; - reset(&buf); - join(&buf, items...)?; - return buf; -}; - -// Resets a path buffer to its initial state (an empty path). -export fn reset(buf: *buffer) void = { - buf.cur = buf.buf[..0]; -}; - -// Creates a copy of another path buffer, which can be modified without -// affecting the original. -export fn dup(buf: *buffer) buffer = { - let new = buffer { ... }; - new.buf[..] = buf.buf[..]; - new.cur = new.buf[..0]; - return new; -}; - -// Like [[dup]], but the new buffer is allocated by the caller. -export fn dup_static(new: *buffer, old: *buffer) void = { - new.buf[..] = old.buf[..]; - new.cur = old.buf[..0]; -}; - -// Returns the current path stored in this buffer. The path will always be -// normalized, which is to say that it will not include any "." or ".." -// components, or repeated path separators (e.g. "/usr//bin/../bin/./hare" -// becomes "/usr/bin/hare"). -export fn path(buf: *buffer) str = { - return strings::fromutf8_unsafe(buf.cur); -}; - -// Overwrites the contents of a [[buffer]] with an arbitrary path. 
-export fn setpath(buf: *buffer, path: str) (void | errors::overflow) = { - reset(buf); - join(buf, path)?; -}; - -// Normalizes and appends a path component to a buffer. -// -// Invariant: elem must either be equal to [path::PATHSEP], or contain no path -// separators. -fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = { - const elem = match (elem) { - case let elem: []u8 => - yield elem; - case let string: str => - yield strings::toutf8(string); - }; - if (len(elem) == 1 && elem[0] == path::PATHSEP) { - if (len(buf.cur) == 0) { - static append(buf.cur, path::PATHSEP); - return; - }; - return; - } else if (bytes::equal(elem, ['.': u8])) { - return; - } else if (bytes::equal(elem, ['.': u8, '.': u8])) { - abort(); // TODO - }; - if (len(buf.cur) + len(elem) + 1 >= path::PATH_MAX) { - return errors::overflow; - }; - if (len(buf.cur) > 1 && buf.cur[len(buf.cur) - 1] != path::PATHSEP) { - static append(buf.cur, path::PATHSEP); - }; - static append(buf.cur, elem...); -}; - -@test fn appendnorm() void = { - let buf = init(); - assert(path(&buf) == ""); - appendnorm(&buf, "foo")!; - appendnorm(&buf, "bar")!; - appendnorm(&buf, "baz")!; - assert(path(&buf) == "foo/bar/baz"); - appendnorm(&buf, ".")!; - appendnorm(&buf, "bad")!; - appendnorm(&buf, ".")!; - assert(path(&buf) == "foo/bar/baz/bad"); - - let buf = init(); - appendnorm(&buf, "/")!; - appendnorm(&buf, "foo")!; - appendnorm(&buf, "bar")!; - appendnorm(&buf, "baz")!; - assert(path(&buf) == "/foo/bar/baz"); - appendnorm(&buf, "/")!; - appendnorm(&buf, "/")!; - assert(path(&buf) == "/foo/bar/baz"); - - let buf = init(); - appendnorm(&buf, "/")!; - appendnorm(&buf, "/")!; - appendnorm(&buf, "/")!; - assert(path(&buf) == "/"); -}; diff --git a/pathbuf/ops.ha b/pathbuf/ops.ha @@ -1,40 +0,0 @@ -use bytes; -use errors; -use path; -use strings; - -// Joins several path elements together and appends them to a path buffer. -export fn join(buf: *buffer, items: str...) 
(void | errors::overflow) = { - for (let i = 0z; i < len(items); i += 1) { - const elem = strings::toutf8(items[i]); - const tok = bytes::tokenize(elem, [path::PATHSEP]); - for (let j = 0z; true; j += 1) { - const next = match (bytes::next_token(&tok)) { - case let tok: []u8 => - yield tok; - case void => - break; - }; - if (len(next) == 0 && j == 0) { - // Handles the join("/foo") case as - // join("/", "foo"); - appendnorm(buf, "/")?; - }; - appendnorm(buf, next)?; - }; - }; -}; - -@test fn join() void = { - let buf = init(); - join(&buf, "foo", "bar", "baz")!; - assert(path(&buf) == "foo/bar/baz"); - - reset(&buf); - join(&buf, "/foo/bar", "baz")!; - assert(path(&buf) == "/foo/bar/baz"); - - reset(&buf); - join(&buf, "/", "foo/bar", "baz")!; - assert(path(&buf) == "/foo/bar/baz"); -}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -913,6 +913,7 @@ os_exec() { path() { gen_srcs path \ '+$(PLATFORM).ha' \ + buffer.ha \ util.ha \ join.ha \ names.ha \ @@ -920,13 +921,6 @@ path() { gen_ssa path strings bufio bytes io } -pathbuf() { - gen_srcs pathbuf \ - buffer.ha \ - ops.ha - gen_ssa pathbuf path -} - gensrcs_strconv() { gen_srcs strconv \ types.ha \ @@ -1188,7 +1182,6 @@ math::random os linux freebsd os::exec linux freebsd path -pathbuf shlex slice sort diff --git a/stdlib.mk b/stdlib.mk @@ -500,12 +500,6 @@ stdlib_deps_any+=$(stdlib_path_any) stdlib_path_linux=$(stdlib_path_any) stdlib_path_freebsd=$(stdlib_path_any) -# gen_lib pathbuf (any) -stdlib_pathbuf_any=$(HARECACHE)/pathbuf/pathbuf-any.o -stdlib_deps_any+=$(stdlib_pathbuf_any) -stdlib_pathbuf_linux=$(stdlib_pathbuf_any) -stdlib_pathbuf_freebsd=$(stdlib_pathbuf_any) - # gen_lib shlex (any) stdlib_shlex_any=$(HARECACHE)/shlex/shlex-any.o stdlib_deps_any+=$(stdlib_shlex_any) @@ -1482,6 +1476,7 @@ $(HARECACHE)/os/exec/os_exec-freebsd.ssa: $(stdlib_os_exec_freebsd_srcs) $(stdli # path (+any) stdlib_path_any_srcs= \ $(STDLIB)/path/+$(PLATFORM).ha \ + $(STDLIB)/path/buffer.ha \ $(STDLIB)/path/util.ha \ 
$(STDLIB)/path/join.ha \ $(STDLIB)/path/names.ha \ @@ -1493,17 +1488,6 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npath \ -t$(HARECACHE)/path/path.td $(stdlib_path_any_srcs) -# pathbuf (+any) -stdlib_pathbuf_any_srcs= \ - $(STDLIB)/pathbuf/buffer.ha \ - $(STDLIB)/pathbuf/ops.ha - -$(HARECACHE)/pathbuf/pathbuf-any.ssa: $(stdlib_pathbuf_any_srcs) $(stdlib_rt) $(stdlib_path_$(PLATFORM)) - @printf 'HAREC \t$@\n' - @mkdir -p $(HARECACHE)/pathbuf - @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npathbuf \ - -t$(HARECACHE)/pathbuf/pathbuf.td $(stdlib_pathbuf_any_srcs) - # shlex (+any) stdlib_shlex_any_srcs= \ $(STDLIB)/shlex/split.ha @@ -2268,12 +2252,6 @@ testlib_deps_any+=$(testlib_path_any) testlib_path_linux=$(testlib_path_any) testlib_path_freebsd=$(testlib_path_any) -# gen_lib pathbuf (any) -testlib_pathbuf_any=$(TESTCACHE)/pathbuf/pathbuf-any.o -testlib_deps_any+=$(testlib_pathbuf_any) -testlib_pathbuf_linux=$(testlib_pathbuf_any) -testlib_pathbuf_freebsd=$(testlib_pathbuf_any) - # gen_lib shlex (any) testlib_shlex_any=$(TESTCACHE)/shlex/shlex-any.o testlib_deps_any+=$(testlib_shlex_any) @@ -3286,6 +3264,7 @@ $(TESTCACHE)/os/exec/os_exec-freebsd.ssa: $(testlib_os_exec_freebsd_srcs) $(test # path (+any) testlib_path_any_srcs= \ $(STDLIB)/path/+$(PLATFORM).ha \ + $(STDLIB)/path/buffer.ha \ $(STDLIB)/path/util.ha \ $(STDLIB)/path/join.ha \ $(STDLIB)/path/names.ha \ @@ -3297,17 +3276,6 @@ $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npath \ -t$(TESTCACHE)/path/path.td $(testlib_path_any_srcs) -# pathbuf (+any) -testlib_pathbuf_any_srcs= \ - $(STDLIB)/pathbuf/buffer.ha \ - $(STDLIB)/pathbuf/ops.ha - -$(TESTCACHE)/pathbuf/pathbuf-any.ssa: $(testlib_pathbuf_any_srcs) $(testlib_rt) $(testlib_path_$(PLATFORM)) - @printf 'HAREC \t$@\n' - @mkdir -p $(TESTCACHE)/pathbuf - 
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npathbuf \ - -t$(TESTCACHE)/pathbuf/pathbuf.td $(testlib_pathbuf_any_srcs) - # shlex (+any) testlib_shlex_any_srcs= \ $(STDLIB)/shlex/split.ha \