commit baf3b6bdad164b9e05ced89eda3cdb9e8ff087b8
parent e15431c81b810cbf8040832fd43173585ec350cb
Author: Drew DeVault <sir@cmpwn.com>
Date: Sat, 8 Jan 2022 11:00:10 +0100
pathbuffer: merge into path
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
8 files changed, 168 insertions(+), 300 deletions(-)
diff --git a/path/README b/path/README
@@ -3,3 +3,28 @@ The path module provides utilities for working with filesystem paths.
Note that Hare expects paths to be valid UTF-8 strings. If you require the use
of non-UTF-8 paths (ideally for only as long as it takes to delete or rename
those files), see the low-level functions available from [[rt]].
+
+Use of the [[buffer]] type is recommended for efficient and consistent
+manipulation of filesystem paths.
+
+ let buf = path::init();
+ path::add(&buf, "/", "foo", "bar", "baz.txt");
+ io::println(path::string(&buf)); // "/foo/bar/baz.txt"
+
+ path::add(&buf, "../.././hello.txt");
+ io::println(path::string(&buf)); // "/foo/hello.txt"
+
+The buffer object includes an array of length [[PATH_MAX]], which can be
+somewhat large; on Linux it's 4096 bytes. You can allocate this on the stack in
+most cases, but you may prefer to allocate it elsewhere depending on your needs.
+
+ // Stack allocated
+ let buf = path::init();
+
+ // Statically allocated
+ static let buf = path::buffer { ... };
+ path::reset(&buf);
+
+ // Heap allocated
+ let buf = alloc(path::init());
+ defer free(buf);
diff --git a/path/buffer.ha b/path/buffer.ha
@@ -0,0 +1,102 @@
+use bytes;
+use errors;
+use strings;
+
+export type buffer = struct {
+ buf: [PATH_MAX]u8,
+ cur: []u8,
+};
+
+// Initializes a new path buffer.
+export fn init() buffer = {
+ let buf = buffer { ... };
+ reset(&buf);
+ return buf;
+};
+
+// Resets a path buffer to its initial state.
+export fn reset(buf: *buffer) void = {
+ buf.cur = buf.buf[..0];
+};
+
+// Creates a copy of another path buffer, including its current path. The copy
+// can be modified without affecting the original.
+export fn dup(buf: *buffer) buffer = {
+	let new = buffer { ... };
+	new.buf[..] = buf.buf[..];
+	new.cur = new.buf[..len(buf.cur)]; // keep the path; [..0] would empty it
+	return new;
+};
+
+// Returns the current path stored in this buffer. The path will always be
+// normalized, which is to say that it will not include any of the following:
+//
+// - "." components
+// - Redundant ".." components
+// - Repeated path separators
+//
+// "/usr//bin/../bin/./hare" becomes "/usr/bin/hare" and "../../foo/bar" is
+// unchanged.
+export fn string(buf: *buffer) str = {
+ return strings::fromutf8_unsafe(buf.cur);
+};
+
+// Normalizes and appends a path component to a buffer. "." is ignored, "/" is
+// only kept as the root of an empty buffer, and ".." is not yet implemented.
+// Invariant: elem must either be equal to [PATHSEP], or contain no path
+// separators.
+fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = {
+	const elem = match (elem) {
+	case let elem: []u8 =>
+		yield elem;
+	case let string: str =>
+		yield strings::toutf8(string);
+	};
+	if (len(elem) == 1 && elem[0] == PATHSEP) {
+		// A separator is only stored as the root of an empty buffer.
+		if (len(buf.cur) == 0) {
+			static append(buf.cur, PATHSEP);
+		};
+		return;
+	} else if (bytes::equal(elem, ['.': u8])) {
+		return;
+	} else if (bytes::equal(elem, ['.': u8, '.': u8])) {
+		abort(); // TODO
+	};
+	if (len(buf.cur) + len(elem) + 1 >= PATH_MAX) {
+		return errors::overflow;
+	};
+	if (len(buf.cur) > 0 && buf.cur[len(buf.cur) - 1] != PATHSEP) {
+		static append(buf.cur, PATHSEP);
+	};
+	static append(buf.cur, elem...);
+};
+
+@test fn appendnorm() void = {
+ let buf = init();
+ assert(string(&buf) == "");
+ appendnorm(&buf, "foo")!;
+ appendnorm(&buf, "bar")!;
+ appendnorm(&buf, "baz")!;
+ assert(string(&buf) == "foo/bar/baz");
+ appendnorm(&buf, ".")!;
+ appendnorm(&buf, "bad")!;
+ appendnorm(&buf, ".")!;
+ assert(string(&buf) == "foo/bar/baz/bad");
+
+ let buf = init();
+ appendnorm(&buf, "/")!;
+ appendnorm(&buf, "foo")!;
+ appendnorm(&buf, "bar")!;
+ appendnorm(&buf, "baz")!;
+ assert(string(&buf) == "/foo/bar/baz");
+ appendnorm(&buf, "/")!;
+ appendnorm(&buf, "/")!;
+ assert(string(&buf) == "/foo/bar/baz");
+
+ let buf = init();
+ appendnorm(&buf, "/")!;
+ appendnorm(&buf, "/")!;
+ appendnorm(&buf, "/")!;
+ assert(string(&buf) == "/");
+};
diff --git a/path/join.ha b/path/join.ha
@@ -1,78 +1,49 @@
use bytes;
-use bufio;
+use errors;
use strings;
-use io;
-// Joins together several path components with the path separator. The caller
-// must free the return value.
-export fn join(paths: str...) str = {
- // TODO: Normalize inputs so that if they end with a / we don't double
- // up on delimiters
- let sink = bufio::dynamic(io::mode::WRITE);
- let utf8 = true;
- for (let i = 0z; i < len(paths); i += 1) {
- const buf = strings::toutf8(paths[i]);
- if (len(buf) == 0) {
- continue; // Empty path component, discard
- } else if (len(buf) == 1 && buf[0] == PATHSEP && i == 0) {
- // "/" as the first component
- io::write(&sink, [PATHSEP])!;
- continue;
- };
-
- // Trim away trailing PATHSEPs, if present
- let l = len(buf);
- for (l > 0 && buf[l - 1] == PATHSEP) {
- l -= 1;
- };
-
- for (let q = 0z; q < l) {
- let w = io::write(&sink, buf[q..l]) as size;
- q += w;
- };
- if (i + 1 < len(paths)) {
- assert(io::write(&sink, [PATHSEP]) as size == 1);
+// Joins several path elements together and appends them to a path buffer.
+export fn add(buf: *buffer, items: str...) (void | errors::overflow) = {
+ for (let i = 0z; i < len(items); i += 1) {
+ const elem = strings::toutf8(items[i]);
+ const tok = bytes::tokenize(elem, pathsep);
+ for (let j = 0z; true; j += 1) {
+ const next = match (bytes::next_token(&tok)) {
+ case let tok: []u8 =>
+ yield tok;
+ case void =>
+ break;
+ };
+ if (len(next) == 0 && j == 0) {
+ // Handles the add("/foo") case as
+ // add("/", "foo");
+ appendnorm(buf, "/")?;
+ };
+ appendnorm(buf, next)?;
};
};
-
- return strings::fromutf8_unsafe(bufio::buffer(&sink));
};
-@test fn join() void = {
- assert(PATHSEP == '/': u32: u8); // TODO: meh
- let i = join("foo");
- defer free(i);
- assert(i == "foo");
+@test fn add() void = {
+ let buf = init();
+ add(&buf, "foo", "bar", "baz")!;
+ assert(string(&buf) == "foo/bar/baz");
- let p = join(i, "bar", "baz");
- defer free(p);
- assert(p == "foo/bar/baz");
+ reset(&buf);
+ add(&buf, "/foo/bar", "baz")!;
+ assert(string(&buf) == "/foo/bar/baz");
- let q = join(p, "bat", "bad");
- defer free(q);
- assert(q == "foo/bar/baz/bat/bad");
-
- let r = join(p, q);
- defer free(r);
- assert(r == "foo/bar/baz/foo/bar/baz/bat/bad");
-
- let p = join("foo/", "bar");
- defer free(p);
- assert(p == "foo/bar");
-
- let p = join("foo///", "bar");
- defer free(p);
- assert(p == "foo/bar");
-
- let p = join("foo", "", "bar");
- defer free(p);
- assert(p == "foo/bar");
-
- let p = join("/", "foo", "bar", "baz");
- defer free(p);
- assert(p == "/foo/bar/baz");
+ reset(&buf);
+ add(&buf, "/", "foo/bar", "baz")!;
+ assert(string(&buf) == "/foo/bar/baz");
+};
- let i = join("/");
- defer free(i);
- assert(i == "/");
+// Joins a list of path components together, normalizes it, and returns the
+// resulting string. The caller must free the return value. If the resulting
+// path would exceed [[PATH_MAX]], the program aborts.
+export fn join(items: str...) str = {
+ static let buf = buffer { ... };
+ reset(&buf);
+ add(&buf, items...)!;
+ return strings::dup(string(&buf));
};
diff --git a/pathbuf/README b/pathbuf/README
@@ -1,24 +0,0 @@
-The pathbuf module provides for the efficient and consistent manipulation of
-filesystem paths through the [[buffer]] type.
-
- let buf = pathbuf::init();
- pathbuf::join(&buf, "/", "foo", "bar", "baz.txt");
- io::println(pathbuf::path(&buf)); // "/foo/bar/baz.txt"
-
- pathbuf::join(&buf, "../.././hello.txt");
- io::println(pathbuf::path(&buf)); // "/foo/hello.txt"
-
-The buffer object includes an array of length [[path::PATH_MAX]], which can be
-somewhat large - on Linux it's 4096. You can allocate this on the stack in most
-cases, but you may prefer to allocate it elsewhere depending on your needs.
-
- // Stack allocated
- let buf = pathbuf::init();
-
- // Statically allocated
- static let buf = pathbuf::buffer { ... };
- pathbuf::init_static(&buf);
-
- // Heap allocated
- let buf = alloc(pathbuf::init());
- defer free(buf);
diff --git a/pathbuf/buffer.ha b/pathbuf/buffer.ha
@@ -1,127 +0,0 @@
-use bytes;
-use errors;
-use path;
-use strings;
-
-export type buffer = struct {
- buf: [path::PATH_MAX]u8,
- cur: []u8,
-};
-
-// Initializes a new path buffer.
-export fn init() buffer = {
- let buf = buffer { ... };
- reset(&buf);
- return buf;
-};
-
-// Initializes a caller-allocated path buffer.
-//
-// let buf = pathbuf::buffer { ... };
-// pathbuf::init_static(&buf);
-export fn init_static(buf: *buffer) void = {
- reset(buf);
-};
-
-// Initializes a new path buffer and sets its initial value from a set of path
-// components.
-export fn initfrom(items: str...) (buffer | errors::overflow) = {
- let buf = buffer { ... };
- reset(&buf);
- join(&buf, items...)?;
- return buf;
-};
-
-// Resets a path buffer to its initial state (an empty path).
-export fn reset(buf: *buffer) void = {
- buf.cur = buf.buf[..0];
-};
-
-// Creates a copy of another path buffer, which can be modified without
-// affecting the original.
-export fn dup(buf: *buffer) buffer = {
- let new = buffer { ... };
- new.buf[..] = buf.buf[..];
- new.cur = new.buf[..0];
- return new;
-};
-
-// Like [[dup]], but the new buffer is allocated by the caller.
-export fn dup_static(new: *buffer, old: *buffer) void = {
- new.buf[..] = old.buf[..];
- new.cur = old.buf[..0];
-};
-
-// Returns the current path stored in this buffer. The path will always be
-// normalized, which is to say that it will not include any "." or ".."
-// components, or repeated path separators (e.g. "/usr//bin/../bin/./hare"
-// becomes "/usr/bin/hare").
-export fn path(buf: *buffer) str = {
- return strings::fromutf8_unsafe(buf.cur);
-};
-
-// Overwrites the contents of a [[buffer]] with an arbitrary path.
-export fn setpath(buf: *buffer, path: str) (void | errors::overflow) = {
- reset(buf);
- join(buf, path)?;
-};
-
-// Normalizes and appends a path component to a buffer.
-//
-// Invariant: elem must either be equal to [path::PATHSEP], or contain no path
-// separators.
-fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = {
- const elem = match (elem) {
- case let elem: []u8 =>
- yield elem;
- case let string: str =>
- yield strings::toutf8(string);
- };
- if (len(elem) == 1 && elem[0] == path::PATHSEP) {
- if (len(buf.cur) == 0) {
- static append(buf.cur, path::PATHSEP);
- return;
- };
- return;
- } else if (bytes::equal(elem, ['.': u8])) {
- return;
- } else if (bytes::equal(elem, ['.': u8, '.': u8])) {
- abort(); // TODO
- };
- if (len(buf.cur) + len(elem) + 1 >= path::PATH_MAX) {
- return errors::overflow;
- };
- if (len(buf.cur) > 1 && buf.cur[len(buf.cur) - 1] != path::PATHSEP) {
- static append(buf.cur, path::PATHSEP);
- };
- static append(buf.cur, elem...);
-};
-
-@test fn appendnorm() void = {
- let buf = init();
- assert(path(&buf) == "");
- appendnorm(&buf, "foo")!;
- appendnorm(&buf, "bar")!;
- appendnorm(&buf, "baz")!;
- assert(path(&buf) == "foo/bar/baz");
- appendnorm(&buf, ".")!;
- appendnorm(&buf, "bad")!;
- appendnorm(&buf, ".")!;
- assert(path(&buf) == "foo/bar/baz/bad");
-
- let buf = init();
- appendnorm(&buf, "/")!;
- appendnorm(&buf, "foo")!;
- appendnorm(&buf, "bar")!;
- appendnorm(&buf, "baz")!;
- assert(path(&buf) == "/foo/bar/baz");
- appendnorm(&buf, "/")!;
- appendnorm(&buf, "/")!;
- assert(path(&buf) == "/foo/bar/baz");
-
- let buf = init();
- appendnorm(&buf, "/")!;
- appendnorm(&buf, "/")!;
- appendnorm(&buf, "/")!;
- assert(path(&buf) == "/");
-};
diff --git a/pathbuf/ops.ha b/pathbuf/ops.ha
@@ -1,40 +0,0 @@
-use bytes;
-use errors;
-use path;
-use strings;
-
-// Joins several path elements together and appends them to a path buffer.
-export fn join(buf: *buffer, items: str...) (void | errors::overflow) = {
- for (let i = 0z; i < len(items); i += 1) {
- const elem = strings::toutf8(items[i]);
- const tok = bytes::tokenize(elem, [path::PATHSEP]);
- for (let j = 0z; true; j += 1) {
- const next = match (bytes::next_token(&tok)) {
- case let tok: []u8 =>
- yield tok;
- case void =>
- break;
- };
- if (len(next) == 0 && j == 0) {
- // Handles the join("/foo") case as
- // join("/", "foo");
- appendnorm(buf, "/")?;
- };
- appendnorm(buf, next)?;
- };
- };
-};
-
-@test fn join() void = {
- let buf = init();
- join(&buf, "foo", "bar", "baz")!;
- assert(path(&buf) == "foo/bar/baz");
-
- reset(&buf);
- join(&buf, "/foo/bar", "baz")!;
- assert(path(&buf) == "/foo/bar/baz");
-
- reset(&buf);
- join(&buf, "/", "foo/bar", "baz")!;
- assert(path(&buf) == "/foo/bar/baz");
-};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -913,6 +913,7 @@ os_exec() {
path() {
gen_srcs path \
'+$(PLATFORM).ha' \
+ buffer.ha \
util.ha \
join.ha \
names.ha \
@@ -920,13 +921,6 @@ path() {
gen_ssa path strings bufio bytes io
}
-pathbuf() {
- gen_srcs pathbuf \
- buffer.ha \
- ops.ha
- gen_ssa pathbuf path
-}
-
gensrcs_strconv() {
gen_srcs strconv \
types.ha \
@@ -1188,7 +1182,6 @@ math::random
os linux freebsd
os::exec linux freebsd
path
-pathbuf
shlex
slice
sort
diff --git a/stdlib.mk b/stdlib.mk
@@ -500,12 +500,6 @@ stdlib_deps_any+=$(stdlib_path_any)
stdlib_path_linux=$(stdlib_path_any)
stdlib_path_freebsd=$(stdlib_path_any)
-# gen_lib pathbuf (any)
-stdlib_pathbuf_any=$(HARECACHE)/pathbuf/pathbuf-any.o
-stdlib_deps_any+=$(stdlib_pathbuf_any)
-stdlib_pathbuf_linux=$(stdlib_pathbuf_any)
-stdlib_pathbuf_freebsd=$(stdlib_pathbuf_any)
-
# gen_lib shlex (any)
stdlib_shlex_any=$(HARECACHE)/shlex/shlex-any.o
stdlib_deps_any+=$(stdlib_shlex_any)
@@ -1482,6 +1476,7 @@ $(HARECACHE)/os/exec/os_exec-freebsd.ssa: $(stdlib_os_exec_freebsd_srcs) $(stdli
# path (+any)
stdlib_path_any_srcs= \
$(STDLIB)/path/+$(PLATFORM).ha \
+ $(STDLIB)/path/buffer.ha \
$(STDLIB)/path/util.ha \
$(STDLIB)/path/join.ha \
$(STDLIB)/path/names.ha \
@@ -1493,17 +1488,6 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npath \
-t$(HARECACHE)/path/path.td $(stdlib_path_any_srcs)
-# pathbuf (+any)
-stdlib_pathbuf_any_srcs= \
- $(STDLIB)/pathbuf/buffer.ha \
- $(STDLIB)/pathbuf/ops.ha
-
-$(HARECACHE)/pathbuf/pathbuf-any.ssa: $(stdlib_pathbuf_any_srcs) $(stdlib_rt) $(stdlib_path_$(PLATFORM))
- @printf 'HAREC \t$@\n'
- @mkdir -p $(HARECACHE)/pathbuf
- @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npathbuf \
- -t$(HARECACHE)/pathbuf/pathbuf.td $(stdlib_pathbuf_any_srcs)
-
# shlex (+any)
stdlib_shlex_any_srcs= \
$(STDLIB)/shlex/split.ha
@@ -2268,12 +2252,6 @@ testlib_deps_any+=$(testlib_path_any)
testlib_path_linux=$(testlib_path_any)
testlib_path_freebsd=$(testlib_path_any)
-# gen_lib pathbuf (any)
-testlib_pathbuf_any=$(TESTCACHE)/pathbuf/pathbuf-any.o
-testlib_deps_any+=$(testlib_pathbuf_any)
-testlib_pathbuf_linux=$(testlib_pathbuf_any)
-testlib_pathbuf_freebsd=$(testlib_pathbuf_any)
-
# gen_lib shlex (any)
testlib_shlex_any=$(TESTCACHE)/shlex/shlex-any.o
testlib_deps_any+=$(testlib_shlex_any)
@@ -3286,6 +3264,7 @@ $(TESTCACHE)/os/exec/os_exec-freebsd.ssa: $(testlib_os_exec_freebsd_srcs) $(test
# path (+any)
testlib_path_any_srcs= \
$(STDLIB)/path/+$(PLATFORM).ha \
+ $(STDLIB)/path/buffer.ha \
$(STDLIB)/path/util.ha \
$(STDLIB)/path/join.ha \
$(STDLIB)/path/names.ha \
@@ -3297,17 +3276,6 @@ $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npath \
-t$(TESTCACHE)/path/path.td $(testlib_path_any_srcs)
-# pathbuf (+any)
-testlib_pathbuf_any_srcs= \
- $(STDLIB)/pathbuf/buffer.ha \
- $(STDLIB)/pathbuf/ops.ha
-
-$(TESTCACHE)/pathbuf/pathbuf-any.ssa: $(testlib_pathbuf_any_srcs) $(testlib_rt) $(testlib_path_$(PLATFORM))
- @printf 'HAREC \t$@\n'
- @mkdir -p $(TESTCACHE)/pathbuf
- @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npathbuf \
- -t$(TESTCACHE)/pathbuf/pathbuf.td $(testlib_pathbuf_any_srcs)
-
# shlex (+any)
testlib_shlex_any_srcs= \
$(STDLIB)/shlex/split.ha \