pathbuffer: merge into path - hare - The Hare programming language

commit baf3b6bdad164b9e05ced89eda3cdb9e8ff087b8
parent e15431c81b810cbf8040832fd43173585ec350cb
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat,  8 Jan 2022 11:00:10 +0100

pathbuffer: merge into path

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
M path/README  | 25 +++++++++++++++++++++++++
A path/buffer.ha  | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M path/join.ha  | 105 +++++++++++++++++++++++++++++--------------------------------------------------
D pathbuf/README  | 24 ------------------------
D pathbuf/buffer.ha  | 127 -------------------------------------------------------------------------------
D pathbuf/ops.ha  | 40 ----------------------------------------
M scripts/gen-stdlib  | 9 +--------
M stdlib.mk  | 36 ++----------------------------------

8 files changed, 168 insertions(+), 300 deletions(-)
diff --git a/path/README b/path/README
@@ -3,3 +3,28 @@ The path module provides utilities for working with filesystem paths.
 Note that Hare expects paths to be valid UTF-8 strings. If you require the use
 of non-UTF-8 paths (ideally for only as long as it takes to delete or rename
 those files), see the low-level functions available from [[rt]].
+
+Use of the [[buffer]] type is recommended for efficient and consistent
+manipulation of filesystem paths.
+
+	let buf = path::init();
+	path::add(&buf, "/", "foo", "bar", "baz.txt");
+	io::println(path::string(&buf)); // "/foo/bar/baz.txt"
+
+	path::add(&buf, "../.././hello.txt");
+	io::println(path::string(&buf)); // "/foo/hello.txt"
+
+The buffer object includes an array of length [[PATH_MAX]], which can be
+somewhat large; on Linux it's 4096 bytes. You can allocate this on the stack in
+most cases, but you may prefer to allocate it elsewhere depending on your needs.
+
+	// Stack allocated
+	let buf = path::init();
+
+	// Statically allocated
+	static let buf = path::buffer { ... };
+	path::reset(&buf);
+
+	// Heap allocated
+	let buf = alloc(path::init());
+	defer free(buf);
diff --git a/path/buffer.ha b/path/buffer.ha
@@ -0,0 +1,102 @@
+use bytes;
+use errors;
+use strings;
+
+// A fixed-capacity buffer in which a filesystem path is built up. Use [[init]]
+// to create one, or [[reset]] to prepare a caller-allocated one.
+export type buffer = struct {
+	// Backing storage of PATH_MAX bytes.
+	buf: [PATH_MAX]u8,
+	// The current path; [[reset]] sets this to an empty slice of buf and
+	// [[string]] returns its contents.
+	cur: []u8,
+};
+
+// Initializes a new, empty path buffer and returns it by value.
+export fn init() buffer = {
+	let buf = buffer { ... };
+	// NOTE(review): reset points buf.cur at this local's own buf array and
+	// the struct is then returned by value -- confirm that cur still refers
+	// to the caller's copy of the storage rather than to this local.
+	reset(&buf);
+	return buf;
+};
+
+// Empties a path buffer, returning it to its initial state. Any path held in
+// the buffer beforehand is discarded.
+export fn reset(buf: *buffer) void = {
+	buf.cur = buf.buf[0..0];
+};
+
+// Creates a copy of another path buffer, which can be modified without
+// affecting the original. The copy holds the same path as the original.
+export fn dup(buf: *buffer) buffer = {
+	let new = buffer { ... };
+	new.buf[..] = buf.buf[..];
+	// Carry over the length of the source path as well; slicing to zero
+	// here would leave the copy empty even though the bytes were copied.
+	new.cur = new.buf[..len(buf.cur)];
+	return new;
+};
+
+// Returns the path currently held in the buffer. Paths are normalized as they
+// are added, so the result will not include any of the following:
+//
+// - "." components
+// - Redundant ".." components
+// - Repeated path separators
+//
+// "/usr//bin/../bin/./hare" becomes "/usr/bin/hare" and "../../foo/bar" is
+// unchanged. Note that handling of ".." components is not implemented yet.
+export fn string(buf: *buffer) str = {
+	const path = buf.cur;
+	return strings::fromutf8_unsafe(path);
+};
+
+// Normalizes and appends a path component to a buffer.
+//
+// A lone separator is only stored when the buffer is empty (marking the path
+// as rooted), and "." is discarded. ".." is not implemented yet and aborts.
+// Returns [[errors::overflow]] if appending the component plus a separator
+// would reach [[PATH_MAX]].
+//
+// Invariant: elem must either be equal to [[PATHSEP]], or contain no path
+// separators.
+fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = {
+	const elem = match (elem) {
+	case let elem: []u8 =>
+		yield elem;
+	case let string: str =>
+		yield strings::toutf8(string);
+	};
+	if (len(elem) == 1 && elem[0] == PATHSEP) {
+		if (len(buf.cur) == 0) {
+			static append(buf.cur, PATHSEP);
+		};
+		return;
+	} else if (bytes::equal(elem, ['.': u8])) {
+		return;
+	} else if (bytes::equal(elem, ['.': u8, '.': u8])) {
+		abort(); // TODO
+	};
+	// Reserve room for the component plus one separator byte.
+	if (len(buf.cur) + len(elem) + 1 >= PATH_MAX) {
+		return errors::overflow;
+	};
+	// Separate from the previous component unless the buffer is empty or
+	// already ends in a separator. The check must be > 0 rather than > 1,
+	// otherwise a one-byte first component ("a" then "b") yields "ab".
+	if (len(buf.cur) > 0 && buf.cur[len(buf.cur) - 1] != PATHSEP) {
+		static append(buf.cur, PATHSEP);
+	};
+	static append(buf.cur, elem...);
+};
+
+// Exercises appendnorm on relative paths, rooted paths, and repeated
+// separators.
+@test fn appendnorm() void = {
+	// Relative path: components are joined with a separator and "." is
+	// dropped.
+	let buf = init();
+	assert(string(&buf) == "");
+	appendnorm(&buf, "foo")!;
+	appendnorm(&buf, "bar")!;
+	appendnorm(&buf, "baz")!;
+	assert(string(&buf) == "foo/bar/baz");
+	appendnorm(&buf, ".")!;
+	appendnorm(&buf, "bad")!;
+	appendnorm(&buf, ".")!;
+	assert(string(&buf) == "foo/bar/baz/bad");
+
+	// Rooted path: the leading "/" is kept; later lone separators are
+	// ignored.
+	let buf = init();
+	appendnorm(&buf, "/")!;
+	appendnorm(&buf, "foo")!;
+	appendnorm(&buf, "bar")!;
+	appendnorm(&buf, "baz")!;
+	assert(string(&buf) == "/foo/bar/baz");
+	appendnorm(&buf, "/")!;
+	appendnorm(&buf, "/")!;
+	assert(string(&buf) == "/foo/bar/baz");
+
+	// Only separators: they collapse into a single "/".
+	let buf = init();
+	appendnorm(&buf, "/")!;
+	appendnorm(&buf, "/")!;
+	appendnorm(&buf, "/")!;
+	assert(string(&buf) == "/");
+};
diff --git a/path/join.ha b/path/join.ha
@@ -1,78 +1,49 @@
 use bytes;
-use bufio;
+use errors;
 use strings;
-use io;
 
-// Joins together several path components with the path separator. The caller
-// must free the return value.
-export fn join(paths: str...) str = {
-	// TODO: Normalize inputs so that if they end with a / we don't double
-	// up on delimiters
-	let sink = bufio::dynamic(io::mode::WRITE);
-	let utf8 = true;
-	for (let i = 0z; i < len(paths); i += 1) {
-		const buf = strings::toutf8(paths[i]);
-		if (len(buf) == 0) {
-			continue; // Empty path component, discard
-		} else if (len(buf) == 1 && buf[0] == PATHSEP && i == 0) {
-			// "/" as the first component
-			io::write(&sink, [PATHSEP])!;
-			continue;
-		};
-
-		// Trim away trailing PATHSEPs, if present
-		let l = len(buf);
-		for (l > 0 && buf[l - 1] == PATHSEP) {
-			l -= 1;
-		};
-
-		for (let q = 0z; q < l) {
-			let w = io::write(&sink, buf[q..l]) as size;
-			q += w;
-		};
-		if (i + 1 < len(paths)) {
-			assert(io::write(&sink, [PATHSEP]) as size == 1);
+// Joins several path elements together and appends them to a path buffer.
+// Each item is split on the path separator and every resulting component is
+// passed through appendnorm. Returns [[errors::overflow]] as soon as
+// appendnorm reports it; components appended before that stay in the buffer.
+export fn add(buf: *buffer, items: str...) (void | errors::overflow) = {
+	for (let i = 0z; i < len(items); i += 1) {
+		const elem = strings::toutf8(items[i]);
+		// NOTE(review): pathsep is not declared anywhere in this diff;
+		// presumably the byte-slice form of PATHSEP -- confirm.
+		const tok = bytes::tokenize(elem, pathsep);
+		for (let j = 0z; true; j += 1) {
+			const next = match (bytes::next_token(&tok)) {
+			case let tok: []u8 =>
+				yield tok;
+			case void =>
+				break;
+			};
+			if (len(next) == 0 && j == 0) {
+				// Handles the add("/foo") case as
+				// add("/", "foo");
+				appendnorm(buf, "/")?;
+			};
+			appendnorm(buf, next)?;
 		};
 	};
-
-	return strings::fromutf8_unsafe(bufio::buffer(&sink));
 };
 
-@test fn join() void = {
-	assert(PATHSEP == '/': u32: u8); // TODO: meh
-	let i = join("foo");
-	defer free(i);
-	assert(i == "foo");
+// Checks that add produces the same path whether the components are passed
+// separately or embedded in a single item, for relative and rooted paths.
+@test fn add() void = {
+	// Relative path from separate components.
+	let buf = init();
+	add(&buf, "foo", "bar", "baz")!;
+	assert(string(&buf) == "foo/bar/baz");
 
-	let p = join(i, "bar", "baz");
-	defer free(p);
-	assert(p == "foo/bar/baz");
+	// Rooted path where the first item carries its own leading separator.
+	reset(&buf);
+	add(&buf, "/foo/bar", "baz")!;
+	assert(string(&buf) == "/foo/bar/baz");
 
-	let q = join(p, "bat", "bad");
-	defer free(q);
-	assert(q == "foo/bar/baz/bat/bad");
-
-	let r = join(p, q);
-	defer free(r);
-	assert(r == "foo/bar/baz/foo/bar/baz/bat/bad");
-
-	let p = join("foo/", "bar");
-	defer free(p);
-	assert(p == "foo/bar");
-
-	let p = join("foo///", "bar");
-	defer free(p);
-	assert(p == "foo/bar");
-
-	let p = join("foo", "", "bar");
-	defer free(p);
-	assert(p == "foo/bar");
-
-	let p = join("/", "foo", "bar", "baz");
-	defer free(p);
-	assert(p == "/foo/bar/baz");
+	// Rooted path where the root is passed as its own item.
+	reset(&buf);
+	add(&buf, "/", "foo/bar", "baz")!;
+	assert(string(&buf) == "/foo/bar/baz");
+};
 
-	let i = join("/");
-	defer free(i);
-	assert(i == "/");
+// Joins a list of path components into a single normalized path and returns
+// it as a duplicated string, which the caller must free. The program aborts if
+// [[add]] reports that the result would overflow [[PATH_MAX]].
+export fn join(items: str...) str = {
+	static let scratch = buffer { ... };
+	reset(&scratch);
+	add(&scratch, items...)!;
+	const joined = string(&scratch);
+	return strings::dup(joined);
 };
diff --git a/pathbuf/README b/pathbuf/README
@@ -1,24 +0,0 @@
-The pathbuf module provides for the efficient and consistent manipulation of
-filesystem paths through the [[buffer]] type.
-
-	let buf = pathbuf::init();
-	pathbuf::join(&buf, "/", "foo", "bar", "baz.txt");
-	io::println(pathbuf::path(&buf)); // "/foo/bar/baz.txt"
-
-	pathbuf::join(&buf, "../.././hello.txt");
-	io::println(pathbuf::path(&buf)); // "/foo/hello.txt"
-
-The buffer object includes an array of length [[path::PATH_MAX]], which can be
-somewhat large - on Linux it's 4096. You can allocate this on the stack in most
-cases, but you may prefer to allocate it elsewhere depending on your needs.
-
-	// Stack allocated
-	let buf = pathbuf::init();
-
-	// Statically allocated
-	static let buf = pathbuf::buffer { ... };
-	pathbuf::init_static(&buf);
-
-	// Heap allocated
-	let buf = alloc(pathbuf::init());
-	defer free(buf);
diff --git a/pathbuf/buffer.ha b/pathbuf/buffer.ha
@@ -1,127 +0,0 @@
-use bytes;
-use errors;
-use path;
-use strings;
-
-export type buffer = struct {
-	buf: [path::PATH_MAX]u8,
-	cur: []u8,
-};
-
-// Initializes a new path buffer.
-export fn init() buffer = {
-	let buf = buffer { ... };
-	reset(&buf);
-	return buf;
-};
-
-// Initializes a caller-allocated path buffer.
-//
-// 	let buf = pathbuf::buffer { ... };
-// 	pathbuf::init_static(&buf);
-export fn init_static(buf: *buffer) void = {
-	reset(buf);
-};
-
-// Initializes a new path buffer and sets its initial value from a set of path
-// components.
-export fn initfrom(items: str...) (buffer | errors::overflow) = {
-	let buf = buffer { ... };
-	reset(&buf);
-	join(&buf, items...)?;
-	return buf;
-};
-
-// Resets a path buffer to its initial state (an empty path).
-export fn reset(buf: *buffer) void = {
-	buf.cur = buf.buf[..0];
-};
-
-// Creates a copy of another path buffer, which can be modified without
-// affecting the original.
-export fn dup(buf: *buffer) buffer = {
-	let new = buffer { ... };
-	new.buf[..] = buf.buf[..];
-	new.cur = new.buf[..0];
-	return new;
-};
-
-// Like [[dup]], but the new buffer is allocated by the caller.
-export fn dup_static(new: *buffer, old: *buffer) void = {
-	new.buf[..] = old.buf[..];
-	new.cur = old.buf[..0];
-};
-
-// Returns the current path stored in this buffer. The path will always be
-// normalized, which is to say that it will not include any "." or ".."
-// components, or repeated path separators (e.g. "/usr//bin/../bin/./hare"
-// becomes "/usr/bin/hare").
-export fn path(buf: *buffer) str = {
-	return strings::fromutf8_unsafe(buf.cur);
-};
-
-// Overwrites the contents of a [[buffer]] with an arbitrary path.
-export fn setpath(buf: *buffer, path: str) (void | errors::overflow) = {
-	reset(buf);
-	join(buf, path)?;
-};
-
-// Normalizes and appends a path component to a buffer.
-//
-// Invariant: elem must either be equal to [path::PATHSEP], or contain no path
-// separators.
-fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = {
-	const elem = match (elem) {
-	case let elem: []u8 =>
-		yield elem;
-	case let string: str =>
-		yield strings::toutf8(string);
-	};
-	if (len(elem) == 1 && elem[0] == path::PATHSEP) {
-		if (len(buf.cur) == 0) {
-			static append(buf.cur, path::PATHSEP);
-			return;
-		};
-		return;
-	} else if (bytes::equal(elem, ['.': u8])) {
-		return;
-	} else if (bytes::equal(elem, ['.': u8, '.': u8])) {
-		abort(); // TODO
-	};
-	if (len(buf.cur) + len(elem) + 1 >= path::PATH_MAX) {
-		return errors::overflow;
-	};
-	if (len(buf.cur) > 1 && buf.cur[len(buf.cur) - 1] != path::PATHSEP) {
-		static append(buf.cur, path::PATHSEP);
-	};
-	static append(buf.cur, elem...);
-};
-
-@test fn appendnorm() void = {
-	let buf = init();
-	assert(path(&buf) == "");
-	appendnorm(&buf, "foo")!;
-	appendnorm(&buf, "bar")!;
-	appendnorm(&buf, "baz")!;
-	assert(path(&buf) == "foo/bar/baz");
-	appendnorm(&buf, ".")!;
-	appendnorm(&buf, "bad")!;
-	appendnorm(&buf, ".")!;
-	assert(path(&buf) == "foo/bar/baz/bad");
-
-	let buf = init();
-	appendnorm(&buf, "/")!;
-	appendnorm(&buf, "foo")!;
-	appendnorm(&buf, "bar")!;
-	appendnorm(&buf, "baz")!;
-	assert(path(&buf) == "/foo/bar/baz");
-	appendnorm(&buf, "/")!;
-	appendnorm(&buf, "/")!;
-	assert(path(&buf) == "/foo/bar/baz");
-
-	let buf = init();
-	appendnorm(&buf, "/")!;
-	appendnorm(&buf, "/")!;
-	appendnorm(&buf, "/")!;
-	assert(path(&buf) == "/");
-};
diff --git a/pathbuf/ops.ha b/pathbuf/ops.ha
@@ -1,40 +0,0 @@
-use bytes;
-use errors;
-use path;
-use strings;
-
-// Joins several path elements together and appends them to a path buffer.
-export fn join(buf: *buffer, items: str...) (void | errors::overflow) = {
-	for (let i = 0z; i < len(items); i += 1) {
-		const elem = strings::toutf8(items[i]);
-		const tok = bytes::tokenize(elem, [path::PATHSEP]);
-		for (let j = 0z; true; j += 1) {
-			const next = match (bytes::next_token(&tok)) {
-			case let tok: []u8 =>
-				yield tok;
-			case void =>
-				break;
-			};
-			if (len(next) == 0 && j == 0) {
-				// Handles the join("/foo") case as
-				// join("/", "foo");
-				appendnorm(buf, "/")?;
-			};
-			appendnorm(buf, next)?;
-		};
-	};
-};
-
-@test fn join() void = {
-	let buf = init();
-	join(&buf, "foo", "bar", "baz")!;
-	assert(path(&buf) == "foo/bar/baz");
-
-	reset(&buf);
-	join(&buf, "/foo/bar", "baz")!;
-	assert(path(&buf) == "/foo/bar/baz");
-
-	reset(&buf);
-	join(&buf, "/", "foo/bar", "baz")!;
-	assert(path(&buf) == "/foo/bar/baz");
-};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -913,6 +913,7 @@ os_exec() {
 path() {
 	gen_srcs path \
 		'+$(PLATFORM).ha' \
+		buffer.ha \
 		util.ha \
 		join.ha \
 		names.ha \
@@ -920,13 +921,6 @@ path() {
 	gen_ssa path strings bufio bytes io
 }
 
-pathbuf() {
-	gen_srcs pathbuf \
-		buffer.ha \
-		ops.ha
-	gen_ssa pathbuf path
-}
-
 gensrcs_strconv() {
 	gen_srcs strconv \
 		types.ha \
@@ -1188,7 +1182,6 @@ math::random
 os				linux freebsd
 os::exec		linux freebsd
 path
-pathbuf
 shlex
 slice
 sort
diff --git a/stdlib.mk b/stdlib.mk
@@ -500,12 +500,6 @@ stdlib_deps_any+=$(stdlib_path_any)
 stdlib_path_linux=$(stdlib_path_any)
 stdlib_path_freebsd=$(stdlib_path_any)
 
-# gen_lib pathbuf (any)
-stdlib_pathbuf_any=$(HARECACHE)/pathbuf/pathbuf-any.o
-stdlib_deps_any+=$(stdlib_pathbuf_any)
-stdlib_pathbuf_linux=$(stdlib_pathbuf_any)
-stdlib_pathbuf_freebsd=$(stdlib_pathbuf_any)
-
 # gen_lib shlex (any)
 stdlib_shlex_any=$(HARECACHE)/shlex/shlex-any.o
 stdlib_deps_any+=$(stdlib_shlex_any)
@@ -1482,6 +1476,7 @@ $(HARECACHE)/os/exec/os_exec-freebsd.ssa: $(stdlib_os_exec_freebsd_srcs) $(stdli
 # path (+any)
 stdlib_path_any_srcs= \
 	$(STDLIB)/path/+$(PLATFORM).ha \
+	$(STDLIB)/path/buffer.ha \
 	$(STDLIB)/path/util.ha \
 	$(STDLIB)/path/join.ha \
 	$(STDLIB)/path/names.ha \
@@ -1493,17 +1488,6 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st
 	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npath \
 		-t$(HARECACHE)/path/path.td $(stdlib_path_any_srcs)
 
-# pathbuf (+any)
-stdlib_pathbuf_any_srcs= \
-	$(STDLIB)/pathbuf/buffer.ha \
-	$(STDLIB)/pathbuf/ops.ha
-
-$(HARECACHE)/pathbuf/pathbuf-any.ssa: $(stdlib_pathbuf_any_srcs) $(stdlib_rt) $(stdlib_path_$(PLATFORM))
-	@printf 'HAREC \t$@\n'
-	@mkdir -p $(HARECACHE)/pathbuf
-	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npathbuf \
-		-t$(HARECACHE)/pathbuf/pathbuf.td $(stdlib_pathbuf_any_srcs)
-
 # shlex (+any)
 stdlib_shlex_any_srcs= \
 	$(STDLIB)/shlex/split.ha
@@ -2268,12 +2252,6 @@ testlib_deps_any+=$(testlib_path_any)
 testlib_path_linux=$(testlib_path_any)
 testlib_path_freebsd=$(testlib_path_any)
 
-# gen_lib pathbuf (any)
-testlib_pathbuf_any=$(TESTCACHE)/pathbuf/pathbuf-any.o
-testlib_deps_any+=$(testlib_pathbuf_any)
-testlib_pathbuf_linux=$(testlib_pathbuf_any)
-testlib_pathbuf_freebsd=$(testlib_pathbuf_any)
-
 # gen_lib shlex (any)
 testlib_shlex_any=$(TESTCACHE)/shlex/shlex-any.o
 testlib_deps_any+=$(testlib_shlex_any)
@@ -3286,6 +3264,7 @@ $(TESTCACHE)/os/exec/os_exec-freebsd.ssa: $(testlib_os_exec_freebsd_srcs) $(test
 # path (+any)
 testlib_path_any_srcs= \
 	$(STDLIB)/path/+$(PLATFORM).ha \
+	$(STDLIB)/path/buffer.ha \
 	$(STDLIB)/path/util.ha \
 	$(STDLIB)/path/join.ha \
 	$(STDLIB)/path/names.ha \
@@ -3297,17 +3276,6 @@ $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib
 	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npath \
 		-t$(TESTCACHE)/path/path.td $(testlib_path_any_srcs)
 
-# pathbuf (+any)
-testlib_pathbuf_any_srcs= \
-	$(STDLIB)/pathbuf/buffer.ha \
-	$(STDLIB)/pathbuf/ops.ha
-
-$(TESTCACHE)/pathbuf/pathbuf-any.ssa: $(testlib_pathbuf_any_srcs) $(testlib_rt) $(testlib_path_$(PLATFORM))
-	@printf 'HAREC \t$@\n'
-	@mkdir -p $(TESTCACHE)/pathbuf
-	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npathbuf \
-		-t$(TESTCACHE)/pathbuf/pathbuf.td $(testlib_pathbuf_any_srcs)
-
 # shlex (+any)
 testlib_shlex_any_srcs= \
 	$(STDLIB)/shlex/split.ha \

	hare The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE

M	path/README	\|	25	+++++++++++++++++++++++++
A	path/buffer.ha	\|	102	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	path/join.ha	\|	105	+++++++++++++++++++++++++++++--------------------------------------------------
D	pathbuf/README	\|	24	------------------------
D	pathbuf/buffer.ha	\|	127	-------------------------------------------------------------------------------
D	pathbuf/ops.ha	\|	40	----------------------------------------
M	scripts/gen-stdlib	\|	9	+--------
M	stdlib.mk	\|	36	++----------------------------------