commit d2e78b43d5e05701603e8635cbd091cc2450ce5c
parent 7db2bed83d7233a9a95e9e272994932d618ab9b5
Author: Autumn! <autumnull@posteo.net>
Date: Sun, 7 May 2023 01:43:26 +0000
path: remove trailing separators during normalization
this commit also rewrites the path appending functions from scratch
in order to make the logic neater.
Signed-off-by: Autumn! <autumnull@posteo.net>
Diffstat:
M | path/README | | | 14 | ++------------ |
M | path/buffer.ha | | | 175 | +------------------------------------------------------------------------------ |
M | path/stack.ha | | | 188 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
M | path/util.ha | | | 2 | ++ |
4 files changed, 125 insertions(+), 254 deletions(-)
diff --git a/path/README b/path/README
@@ -12,18 +12,8 @@ normalized, which is to say that it will not include any of the following:
- Redundant path separators
- Any "." components, except in the case of "."
-Assuming that PATHSEP is '/', "/usr//bin/../bin/./hare" becomes
-"/usr/bin/hare" and "../../foo/bar" is unchanged. The path will only end in
-a slash if the last item which was pushed ended in a slash, like so:
-
- let buf = path::init("foo", "bar")!;
- assert(path::string(&buf) == "foo/bar");
-
- path::set(&buf, "foo", "bar/")!;
- assert(path::string(&buf) == "foo/bar/");
-
- path::set(&buf, "foo", "bar", "/")!;
- assert(path::string(&buf) == "foo/bar/");
+Assuming that [[PATHSEP]] is '/', "/usr//bin/../bin/./hare/" becomes
+"/usr/bin/hare" and "../../foo/bar" is unchanged.
The buffer object includes an array of length [[PATH_MAX]], which can be
somewhat large; on Linux it's 4096 bytes. You can allocate this on the stack in
diff --git a/path/buffer.ha b/path/buffer.ha
@@ -28,177 +28,6 @@ export fn set(buf: *buffer, items: str...) (str | errors::overflow) = {
// The return value is borrowed from the buffer. Use [[strings::dup]] to
// extend the lifetime of the string.
export fn string(buf: *buffer) str = {
- const value = strings::fromutf8_unsafe(buf.buf[..buf.end]);
- if (value == "") {
- return ".";
- };
- return value;
-};
-
-const dot: []u8 = ['.'];
-const dotdot: []u8 = ['.', '.'];
-const dotdotslash: []u8 = ['.', '.', PATHSEP];
-
-// Normalizes and appends a path component to a buffer.
-//
-// Invariant: elem must either be equal to [PATHSEP], or contain no path
-// separators.
-fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = {
- const elem = match (elem) {
- case let elem: []u8 =>
- yield elem;
- case let string: str =>
- yield strings::toutf8(string);
- };
- if (len(elem) == 1 && elem[0] == PATHSEP) {
- if (buf.end == 0) {
- buf.buf[0] = PATHSEP;
- buf.end += 1;
- return;
- };
- return;
- } else if (bytes::equal(elem, dot)) {
- return;
- } else if (bytes::equal(elem, dotdot)) {
- return parent(buf);
- };
- return doappend(buf, elem);
-};
-
-// Moves the buffer to the parent of the current directory.
-fn parent(buf: *buffer) (void | errors::overflow) = {
- let ascending = true;
- const iter = iter(buf);
- for (true) {
- match (next(&iter)) {
- case let elem: str =>
- if (elem != "..") {
- ascending = false;
- break;
- };
- case void =>
- break;
- };
- };
- if (ascending) {
- // If we are appending ".." to a path which is entirely composed
- // of ".." elements, then we want to append it normally, so that
- // "../.." becomes "../../.." instead of "..".
- return doappend(buf, dotdot);
- };
- // XXX: This is not super efficient
- const name = dirname(string(buf));
- buf.end = 0;
- push(buf, name)?;
-};
-
-fn doappend(buf: *buffer, elem: []u8) (void | errors::overflow) = {
- if (buf.end + len(elem) + 1 >= PATH_MAX) {
- return errors::overflow;
- };
- if (buf.end > 0 && buf.buf[buf.end - 1] != PATHSEP) {
- buf.buf[buf.end] = PATHSEP;
- buf.end += 1;
- };
- buf.buf[buf.end..buf.end + len(elem)] = elem;
- buf.end += len(elem);
-};
-
-@test fn appendnorm() void = {
- let buf = init()!;
- assert(string(&buf) == ".");
- appendnorm(&buf, "foo")!;
- appendnorm(&buf, "bar")!;
- appendnorm(&buf, "baz")!;
- let s = strings::join(pathsepstr, "foo", "bar", "baz");
- assert(string(&buf) == s);
- appendnorm(&buf, ".")!;
- appendnorm(&buf, "bad")!;
- appendnorm(&buf, ".")!;
- free(s);
- s = strings::join(pathsepstr, "foo", "bar", "baz", "bad");
- assert(string(&buf) == s);
- free(s);
-
- buf.end = 0;
- appendnorm(&buf, pathsepstr)!;
- appendnorm(&buf, "foo")!;
- appendnorm(&buf, "bar")!;
- appendnorm(&buf, "baz")!;
- s = strings::join(pathsepstr, "", "foo", "bar", "baz");
- assert(string(&buf) == s);
- appendnorm(&buf, pathsepstr)!;
- appendnorm(&buf, pathsepstr)!;
- assert(string(&buf) == s);
- free(s);
-
- buf.end = 0;
- appendnorm(&buf, pathsepstr)!;
- appendnorm(&buf, pathsepstr)!;
- appendnorm(&buf, pathsepstr)!;
- assert(string(&buf) == pathsepstr);
-
- buf.end = 0;
- appendnorm(&buf, ".")!;
- appendnorm(&buf, "foo")!;
- assert(string(&buf) == "foo");
- appendnorm(&buf, "..")!;
- assert(string(&buf) == ".");
- appendnorm(&buf, "..")!;
- assert(string(&buf) == "..");
-
- buf.end = 0;
- appendnorm(&buf, "..")!;
- assert(string(&buf) == "..");
- appendnorm(&buf, "..")!;
- s = strings::join(pathsepstr, "..", "..");
- assert(string(&buf) == s);
- free(s);
- appendnorm(&buf, "..")!;
- s = strings::join(pathsepstr, "..", "..", "..");
- assert(string(&buf) == s);
- free(s);
-
- buf.end = 0;
- appendnorm(&buf, "foo")!;
- appendnorm(&buf, "bar")!;
- s = strings::join(pathsepstr, "foo", "bar");
- assert(string(&buf) == s);
- free(s);
- appendnorm(&buf, "..")!;
- assert(string(&buf) == "foo");
- appendnorm(&buf, "..")!;
- assert(string(&buf) == ".");
- appendnorm(&buf, "..")!;
- assert(string(&buf) == "..");
- appendnorm(&buf, "..")!;
- s = strings::join(pathsepstr, "..", "..");
- assert(string(&buf) == s);
- free(s);
-
- set(&buf, "foo", "bar")!;
- s = strings::join(pathsepstr, "foo", "bar");
- assert(string(&buf) == s);
- free(s);
- s = strings::concat("bar", pathsepstr);
- set(&buf, "foo", s)!;
- free(s);
- s = strings::join(pathsepstr, "foo", "bar", "");
- assert(string(&buf) == s);
- set(&buf, "foo", "bar", pathsepstr)!;
- assert(string(&buf) == s);
- free(s);
- s = strings::concat(pathsepstr, "baz");
- push(&buf, s)!;
- free(s);
- s = strings::join(pathsepstr, "foo", "bar", "baz");
- assert(string(&buf) == s);
- free(s);
-
- buf.end = 0;
- appendnorm(&buf, "a")!;
- appendnorm(&buf, "b")!;
- s = strings::join(pathsepstr, "a", "b");
- assert(string(&buf) == s);
- free(s);
+ if (buf.end == 0) return ".";
+ return strings::fromutf8_unsafe(buf.buf[..buf.end]);
};
diff --git a/path/stack.ha b/path/stack.ha
@@ -4,87 +4,149 @@ use bytes;
use errors;
use strings;
-// Joins path elements onto the end of a path buffer.
+// Appends path elements onto the end of a path buffer.
// Returns the new string value of the path.
export fn push(buf: *buffer, items: str...) (str | errors::overflow) = {
for (let i = 0z; i < len(items); i += 1) {
- const elem = strings::toutf8(items[i]);
- const tok = bytes::tokenize(elem, pathsep);
- for (let j = 0z; true; j += 1) {
- const next = match (bytes::next_token(&tok)) {
- case let tok: []u8 =>
- yield tok;
- case void =>
- break;
+ let elem = strings::toutf8(items[i]);
+ for (true) match (bytes::index(elem, PATHSEP)) {
+ case void =>
+ buf.end = appendnorm(buf, elem)?;
+ break;
+ case let j: size =>
+ if (j == 0 && buf.end == 0) {
+ buf.buf[0] = PATHSEP;
+ buf.end = 1;
+ } else {
+ buf.end = appendnorm(buf, elem[..j])?;
};
- if (len(next) == 0 && j == 0) {
- // Handles the push("/foo") case as
- // push("/", "foo");
- appendnorm(buf, pathsepstr)?;
- };
- appendnorm(buf, next)?;
+ elem = elem[j+1..];
};
};
return string(buf);
};
+const dot: []u8 = ['.'];
+const dotdot: []u8 = ['.', '.'];
+
+// Appends a path segment to a buffer, preserving normalization. seg must not
+// contain any PATHSEPs; to make the path absolute, do that manually. Returns
+// the new end of the buffer without updating buf.end; the caller stores it.
+// x + => x
+// x + . => x
+// / + .. => /
+// + .. => ..
+// x/.. + .. => x/../..
+// x/y + .. => x
+// x + y => x/y
+fn appendnorm(buf: *buffer, seg: []u8) (size | errors::overflow) = {
+ if (len(seg) == 0 || bytes::equal(dot, seg)) return buf.end;
+ if (bytes::equal(dotdot, seg)) {
+ if (isroot(buf)) return buf.end;
+ const isep = match (bytes::rindex(buf.buf[..buf.end], PATHSEP)) {
+ case void => yield 0z;
+ case let i: size => yield i + 1;
+ };
+ if (buf.end == 0 || bytes::equal(buf.buf[isep..buf.end], dotdot)) {
+ return appendlit(buf, dotdot)?;
+ } else {
+ return if (isep <= 1) isep else isep - 1;
+ };
+ } else {
+ return appendlit(buf, seg)?;
+ };
+};
+
+// Appends a segment to a buffer, *without* preserving normalization.
+// Returns the new end of the buffer; buf.end itself is left unchanged.
+fn appendlit(buf: *buffer, bs: []u8) (size | errors::overflow) = {
+ let newend = buf.end;
+ if (buf.end == 0 || isroot(buf)) {
+ if (PATH_MAX < buf.end + len(bs)) return errors::overflow;
+ } else {
+ if (PATH_MAX < buf.end + len(bs) + 1) return errors::overflow;
+ buf.buf[buf.end] = PATHSEP;
+ newend += 1;
+ };
+ buf.buf[newend..newend+len(bs)] = bs;
+ return newend + len(bs);
+};
+
+
@test fn push() void = {
let buf = init()!;
- push(&buf, "foo", "bar", "baz")!;
- let s = strings::join(pathsepstr, "foo", "bar", "baz");
- assert(string(&buf) == s);
- free(s);
+ assert(string(&buf) == ".");
- buf.end = 0;
- s = strings::join(pathsepstr, "", "foo", "bar");
- push(&buf, s, "baz")!;
- free(s);
- s = strings::join(pathsepstr, "", "foo", "bar", "baz");
- assert(string(&buf) == s);
- free(s);
+ // current dir invariants
+ assert(push(&buf, "")! == ".");
+ assert(push(&buf, ".")! == ".");
- buf.end = 0;
- s = strings::join(pathsepstr, "foo", "bar");
- push(&buf, pathsepstr, s, "baz")!;
- free(s);
- s = strings::join(pathsepstr, "", "foo", "bar", "baz");
- assert(string(&buf) == s);
- free(s);
+ // parent dir invariants
+ assert(push(&buf, "..")! == "..");
+ assert(push(&buf, "")! == "..");
+ assert(push(&buf, ".")! == "..");
+ assert(push(&buf, pathsepstr)! == "..");
- buf.end = 0;
- s = strings::join(pathsepstr, ".", "foo", "bar");
- push(&buf, s)!;
- free(s);
- s = strings::join(pathsepstr, "foo", "bar");
- assert(string(&buf) == s);
- free(s);
+ assert(set(&buf)! == ".");
+ // root dir invariants
+ assert(push(&buf, pathsepstr)! == pathsepstr);
+ assert(push(&buf, "")! == pathsepstr);
+ assert(push(&buf, ".")! == pathsepstr);
+ assert(push(&buf, "..")! == pathsepstr);
+ assert(push(&buf, pathsepstr)! == pathsepstr);
+
+ assert(set(&buf)! == ".");
+ // regular path and parent
+ assert(push(&buf, "foo")! == "foo");
+ assert(push(&buf, ".")! == "foo");
+ assert(push(&buf, pathsepstr)! == "foo");
+ assert(push(&buf, "..")! == ".");
+
+ // multiple segments
+ push(&buf, "a", "b")!;
+ assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'b']));
+ push(&buf, "..", "c")!;
+ assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'c']));
+ assert(push(&buf, "..")! == "a");
+ push(&buf, strings::fromutf8([PATHSEP, 'd'])!)!;
+ assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'd']));
+ assert(push(&buf, "..", "..")! == ".");
+
+ // multiple segments, absolute
+ push(&buf, pathsepstr, "a", "b")!;
+ assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'b']));
+ push(&buf, "..", "c")!;
+ assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'c']));
+ push(&buf, "..")!;
+ assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a']));
+ push(&buf, strings::fromutf8([PATHSEP, 'd'])!)!;
+ assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'd']));
+ assert(push(&buf, "..", "..", "..")! == pathsepstr);
};
// Examine the final path segment in a buffer.
// Returns void if the path is empty or is the root dir.
-export fn peek(buf: *const buffer) (str | void) = {
- let trimmed = bytes::rtrim(buf.buf[..buf.end], PATHSEP);
- if (len(trimmed) == 0) return void;
- match (bytes::rindex(trimmed, PATHSEP)) {
- case void =>
- return strings::fromutf8_unsafe(buf.buf[..buf.end]);
- case let i: size =>
- return strings::fromutf8_unsafe(buf.buf[i+1..buf.end]);
- };
-};
+export fn peek(buf: *const buffer) (str | void) = split(buf).1;
// Remove and return the final path segment in a buffer.
// Returns void if the path is empty or is the root dir.
export fn pop(buf: *buffer) (str | void) = {
- let trimmed = bytes::rtrim(buf.buf[..buf.end], PATHSEP);
- if (len(trimmed) == 0) return void;
- match (bytes::rindex(trimmed, PATHSEP)) {
+ const (end, res) = split(buf);
+ buf.end = end;
+ return res;
+};
+
+// Helper for pop/peek. Returns (new end of buffer, final segment or void).
+fn split(buf: *buffer) (size, (str | void)) = {
+ if (buf.end == 0 || isroot(buf)) return (buf.end, void);
+ match (bytes::rindex(buf.buf[..buf.end], PATHSEP)) {
case void =>
- defer buf.end = 0;
- return strings::fromutf8_unsafe(buf.buf[..buf.end]);
+ return (0z, strings::fromutf8_unsafe(buf.buf[..buf.end]));
case let i: size =>
- defer buf.end = i+1;
- return strings::fromutf8_unsafe(buf.buf[i+1..buf.end]);
+ return (
+ if (i == 0) 1z else i,
+ strings::fromutf8_unsafe(buf.buf[i+1..buf.end]),
+ );
};
};
@@ -111,18 +173,6 @@ export fn pop(buf: *buffer) (str | void) = {
push(&buf, pathsepstr, "foo")!;
assert(pop(&buf) as str == "foo");
assert(string(&buf) == pathsepstr);
-
- // relative dir
- buf.end = 0;
- push(&buf, "foo/")!;
- assert(pop(&buf) as str == "foo/");
- assert(string(&buf) == ".");
-
- // abs dir
- buf.end = 0;
- push(&buf, pathsepstr, "foo/")!;
- assert(pop(&buf) as str == "foo/");
- assert(string(&buf) == pathsepstr);
};
// Joins a list of path components together, normalizes it, and returns the
diff --git a/path/util.ha b/path/util.ha
@@ -25,3 +25,5 @@ export fn abs(path: (*buffer | str)) bool = {
let path = getbytes(path);
return 0 < len(path) && path[0] == PATHSEP;
};
+
+export fn isroot(buf: *buffer) bool = buf.end == 1 && buf.buf[0] == PATHSEP;