commit dd3540c094cd491be69195524149cf6605bc4d0e
parent 9346336ffe19f9216b9148763daf22e739ecec91
Author: Autumn! <autumnull@posteo.net>
Date: Sun, 7 May 2023 01:43:29 +0000
path: tidy up dirname/basename
Signed-off-by: Autumn! <autumnull@posteo.net>
Diffstat:
4 files changed, 81 insertions(+), 93 deletions(-)
diff --git a/path/names.ha b/path/names.ha
@@ -5,99 +5,6 @@
use bytes;
use strings;
-// Returns the directory name for a given path. For a path to a file name, this
-// returns the directory in which that file resides. For a path to a directory,
-// this returns the path to its parent directory. If the path consists solely of
-// the target's path separator, a string to the path is returned unmodified. If
-// the path is empty, "." is returned. The return value is either borrowed from
-// the input or statically allocated; use [[strings::dup]] to extend its
-// lifetime or modify it.
-export fn dirname(path: (str | *buffer)) const str = {
- let path = getstring(path);
- if (path == "") {
- return ".";
- };
- let trimmed = strings::rtrim(path, PATHSEP: u32: rune);
- if (trimmed == "") {
- return pathsepstr;
- };
- let b = strings::toutf8(trimmed);
- let i = match (bytes::rindex(b, PATHSEP)) {
- case void =>
- return ".";
- case let z: size =>
- yield z;
- };
- if (i == 0) {
- i += 1;
- };
- path = strings::fromutf8_unsafe(b[..i]);
- path = strings::rtrim(path, PATHSEP: u32: rune);
- if (path == "") {
- return pathsepstr;
- };
- return path;
-};
-
-@test fn dirname() void = {
- assertpatheql(&dirname, pathsepstr, "", "foo");
- assertpatheql(&dirname, pathsepstr, pathsepstr);
- assertpatheql(&dirname, pathsepstr, "", "", "");
- assertpatheql(&dirname, pathsepstr, "", "", "", "");
- assertpatheql(&dirname, "foo", "foo", "bar");
- assertpatheql(&dirname, ".", "");
- assertpatheql(&dirname, ".", "foo");
- assertpatheql(&dirname, ".", "foo", "");
- assertpatheql(&dirname, ".", "foo", "", "");
- assertpatheql(&dirname, pathsepstr, "", "", "", "foo");
- assertpatheql(&dirname, pathsepstr, "", "", "", "foo", "", "");
- let expected = strings::concat(pathsepstr, "foo");
- assertpatheql(&dirname, expected, "", "foo", "bar");
- free(expected);
- expected = strings::concat(pathsepstr, pathsepstr, "foo");
- assertpatheql(&dirname, expected, "", "", "foo", "", "", "bar", "", "");
- free(expected);
-};
-
-// Returns the final component of a given path. For a path to a file name, this
-// returns the file name. For a path to a directory, this returns the directory
-// name. If the path consists solely of the target's path separator, a string of
-// the path is returned unmodified. If the path is empty, "." is returned. The
-// return value is either borrowed from the input or statically allocated; use
-// [[strings::dup]] to extend its lifetime or modify it.
-export fn basename(path: (str | *buffer)) const str = {
- let path = getstring(path);
- if (path == "") {
- return ".";
- };
- let trimmed = strings::rtrim(path, PATHSEP: u32: rune);
- if (trimmed == "") {
- return pathsepstr;
- };
- let b = strings::toutf8(trimmed);
- let i = match (bytes::rindex(b, PATHSEP)) {
- case void =>
- return trimmed;
- case let z: size =>
- yield if (z + 1 < len(b)) z + 1z else 0z;
- };
- return strings::fromutf8_unsafe(b[i..]);
-};
-
-@test fn basename() void = {
- assertpatheql(&basename, "bar", "", "foo", "bar");
- assertpatheql(&basename, "foo", "", "foo");
- assertpatheql(&basename, pathsepstr, pathsepstr);
- assertpatheql(&basename, pathsepstr, "", "", "");
- assertpatheql(&basename, pathsepstr, "", "", "", "");
- assertpatheql(&basename, "bar", "foo", "bar");
- assertpatheql(&basename, "bar", "foo", "bar", "", "");
- assertpatheql(&basename, "foo", "foo");
- assertpatheql(&basename, "foo", "foo", "");
- assertpatheql(&basename, "bar", "foo", "bar", "");
- assertpatheql(&basename, ".", "");
-};
-
// Returns the file name and extension for a path. The return value is borrowed
// from the input, see [[strings::dup]] to extend its lifetime.
//
diff --git a/path/posix.ha b/path/posix.ha
@@ -0,0 +1,78 @@
+// License: MPL-2.0
+// (c) 2022 Alexey Yerin <yyp@disroot.org>
+// (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
+// (c) 2021 Ember Sawady <ecs@d2evs.net>
+use bytes;
+use strings;
+
+// These functions have been confined here to POSIX jail. They are
+// POSIX-compliant, for their sins, but they do not fit in semantically
+// with the other stack-paradigm functions. Hence this POSIX-complaint.
+// They are based primarily off of `man 1p basename/dirname`, and secondarily
+// off of the examples in `man 3p basename`.
+
+// A POSIX-compliant implementation of dirname (see the POSIX specification).
+// Returns "." for an empty or separator-free path. Does *not* normalize the
+// input. The return value is either borrowed from the input or statically
+// allocated; use [[strings::dup]] to extend its lifetime.
+export fn dirname(path: const str) const str = {
+ let path = strings::toutf8(path);
+ if (len(path) == 0) return ".";
+
+ path = bytes::rtrim(path, PATHSEP); // ignore trailing separators
+ if (len(path) == 0) return pathsepstr; // input held nothing but separators
+
+ match (bytes::rindex(path, PATHSEP)) {
+ case void => return "."; // no separator left in the path
+ case let z: size => path = path[..z]; // keep what precedes the last separator
+ };
+ path = bytes::rtrim(path, PATHSEP); // drop separators that ended the parent part
+
+ if (len(path) == 0) return pathsepstr; // only leading separators preceded the name
+ return strings::fromutf8_unsafe(path);
+};
+
+// A POSIX-compliant implementation of basename (see the POSIX specification).
+// Returns "." for an empty path. Note that this function does *not* normalize
+// the input. The return value is either borrowed from the input or statically
+// allocated; use [[strings::dup]] to extend its lifetime.
+export fn basename(path: const str) const str = {
+ let path = strings::toutf8(path);
+ if (len(path) == 0) return ".";
+
+ path = bytes::rtrim(path, PATHSEP); // ignore trailing separators
+ if (len(path) == 0) return pathsepstr; // input held nothing but separators
+
+ match (bytes::rindex(path, PATHSEP)) {
+ case void => void; // no separator: the trimmed path is already the name
+ case let z: size => path = path[z+1..]; // keep what follows the last separator
+ };
+ return strings::fromutf8_unsafe(path);
+};
+
+@test fn dirname_basename() void = { // table-driven check of dirname and basename
+ const table = [
+ // each row: input , expected dirname , expected basename ('/'-separated)
+ ["usr" , "." , "usr" ],
+ ["usr/" , "." , "usr" ],
+ ["" , "." , "." ],
+ ["/" , "/" , "/" ],
+ ["//" , "/" , "/" ], // POSIX leaves "//" implementation-defined; "/" is expected here
+ ["///" , "/" , "/" ],
+ ["/usr/" , "/" , "usr" ],
+ ["/usr/lib" , "/usr" , "lib" ],
+ ["//usr//lib//" , "//usr" , "lib" ],
+ ["/home//dwc//test", "/home//dwc", "test"],
+ ];
+ for (let i = 0z; i < len(table); i += 1) {
+ // rewrite '/' to PATHSEP in every column (input and expectations), in place
+ for (let j = 0z; j < len(table[i]); j += 1) {
+ const bs = strings::toutf8(table[i][j]); // NOTE(review): bs appears to alias the literal's bytes; confirm writing to it is OK
+ for (let k = 0z; k < len(bs); k += 1) {
+ if (bs[k] == '/') bs[k] = PATHSEP;
+ };
+ };
+ assert(dirname(table[i][0]) == table[i][1]); // column 1 holds the expected dirname
+ assert(basename(table[i][0]) == table[i][2]); // column 2 holds the expected basename
+ };
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -1207,6 +1207,7 @@ path() {
util.ha \
stack.ha \
names.ha \
+ posix.ha \
iter.ha
gen_ssa path strings bytes errors
}
diff --git a/stdlib.mk b/stdlib.mk
@@ -1840,6 +1840,7 @@ stdlib_path_any_srcs = \
$(STDLIB)/path/util.ha \
$(STDLIB)/path/stack.ha \
$(STDLIB)/path/names.ha \
+ $(STDLIB)/path/posix.ha \
$(STDLIB)/path/iter.ha
$(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_strings_$(PLATFORM)) $(stdlib_bytes_$(PLATFORM)) $(stdlib_errors_$(PLATFORM))
@@ -4115,6 +4116,7 @@ testlib_path_any_srcs = \
$(STDLIB)/path/util.ha \
$(STDLIB)/path/stack.ha \
$(STDLIB)/path/names.ha \
+ $(STDLIB)/path/posix.ha \
$(STDLIB)/path/iter.ha
$(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib_strings_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_errors_$(PLATFORM))