hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit dd3540c094cd491be69195524149cf6605bc4d0e
parent 9346336ffe19f9216b9148763daf22e739ecec91
Author: Autumn! <autumnull@posteo.net>
Date:   Sun,  7 May 2023 01:43:29 +0000

path: tidy up dirname/basename

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
M path/names.ha | 93 -------------------------------------------------------------------------------
A path/posix.ha | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 1 +
M stdlib.mk | 2 ++
4 files changed, 81 insertions(+), 93 deletions(-)

diff --git a/path/names.ha b/path/names.ha
@@ -5,99 +5,6 @@
 use bytes;
 use strings;
 
-// Returns the directory name for a given path. For a path to a file name, this
-// returns the directory in which that file resides. For a path to a directory,
-// this returns the path to its parent directory. If the path consists solely of
-// the target's path separator, a string to the path is returned unmodified. If
-// the path is empty, "." is returned. The return value is either borrowed from
-// the input or statically allocated; use [[strings::dup]] to extend its
-// lifetime or modify it.
-export fn dirname(path: (str | *buffer)) const str = {
-	let path = getstring(path);
-	if (path == "") {
-		return ".";
-	};
-	let trimmed = strings::rtrim(path, PATHSEP: u32: rune);
-	if (trimmed == "") {
-		return pathsepstr;
-	};
-	let b = strings::toutf8(trimmed);
-	let i = match (bytes::rindex(b, PATHSEP)) {
-	case void =>
-		return ".";
-	case let z: size =>
-		yield z;
-	};
-	if (i == 0) {
-		i += 1;
-	};
-	path = strings::fromutf8_unsafe(b[..i]);
-	path = strings::rtrim(path, PATHSEP: u32: rune);
-	if (path == "") {
-		return pathsepstr;
-	};
-	return path;
-};
-
-@test fn dirname() void = {
-	assertpatheql(&dirname, pathsepstr, "", "foo");
-	assertpatheql(&dirname, pathsepstr, pathsepstr);
-	assertpatheql(&dirname, pathsepstr, "", "", "");
-	assertpatheql(&dirname, pathsepstr, "", "", "", "");
-	assertpatheql(&dirname, "foo", "foo", "bar");
-	assertpatheql(&dirname, ".", "");
-	assertpatheql(&dirname, ".", "foo");
-	assertpatheql(&dirname, ".", "foo", "");
-	assertpatheql(&dirname, ".", "foo", "", "");
-	assertpatheql(&dirname, pathsepstr, "", "", "", "foo");
-	assertpatheql(&dirname, pathsepstr, "", "", "", "foo", "", "");
-	let expected = strings::concat(pathsepstr, "foo");
-	assertpatheql(&dirname, expected, "", "foo", "bar");
-	free(expected);
-	expected = strings::concat(pathsepstr, pathsepstr, "foo");
-	assertpatheql(&dirname, expected, "", "", "foo", "", "", "bar", "", "");
-	free(expected);
-};
-
-// Returns the final component of a given path. For a path to a file name, this
-// returns the file name. For a path to a directory, this returns the directory
-// name. If the path consists solely of the target's path separator, a string of
-// the path is returned unmodified. If the path is empty, "." is returned. The
-// return value is either borrowed from the input or statically allocated; use
-// [[strings::dup]] to extend its lifetime or modify it.
-export fn basename(path: (str | *buffer)) const str = {
-	let path = getstring(path);
-	if (path == "") {
-		return ".";
-	};
-	let trimmed = strings::rtrim(path, PATHSEP: u32: rune);
-	if (trimmed == "") {
-		return pathsepstr;
-	};
-	let b = strings::toutf8(trimmed);
-	let i = match (bytes::rindex(b, PATHSEP)) {
-	case void =>
-		return trimmed;
-	case let z: size =>
-		yield if (z + 1 < len(b)) z + 1z else 0z;
-	};
-	return strings::fromutf8_unsafe(b[i..]);
-};
-
-@test fn basename() void = {
-	assertpatheql(&basename, "bar", "", "foo", "bar");
-	assertpatheql(&basename, "foo", "", "foo");
-	assertpatheql(&basename, pathsepstr, pathsepstr);
-	assertpatheql(&basename, pathsepstr, "", "", "");
-	assertpatheql(&basename, pathsepstr, "", "", "", "");
-	assertpatheql(&basename, "bar", "foo", "bar");
-	assertpatheql(&basename, "bar", "foo", "bar", "", "");
-	assertpatheql(&basename, "foo", "foo");
-	assertpatheql(&basename, "foo", "foo", "");
-	assertpatheql(&basename, "bar", "foo", "bar", "");
-	assertpatheql(&basename, ".", "");
-};
-
 // Returns the file name and extension for a path. The return value is borrowed
 // from the input, see [[strings::dup]] to extend its lifetime.
 //
diff --git a/path/posix.ha b/path/posix.ha
@@ -0,0 +1,78 @@
+// License: MPL-2.0
+// (c) 2022 Alexey Yerin <yyp@disroot.org>
+// (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
+// (c) 2021 Ember Sawady <ecs@d2evs.net>
+use bytes;
+use strings;
+
+// These functions have been confined here to POSIX jail. They are
+// POSIX-compliant, for their sins, but they do not fit in semantically
+// with the other stack-paradigm functions. Hence this POSIX-complaint.
+// They are based primarily off of `man 1p basename/dirname`, and secondarily
+// off of the examples in `man 3p basename`.
+
+// A POSIX-compliant implementation of dirname. See the POSIX specification
+// for more information. Note that this function does *not* normalize the
+// input. The return value is either borrowed from the input or statically
+// allocated; use [[strings::dup]] to extend its lifetime.
+export fn dirname(path: const str) const str = {
+	let path = strings::toutf8(path);
+	if (len(path) == 0) return ".";
+
+	path = bytes::rtrim(path, PATHSEP);
+	if (len(path) == 0) return pathsepstr;
+
+	match (bytes::rindex(path, PATHSEP)) {
+	case void => return ".";
+	case let z: size => path = path[..z];
+	};
+	path = bytes::rtrim(path, PATHSEP);
+
+	if (len(path) == 0) return pathsepstr;
+	return strings::fromutf8_unsafe(path);
+};
+
+// A POSIX-compliant implementation of basename. See the POSIX specification
+// for more information. Note that this function does *not* normalize the
+// input. The return value is either borrowed from the input or statically
+// allocated; use [[strings::dup]] to extend its lifetime.
+export fn basename(path: const str) const str = {
+	let path = strings::toutf8(path);
+	if (len(path) == 0) return ".";
+
+	path = bytes::rtrim(path, PATHSEP);
+	if (len(path) == 0) return pathsepstr;
+
+	match (bytes::rindex(path, PATHSEP)) {
+	case void => void;
+	case let z: size => path = path[z+1..];
+	};
+	return strings::fromutf8_unsafe(path);
+};
+
+@test fn dirname_basename() void = {
+	const table = [
+		// input           , dirname     , basename
+		["usr"             , "."         , "usr" ],
+		["usr/"            , "."         , "usr" ],
+		[""                , "."         , "."   ],
+		["/"               , "/"         , "/"   ],
+		["//"              , "/"         , "/"   ], // implementation defined
+		["///"             , "/"         , "/"   ],
+		["/usr/"           , "/"         , "usr" ],
+		["/usr/lib"        , "/usr"      , "lib" ],
+		["//usr//lib//"    , "//usr"     , "lib" ],
+		["/home//dwc//test", "/home//dwc", "test"],
+	];
+	for (let i = 0z; i < len(table); i += 1) {
+		// replace '/' with system-dependent path separator
+		for (let j = 0z; j < len(table[i]); j += 1) {
+			const bs = strings::toutf8(table[i][j]);
+			for (let k = 0z; k < len(bs); k += 1) {
+				if (bs[k] == '/') bs[k] = PATHSEP;
+			};
+		};
+		assert(dirname(table[i][0]) == table[i][1]);
+		assert(basename(table[i][0]) == table[i][2]);
+	};
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -1207,6 +1207,7 @@ path() {
 		util.ha \
 		stack.ha \
 		names.ha \
+		posix.ha \
 		iter.ha
 	gen_ssa path strings bytes errors
 }
diff --git a/stdlib.mk b/stdlib.mk
@@ -1840,6 +1840,7 @@ stdlib_path_any_srcs = \
 	$(STDLIB)/path/util.ha \
 	$(STDLIB)/path/stack.ha \
 	$(STDLIB)/path/names.ha \
+	$(STDLIB)/path/posix.ha \
 	$(STDLIB)/path/iter.ha
 
 $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_strings_$(PLATFORM)) $(stdlib_bytes_$(PLATFORM)) $(stdlib_errors_$(PLATFORM))
@@ -4115,6 +4116,7 @@ testlib_path_any_srcs = \
 	$(STDLIB)/path/util.ha \
 	$(STDLIB)/path/stack.ha \
 	$(STDLIB)/path/names.ha \
+	$(STDLIB)/path/posix.ha \
 	$(STDLIB)/path/iter.ha
 
 $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib_strings_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_errors_$(PLATFORM))