hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit d2e78b43d5e05701603e8635cbd091cc2450ce5c
parent 7db2bed83d7233a9a95e9e272994932d618ab9b5
Author: Autumn! <autumnull@posteo.net>
Date:   Sun,  7 May 2023 01:43:26 +0000

path: remove trailing separators during normalization

This commit also rewrites the path-appending functions from scratch
in order to make the logic neater.

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
M path/README | 14 ++------------
M path/buffer.ha | 175 +------------------------------------------------------------------------------
M path/stack.ha | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M path/util.ha | 2 ++
4 files changed, 125 insertions(+), 254 deletions(-)

diff --git a/path/README b/path/README @@ -12,18 +12,8 @@ normalized, which is to say that it will not include any of the following: - Redundant path separators - Any "." components, except in the case of "." -Assuming that PATHSEP is '/', "/usr//bin/../bin/./hare" becomes -"/usr/bin/hare" and "../../foo/bar" is unchanged. The path will only end in -a slash if the last item which was pushed ended in a slash, like so: - - let buf = path::init("foo", "bar")!; - assert(path::string(&buf) == "foo/bar"); - - path::set(&buf, "foo", "bar/")!; - assert(path::string(&buf) == "foo/bar/"); - - path::set(&buf, "foo", "bar", "/")!; - assert(path::string(&buf) == "foo/bar/"); +Assuming that [[PATHSEP]] is '/', "/usr//bin/../bin/./hare/" becomes +"/usr/bin/hare" and "../../foo/bar" is unchanged. The buffer object includes an array of length [[PATH_MAX]], which can be somewhat large; on Linux it's 4096 bytes. You can allocate this on the stack in diff --git a/path/buffer.ha b/path/buffer.ha @@ -28,177 +28,6 @@ export fn set(buf: *buffer, items: str...) (str | errors::overflow) = { // The return value is borrowed from the buffer. Use [[strings::dup]] to // extend the lifetime of the string. export fn string(buf: *buffer) str = { - const value = strings::fromutf8_unsafe(buf.buf[..buf.end]); - if (value == "") { - return "."; - }; - return value; -}; - -const dot: []u8 = ['.']; -const dotdot: []u8 = ['.', '.']; -const dotdotslash: []u8 = ['.', '.', PATHSEP]; - -// Normalizes and appends a path component to a buffer. -// -// Invariant: elem must either be equal to [PATHSEP], or contain no path -// separators. 
-fn appendnorm(buf: *buffer, elem: (str | []u8)) (void | errors::overflow) = { - const elem = match (elem) { - case let elem: []u8 => - yield elem; - case let string: str => - yield strings::toutf8(string); - }; - if (len(elem) == 1 && elem[0] == PATHSEP) { - if (buf.end == 0) { - buf.buf[0] = PATHSEP; - buf.end += 1; - return; - }; - return; - } else if (bytes::equal(elem, dot)) { - return; - } else if (bytes::equal(elem, dotdot)) { - return parent(buf); - }; - return doappend(buf, elem); -}; - -// Moves the buffer to the parent of the current directory. -fn parent(buf: *buffer) (void | errors::overflow) = { - let ascending = true; - const iter = iter(buf); - for (true) { - match (next(&iter)) { - case let elem: str => - if (elem != "..") { - ascending = false; - break; - }; - case void => - break; - }; - }; - if (ascending) { - // If we are appending ".." to a path which is entirely composed - // of ".." elements, then we want to append it normally, so that - // "../.." becomes "../../.." instead of "..". 
- return doappend(buf, dotdot); - }; - // XXX: This is not super efficient - const name = dirname(string(buf)); - buf.end = 0; - push(buf, name)?; -}; - -fn doappend(buf: *buffer, elem: []u8) (void | errors::overflow) = { - if (buf.end + len(elem) + 1 >= PATH_MAX) { - return errors::overflow; - }; - if (buf.end > 0 && buf.buf[buf.end - 1] != PATHSEP) { - buf.buf[buf.end] = PATHSEP; - buf.end += 1; - }; - buf.buf[buf.end..buf.end + len(elem)] = elem; - buf.end += len(elem); -}; - -@test fn appendnorm() void = { - let buf = init()!; - assert(string(&buf) == "."); - appendnorm(&buf, "foo")!; - appendnorm(&buf, "bar")!; - appendnorm(&buf, "baz")!; - let s = strings::join(pathsepstr, "foo", "bar", "baz"); - assert(string(&buf) == s); - appendnorm(&buf, ".")!; - appendnorm(&buf, "bad")!; - appendnorm(&buf, ".")!; - free(s); - s = strings::join(pathsepstr, "foo", "bar", "baz", "bad"); - assert(string(&buf) == s); - free(s); - - buf.end = 0; - appendnorm(&buf, pathsepstr)!; - appendnorm(&buf, "foo")!; - appendnorm(&buf, "bar")!; - appendnorm(&buf, "baz")!; - s = strings::join(pathsepstr, "", "foo", "bar", "baz"); - assert(string(&buf) == s); - appendnorm(&buf, pathsepstr)!; - appendnorm(&buf, pathsepstr)!; - assert(string(&buf) == s); - free(s); - - buf.end = 0; - appendnorm(&buf, pathsepstr)!; - appendnorm(&buf, pathsepstr)!; - appendnorm(&buf, pathsepstr)!; - assert(string(&buf) == pathsepstr); - - buf.end = 0; - appendnorm(&buf, ".")!; - appendnorm(&buf, "foo")!; - assert(string(&buf) == "foo"); - appendnorm(&buf, "..")!; - assert(string(&buf) == "."); - appendnorm(&buf, "..")!; - assert(string(&buf) == ".."); - - buf.end = 0; - appendnorm(&buf, "..")!; - assert(string(&buf) == ".."); - appendnorm(&buf, "..")!; - s = strings::join(pathsepstr, "..", ".."); - assert(string(&buf) == s); - free(s); - appendnorm(&buf, "..")!; - s = strings::join(pathsepstr, "..", "..", ".."); - assert(string(&buf) == s); - free(s); - - buf.end = 0; - appendnorm(&buf, "foo")!; - 
appendnorm(&buf, "bar")!; - s = strings::join(pathsepstr, "foo", "bar"); - assert(string(&buf) == s); - free(s); - appendnorm(&buf, "..")!; - assert(string(&buf) == "foo"); - appendnorm(&buf, "..")!; - assert(string(&buf) == "."); - appendnorm(&buf, "..")!; - assert(string(&buf) == ".."); - appendnorm(&buf, "..")!; - s = strings::join(pathsepstr, "..", ".."); - assert(string(&buf) == s); - free(s); - - set(&buf, "foo", "bar")!; - s = strings::join(pathsepstr, "foo", "bar"); - assert(string(&buf) == s); - free(s); - s = strings::concat("bar", pathsepstr); - set(&buf, "foo", s)!; - free(s); - s = strings::join(pathsepstr, "foo", "bar", ""); - assert(string(&buf) == s); - set(&buf, "foo", "bar", pathsepstr)!; - assert(string(&buf) == s); - free(s); - s = strings::concat(pathsepstr, "baz"); - push(&buf, s)!; - free(s); - s = strings::join(pathsepstr, "foo", "bar", "baz"); - assert(string(&buf) == s); - free(s); - - buf.end = 0; - appendnorm(&buf, "a")!; - appendnorm(&buf, "b")!; - s = strings::join(pathsepstr, "a", "b"); - assert(string(&buf) == s); - free(s); + if (buf.end == 0) return "."; + return strings::fromutf8_unsafe(buf.buf[..buf.end]); }; diff --git a/path/stack.ha b/path/stack.ha @@ -4,87 +4,149 @@ use bytes; use errors; use strings; -// Joins path elements onto the end of a path buffer. +// Appends path elements onto the end of a path buffer. // Returns the new string value of the path. export fn push(buf: *buffer, items: str...) 
(str | errors::overflow) = { for (let i = 0z; i < len(items); i += 1) { - const elem = strings::toutf8(items[i]); - const tok = bytes::tokenize(elem, pathsep); - for (let j = 0z; true; j += 1) { - const next = match (bytes::next_token(&tok)) { - case let tok: []u8 => - yield tok; - case void => - break; + let elem = strings::toutf8(items[i]); + for (true) match (bytes::index(elem, PATHSEP)) { + case void => + buf.end = appendnorm(buf, elem)?; + break; + case let j: size => + if (j == 0 && buf.end == 0) { + buf.buf[0] = PATHSEP; + buf.end = 1; + } else { + buf.end = appendnorm(buf, elem[..j])?; }; - if (len(next) == 0 && j == 0) { - // Handles the push("/foo") case as - // push("/", "foo"); - appendnorm(buf, pathsepstr)?; - }; - appendnorm(buf, next)?; + elem = elem[j+1..]; }; }; return string(buf); }; +const dot: []u8 = ['.']; +const dotdot: []u8 = ['.', '.']; + +// append a path segment to a buffer, preserving normalization. +// seg must not contain any PATHSEPs. if you need to make the path +// absolute, you should do that manually. returns the new end of the buffer. +// x + => x +// x + . => x +// / + .. => / +// + .. => .. +// x/.. + .. => x/../.. +// x/y + .. => x +// x + y => x/y +fn appendnorm(buf: *buffer, seg: []u8) (size | errors::overflow) = { + if (len(seg) == 0 || bytes::equal(dot, seg)) return buf.end; + if (bytes::equal(dotdot, seg)) { + if (isroot(buf)) return buf.end; + const isep = match (bytes::rindex(buf.buf[..buf.end], PATHSEP)) { + case void => yield 0z; + case let i: size => yield i + 1; + }; + if (buf.end == 0 || bytes::equal(buf.buf[isep..buf.end], dotdot)) { + return appendlit(buf, dotdot)?; + } else { + return if (isep <= 1) isep else isep - 1; + }; + } else { + return appendlit(buf, seg)?; + }; +}; + +// append a segment to a buffer, *without* preserving normalization. 
+// returns the new end of the buffer +fn appendlit(buf: *buffer, bs: []u8) (size | errors::overflow) = { + let newend = buf.end; + if (buf.end == 0 || isroot(buf)) { + if (PATH_MAX < buf.end + len(bs)) return errors::overflow; + } else { + if (PATH_MAX < buf.end + len(bs) + 1) return errors::overflow; + buf.buf[buf.end] = PATHSEP; + newend += 1; + }; + buf.buf[newend..newend+len(bs)] = bs; + return newend + len(bs); +}; + + @test fn push() void = { let buf = init()!; - push(&buf, "foo", "bar", "baz")!; - let s = strings::join(pathsepstr, "foo", "bar", "baz"); - assert(string(&buf) == s); - free(s); + assert(string(&buf) == "."); - buf.end = 0; - s = strings::join(pathsepstr, "", "foo", "bar"); - push(&buf, s, "baz")!; - free(s); - s = strings::join(pathsepstr, "", "foo", "bar", "baz"); - assert(string(&buf) == s); - free(s); + // current dir invariants + assert(push(&buf, "")! == "."); + assert(push(&buf, ".")! == "."); - buf.end = 0; - s = strings::join(pathsepstr, "foo", "bar"); - push(&buf, pathsepstr, s, "baz")!; - free(s); - s = strings::join(pathsepstr, "", "foo", "bar", "baz"); - assert(string(&buf) == s); - free(s); + // parent dir invariants + assert(push(&buf, "..")! == ".."); + assert(push(&buf, "")! == ".."); + assert(push(&buf, ".")! == ".."); + assert(push(&buf, pathsepstr)! == ".."); - buf.end = 0; - s = strings::join(pathsepstr, ".", "foo", "bar"); - push(&buf, s)!; - free(s); - s = strings::join(pathsepstr, "foo", "bar"); - assert(string(&buf) == s); - free(s); + assert(set(&buf)! == "."); + // root dir invariants + assert(push(&buf, pathsepstr)! == pathsepstr); + assert(push(&buf, "")! == pathsepstr); + assert(push(&buf, ".")! == pathsepstr); + assert(push(&buf, "..")! == pathsepstr); + assert(push(&buf, pathsepstr)! == pathsepstr); + + assert(set(&buf)! == "."); + // regular path and parent + assert(push(&buf, "foo")! == "foo"); + assert(push(&buf, ".")! == "foo"); + assert(push(&buf, pathsepstr)! == "foo"); + assert(push(&buf, "..")! 
== "."); + + // multiple segments + push(&buf, "a", "b")!; + assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'b'])); + push(&buf, "..", "c")!; + assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'c'])); + assert(push(&buf, "..")! == "a"); + push(&buf, strings::fromutf8([PATHSEP, 'd'])!)!; + assert(bytes::equal(buf.buf[..buf.end], ['a', PATHSEP, 'd'])); + assert(push(&buf, "..", "..")! == "."); + + // multiple segments, absolute + push(&buf, pathsepstr, "a", "b")!; + assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'b'])); + push(&buf, "..", "c")!; + assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'c'])); + push(&buf, "..")!; + assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a'])); + push(&buf, strings::fromutf8([PATHSEP, 'd'])!)!; + assert(bytes::equal(buf.buf[..buf.end], [PATHSEP, 'a', PATHSEP, 'd'])); + assert(push(&buf, "..", "..", "..")! == pathsepstr); }; // Examine the final path segment in a buffer. // Returns void if the path is empty or is the root dir. -export fn peek(buf: *const buffer) (str | void) = { - let trimmed = bytes::rtrim(buf.buf[..buf.end], PATHSEP); - if (len(trimmed) == 0) return void; - match (bytes::rindex(trimmed, PATHSEP)) { - case void => - return strings::fromutf8_unsafe(buf.buf[..buf.end]); - case let i: size => - return strings::fromutf8_unsafe(buf.buf[i+1..buf.end]); - }; -}; +export fn peek(buf: *const buffer) (str | void) = split(buf).1; // Remove and return the final path segment in a buffer. // Returns void if the path is empty or is the root dir. 
export fn pop(buf: *buffer) (str | void) = { - let trimmed = bytes::rtrim(buf.buf[..buf.end], PATHSEP); - if (len(trimmed) == 0) return void; - match (bytes::rindex(trimmed, PATHSEP)) { + const (end, res) = split(buf); + buf.end = end; + return res; +}; + +// helper function for pop/peek, returns (new end of buffer, result) +fn split(buf: *buffer) (size, (str | void)) = { + if (buf.end == 0 || isroot(buf)) return (buf.end, void); + match (bytes::rindex(buf.buf[..buf.end], PATHSEP)) { case void => - defer buf.end = 0; - return strings::fromutf8_unsafe(buf.buf[..buf.end]); + return (0z, strings::fromutf8_unsafe(buf.buf[..buf.end])); case let i: size => - defer buf.end = i+1; - return strings::fromutf8_unsafe(buf.buf[i+1..buf.end]); + return ( + if (i == 0) 1z else i, + strings::fromutf8_unsafe(buf.buf[i+1..buf.end]), + ); }; }; @@ -111,18 +173,6 @@ export fn pop(buf: *buffer) (str | void) = { push(&buf, pathsepstr, "foo")!; assert(pop(&buf) as str == "foo"); assert(string(&buf) == pathsepstr); - - // relative dir - buf.end = 0; - push(&buf, "foo/")!; - assert(pop(&buf) as str == "foo/"); - assert(string(&buf) == "."); - - // abs dir - buf.end = 0; - push(&buf, pathsepstr, "foo/")!; - assert(pop(&buf) as str == "foo/"); - assert(string(&buf) == pathsepstr); }; // Joins a list of path components together, normalizes it, and returns the diff --git a/path/util.ha b/path/util.ha @@ -25,3 +25,5 @@ export fn abs(path: (*buffer | str)) bool = { let path = getbytes(path); return 0 < len(path) && path[0] == PATHSEP; }; + +export fn isroot(buf: *buffer) bool = buf.end == 1 && buf.buf[0] == PATHSEP;