commit 5399f1779bee7067612f09f560cbccf4c387b0c4
parent 01ed35b2e7b8e6f45f2d4fe88562db5a66959a14
Author: Bor Grošelj Simić <bgs@turminal.net>
Date: Fri, 8 Apr 2022 21:36:14 +0200
add bytes::cut and use it in strings::cut
and document that calling strings::cut with an empty delimiter is invalid
Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
Diffstat:
2 files changed, 37 insertions(+), 10 deletions(-)
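
For illustration only (not part of the patch), a minimal usage sketch of the behaviour documented below, seen from the strings side; it assumes only the standard fmt and strings modules:

use fmt;
use strings;

export fn main() void = {
	// Split a key=value pair on the first '='; per this patch, the
	// delimiter must not be empty.
	const kv = strings::cut("name=value", "=");
	fmt::printfln("key: {}, value: {}", kv.0, kv.1)!;
	// Prints: key: name, value: value

	// With no match, everything ends up in the first element.
	const miss = strings::cut("hello world", "=");
	fmt::printfln("'{}' / '{}'", miss.0, miss.1)!;
	// Prints: 'hello world' / ''
};

With this change, an empty delimiter aborts via the assertion added in bytes::cut rather than being silently accepted.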
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -186,3 +186,37 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
assert(peek_token(&t) is void);
assert(next_token(&t) is void);
};
+
+// Returns a slice "cut" along the first instance of a delimiter, returning
+// everything up to the delimiter, and everything after the delimiter, in a
+// tuple.
+//
+// Caller must ensure delimiter is not an empty slice.
+export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
+	let ln = if (delim is u8) {
+		yield 1z;
+	} else {
+		let ln = len(delim: []u8);
+		assert(ln > 0, "bytes::cut called with empty delimiter");
+		yield ln;
+	};
+	match (index(in, delim)) {
+	case let i: size =>
+		return (in[..i], in[i + ln..]);
+	case void =>
+		return (in, []);
+	};
+};
+
+@test fn cut() void = {
+	const c = cut(['a', 'b', 'c'], ['b']);
+	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
+	const c = cut(['a', 'b', 'c'], 'b');
+	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
+	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
+	assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
+	const c = cut(['a', 'b', 'c'], 'x');
+	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
+	const c = cut([], 'x');
+	assert(equal(c.0, []) && equal(c.1, []));
+};
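
A corresponding sketch for the byte-level routine, again illustrative rather than part of the patch; the delimiter may be a single u8 or a non-empty []u8, and the strings module is used only to build and print the []u8 values:

use bytes;
use fmt;
use strings;

export fn main() void = {
	// Cut a byte slice on the first ':'; a single u8 works as the delimiter.
	const parts = bytes::cut(strings::toutf8("user:secret:extra"), ':');
	fmt::printfln("{} | {}",
		strings::fromutf8_unsafe(parts.0),
		strings::fromutf8_unsafe(parts.1))!;
	// Prints: user | secret:extra
};
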
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
@@ -143,17 +143,10 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
// strings::cut("hello world", "=") // ("hello world", "")
//
// The return value is borrowed from the 'in' parameter.
+// Caller must ensure delimiter is not an empty string.
export fn cut(in: str, delim: str) (str, str) = {
-	const tok = tokenize(in, delim);
-	let res = ("", "");
-	match (next_token(&tok)) {
-	case let s: str =>
-		res.0 = s;
-	case void =>
-		return res;
-	};
-	res.1 = remaining_tokens(&tok);
-	return res;
+	let c = bytes::cut(toutf8(in), toutf8(delim));
+	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
};
@test fn cut() void = {