commit da442e0bf76cac19a137a3f779b5e0d838b94c8a
parent ee6bbf85490c236f90e4eb789a8e9eb2eb77719d
Author: Autumn! <autumnull@posteo.net>
Date: Fri, 21 Apr 2023 12:39:08 +0000
bytes,strings: make {cut,rcut} return a tagged union value
additionally removes the requirement that the delimiter can't be empty.
Signed-off-by: Autumn! <autumnull@posteo.net>
Diffstat:
5 files changed, 78 insertions(+), 115 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -184,61 +184,45 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
};
// Returns the input slice "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the delimiter,
-// in a tuple. The contents are borrowed from the input slice.
-//
-// The caller must ensure that 'delimiter' is not an empty slice.
-export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
- let ln = if (delim is u8) {
- yield 1z;
- } else {
- let ln = len(delim: []u8);
- assert(ln > 0, "bytes::cut called with empty delimiter");
- yield ln;
- };
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
+// The contents are borrowed from the input slice.
+export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
+ let ln = if (delim is u8) 1z else len(delim: []u8);
match (index(in, delim)) {
case let i: size =>
return (in[..i], in[i + ln..]);
case void =>
- return (in, []);
+ return void;
};
};
// Returns the input slice "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the delimiter,
-// in a tuple. The contents are borrowed from the input slice.
-//
-// The caller must ensure that 'delimiter' is not an empty slice.
-export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
- let ln = if (delim is u8) {
- yield 1z;
- } else {
- let ln = len(delim: []u8);
- assert(ln > 0, "bytes::rcut called with empty delimiter");
- yield ln;
- };
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
+// The contents are borrowed from the input slice.
+export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
+ let ln = if (delim is u8) 1z else len(delim: []u8);
match (rindex(in, delim)) {
case let i: size =>
return (in[..i], in[i + ln..]);
case void =>
- return (in, []);
+ return void;
};
};
@test fn cut() void = {
- const c = cut(['a', 'b', 'c'], ['b']);
+ const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = cut(['a', 'b', 'c'], 'b');
+ const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8);
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
+ const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
- const c = cut(['a', 'b', 'c'], 'x');
- assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
- const c = cut([], 'x');
- assert(equal(c.0, []) && equal(c.1, []));
+ assert(cut(['a', 'b', 'c'], 'x') is void);
+ assert(cut([], 'x') is void);
- const c = rcut(['a', 'b', 'c'], ['b']);
+ const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b');
+ const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a']));
};
diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha
@@ -42,10 +42,13 @@ fn default_tags() ([]module::tag | error) = {
for (true) match (bufio::scanline(pipe.0)?) {
case let b: []u8 =>
defer free(b);
- const (k, v) = strings::cut(strings::fromutf8(b)!, "\t");
- if (k == "Build tags") {
- tags = module::parsetags(v) as []module::tag;
- break;
+ match (strings::cut(strings::fromutf8(b)!, "\t")) {
+ case void => void;
+ case let s: (str, str) =>
+ if (s.0 == "Build tags") {
+ tags = module::parsetags(s.1) as []module::tag;
+ break;
+ };
};
case io::EOF =>
// process exited with failure; handled below
diff --git a/net/uri/query.ha b/net/uri/query.ha
@@ -30,7 +30,10 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = {
case => return;
};
- const raw = strings::cut(tok, "=");
+ const raw: (str, str) = match (strings::cut(tok, "=")) {
+ case let s: (str, str) => yield s;
+ case void => yield (tok, "");
+ };
strio::reset(&dec.bufs.0);
percent_decode_static(&dec.bufs.0, raw.0)?;
strio::reset(&dec.bufs.1);
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -401,65 +401,35 @@ export fn compile(expr: str) (regex | error) = {
};
};
+// Returns min, max, and the length of the text before the closing '}'.
fn parse_repetition(
s: str
-) (((void | size), (void | size), size) | error) = {
- const first_comma = strings::index(s, ",");
- const first_endbrace = strings::index(s, "}");
- if (first_endbrace is void) {
- return `Repetition expression syntax error '{n}'`: error;
- };
- const first_endbrace = first_endbrace as size;
-
- let min_str = "";
- let max_str = "";
- let is_single_arg = false;
- if (first_comma is void || first_endbrace < first_comma as size) {
- const cut = strings::cut(s, "}");
- min_str = cut.0;
- max_str = cut.0;
- is_single_arg = true;
- } else {
- const cut = strings::cut(s, ",");
- min_str = cut.0;
- max_str = strings::cut(cut.1, "}").0;
+) ((size, size, size) | error) = {
+ const brace_cut = match (strings::cut(s, "}")) {
+ case void => return `Repetition expression syntax error '{n}'`: error;
+ case let s: (str, str) => yield s;
};
- let min: (void | size) = void;
- let max: (void | size) = void;
-
- if (len(min_str) > 0) {
- min = match (strconv::stoi(min_str)) {
- case let res: int =>
- yield if (res < 0) {
- return `Negative repitition count '{-n}'`: error;
- } else {
- yield res: size;
- };
- case => return `Repetition expression syntax error '{n}'`: error;
- };
- } else {
- min = 0;
+ const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) {
+ case void =>
+ let n = parse_repnum(brace_cut.0)?;
+ return (n, n, len(brace_cut.0));
+ case let s: (str, str) => yield s;
};
- if (len(max_str) > 0) {
- max = match (strconv::stoi(max_str)) {
- case let res: int =>
- yield if (res < 0) {
- return `Negative repitition count '{-n}'`: error;
- } else {
- yield res: size;
- };
- case => return `Repetition expression syntax error '{n}'`: error;
- };
- };
+ const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?;
+ const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?;
+ return (min, max, len(brace_cut.0));
+};
- const rep_len = if (is_single_arg) {
- yield len(min_str);
+fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) {
+case let res: int =>
+ if (res < 0) {
+ return `Negative repetition count '{-n}'`: error;
} else {
- yield len(min_str) + 1 + len(max_str);
+ return res: size;
};
- return (min, max, rep_len);
+case => return `Repetition expression syntax error '{n}'`: error;
};
fn delete_thread(i: size, threads: *[]thread) void = {
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
@@ -179,44 +179,47 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
};
};
-// Returns a string "cut" along the first instance of a delimiter, returning
-// everything up to the delimiter, and everything after the delimiter, in a
-// tuple.
+// Returns a string "cut" along the first instance of a delimiter,
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
//
// strings::cut("hello=world=foobar", "=") // ("hello", "world=foobar")
-// strings::cut("hello world", "=") // ("hello world", "")
+// strings::cut("hello world", "=") // void
//
-// The return value is borrowed from the 'in' parameter. The caller must ensure
-// that 'delimiter' is not an empty string.
-export fn cut(in: str, delim: str) (str, str) = {
- let c = bytes::cut(toutf8(in), toutf8(delim));
- return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
+// The return value is borrowed from the 'in' parameter.
+export fn cut(in: str, delim: str) ((str, str) | void) = {
+ match (bytes::cut(toutf8(in), toutf8(delim))) {
+ case void => return void;
+ case let bs: ([]u8, []u8) =>
+ return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
+ };
};
-// Returns a string "cut" along the last instance of a delimiter, returning
-// everything up to the delimiter, and everything after the delimiter, in a
-// tuple.
+// Returns a string "cut" along the last instance of a delimiter,
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns
+// void.
//
// strings::rcut("hello=world=foobar", "=") // ("hello=world", "foobar")
-// strings::rcut("hello world", "=") // ("hello world", "")
+// strings::rcut("hello world", "=") // void
//
-// The return value is borrowed from the 'in' parameter. The caller must ensure
-// that 'delimiter' is not an empty string.
-export fn rcut(in: str, delim: str) (str, str) = {
- let c = bytes::rcut(toutf8(in), toutf8(delim));
- return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
+// The return value is borrowed from the 'in' parameter.
+export fn rcut(in: str, delim: str) ((str, str) | void) = {
+ match (bytes::rcut(toutf8(in), toutf8(delim))) {
+ case void => return void;
+ case let bs: ([]u8, []u8) =>
+ return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
+ };
};
@test fn cut() void = {
- const sample = cut("hello=world", "=");
+ const sample = cut("hello=world", "=") as (str, str);
assert(sample.0 == "hello" && sample.1 == "world");
- const sample = cut("hello=world=foobar", "=");
+ const sample = cut("hello=world=foobar", "=") as (str, str);
assert(sample.0 == "hello" && sample.1 == "world=foobar");
- const sample = cut("hello world", "=");
- assert(sample.0 == "hello world" && sample.1 == "");
- const sample = cut("", "=");
- assert(sample.0 == "" && sample.1 == "");
+ assert(cut("hello world", "=") is void);
+ assert(cut("", "=") is void);
- const sample = rcut("hello=world=foobar", "=");
+ const sample = rcut("hello=world=foobar", "=") as (str, str);
assert(sample.0 == "hello=world" && sample.1 == "foobar");
};