commit 4df4835125b9f78e0aedec704cca4c6c1ea231e5
parent c22db267b94ff4a5524e8a0b38adb70924e9447d
Author: Drew DeVault <sir@cmpwn.com>
Date: Mon, 15 May 2023 10:01:12 +0200
Return tuple directly from strings,bytes::cut,rcut
strings::cut et al are convenience functions which aim to address the
common 95% of cases, an approach which is common to much of the standard
library's design. It is not important for this interface to be
exhaustive; other tools are available for those who need to treat the
presence or absence of the delimiter differently. The convenience of
this convenience function is greatly diminished should the 95% of users
who do not need to distinguish these cases be required to add `as (str,
str)` -- a full 25% of the 80-character line width budget -- for every
call.
This reverts commit da442e0bf76cac19a137a3f779b5e0d838b94c8a.
This reverts commit aa9d6b57fed162be8d5d1c59ef3fb0614e504bba.
Diffstat:
7 files changed, 122 insertions(+), 93 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -184,45 +184,61 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
};
// Returns the input slice "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
-// The contents are borrowed from the input slice.
-export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
- let ln = if (delim is u8) 1z else len(delim: []u8);
+// returning everything up to the delimiter, and everything after the delimiter,
+// in a tuple. The contents are borrowed from the input slice.
+//
+// The caller must ensure that 'delim' is not an empty slice.
+export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
+ let ln = if (delim is u8) {
+ yield 1z;
+ } else {
+ let ln = len(delim: []u8);
+ assert(ln > 0, "bytes::cut called with empty delimiter");
+ yield ln;
+ };
match (index(in, delim)) {
case let i: size =>
return (in[..i], in[i + ln..]);
case void =>
- return void;
+ return (in, []);
};
};
// Returns the input slice "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
-// The contents are borrowed from the input slice.
-export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
- let ln = if (delim is u8) 1z else len(delim: []u8);
+// returning everything up to the delimiter, and everything after the delimiter,
+// in a tuple. The contents are borrowed from the input slice.
+//
+// The caller must ensure that 'delim' is not an empty slice.
+export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
+ let ln = if (delim is u8) {
+ yield 1z;
+ } else {
+ let ln = len(delim: []u8);
+ assert(ln > 0, "bytes::rcut called with empty delimiter");
+ yield ln;
+ };
match (rindex(in, delim)) {
case let i: size =>
return (in[..i], in[i + ln..]);
case void =>
- return void;
+ return (in, []);
};
};
@test fn cut() void = {
- const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
+ const c = cut(['a', 'b', 'c'], ['b']);
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8);
+ const c = cut(['a', 'b', 'c'], 'b');
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
+ const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
- assert(cut(['a', 'b', 'c'], 'x') is void);
- assert(cut([], 'x') is void);
+ const c = cut(['a', 'b', 'c'], 'x');
+ assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
+ const c = cut([], 'x');
+ assert(equal(c.0, []) && equal(c.1, []));
- const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
+ const c = rcut(['a', 'b', 'c'], ['b']);
assert(equal(c.0, ['a']) && equal(c.1, ['c']));
- const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
+ const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b');
assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a']));
};
diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha
@@ -42,13 +42,10 @@ fn default_tags() ([]module::tag | error) = {
for (true) match (bufio::scanline(pipe.0)?) {
case let b: []u8 =>
defer free(b);
- match (strings::cut(strings::fromutf8(b)!, "\t")) {
- case void => void;
- case let s: (str, str) =>
- if (s.0 == "Build tags") {
- tags = module::parsetags(s.1) as []module::tag;
- break;
- };
+ const (k, v) = strings::cut(strings::fromutf8(b)!, "\t");
+ if (k == "Build tags") {
+ tags = module::parsetags(v) as []module::tag;
+ break;
};
case io::EOF =>
// process exited with failure; handled below
diff --git a/mime/parse.ha b/mime/parse.ha
@@ -17,13 +17,11 @@ export type type_params = strings::tokenizer;
// cause [[errors::invalid]] to be returned unless [[next_param]] is used to
// enumerate all of the parameters.
export fn parse(in: str) ((str, type_params) | errors::invalid) = {
- const (mtype, params) = match (strings::cut(in, ";")) {
- case void => yield (in, "");
- case let items: (str, str) => yield items;
- };
- const items = match (strings::cut(mtype, "/")) {
- case void => return errors::invalid;
- case let items: (str, str) => yield items;
+ const items = strings::cut(in, ";");
+ const mtype = items.0, params = items.1;
+ const items = strings::cut(mtype, "/");
+ if (len(items.0) < 1 || len(items.1) < 1) {
+ return errors::invalid;
};
typevalid(items.0)?;
typevalid(items.1)?;
@@ -45,10 +43,7 @@ export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
return;
};
- const items = match (strings::cut(tok, "=")) {
- case void => return errors::invalid;
- case let items: (str, str) => yield items;
- };
+ const items = strings::cut(tok, "=");
// The RFC does not permit whitespace here, but whitespace is very
// common in the wild. ¯\_(ツ)_/¯
items.0 = strings::trim(items.0);
diff --git a/mime/system.ha b/mime/system.ha
@@ -49,10 +49,7 @@ fn load_systemdb() (void | fs::error | io::error) = {
continue;
};
- const items = match (strings::cut(line, "\t")) {
- case void => continue;
- case let items: (str, str) => yield items;
- };
+ const items = strings::cut(line, "\t");
const mime = strings::trim(items.0),
exts = strings::trim(items.1);
if (len(exts) == 0) {
diff --git a/net/uri/query.ha b/net/uri/query.ha
@@ -30,10 +30,7 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = {
case => return;
};
- const raw: (str, str) = match (strings::cut(tok, "=")) {
- case let s: (str, str) => yield s;
- case void => yield (tok, "");
- };
+ const raw = strings::cut(tok, "=");
strio::reset(&dec.bufs.0);
percent_decode_static(&dec.bufs.0, raw.0)?;
strio::reset(&dec.bufs.1);
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -401,35 +401,65 @@ export fn compile(expr: str) (regex | error) = {
};
};
-// returns min, max, and length of string matched
fn parse_repetition(
s: str
-) ((size, size, size) | error) = {
- const brace_cut = match (strings::cut(s, "}")) {
- case void => return `Repetition expression syntax error '{n}'`: error;
- case let s: (str, str) => yield s;
+) (((void | size), (void | size), size) | error) = {
+ const first_comma = strings::index(s, ",");
+ const first_endbrace = strings::index(s, "}");
+ if (first_endbrace is void) {
+ return `Repetition expression syntax error '{n}'`: error;
+ };
+ const first_endbrace = first_endbrace as size;
+
+ let min_str = "";
+ let max_str = "";
+ let is_single_arg = false;
+ if (first_comma is void || first_endbrace < first_comma as size) {
+ const cut = strings::cut(s, "}");
+ min_str = cut.0;
+ max_str = cut.0;
+ is_single_arg = true;
+ } else {
+ const cut = strings::cut(s, ",");
+ min_str = cut.0;
+ max_str = strings::cut(cut.1, "}").0;
};
- const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) {
- case void =>
- let n = parse_repnum(brace_cut.0)?;
- return (n, n, len(brace_cut.0));
- case let s: (str, str) => yield s;
+ let min: (void | size) = void;
+ let max: (void | size) = void;
+
+ if (len(min_str) > 0) {
+ min = match (strconv::stoi(min_str)) {
+ case let res: int =>
+ yield if (res < 0) {
+ return `Negative repetition count '{-n}'`: error;
+ } else {
+ yield res: size;
+ };
+ case => return `Repetition expression syntax error '{n}'`: error;
+ };
+ } else {
+ min = 0;
};
- const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?;
- const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?;
- return (min, max, len(brace_cut.0));
-};
+ if (len(max_str) > 0) {
+ max = match (strconv::stoi(max_str)) {
+ case let res: int =>
+ yield if (res < 0) {
+ return `Negative repetition count '{-n}'`: error;
+ } else {
+ yield res: size;
+ };
+ case => return `Repetition expression syntax error '{n}'`: error;
+ };
+ };
-fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) {
-case let res: int =>
- if (res < 0) {
- return `Negative repetition count '{-n}'`: error;
+ const rep_len = if (is_single_arg) {
+ yield len(min_str);
} else {
- return res: size;
+ yield len(min_str) + 1 + len(max_str);
};
-case => return `Repetition expression syntax error '{n}'`: error;
+ return (min, max, rep_len);
};
fn delete_thread(i: size, threads: *[]thread) void = {
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
@@ -179,47 +179,44 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
};
};
-// Returns a string "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
+// Returns a string "cut" along the first instance of a delimiter, returning
+// everything up to the delimiter, and everything after the delimiter, in a
+// tuple.
//
// strings::cut("hello=world=foobar", "=") // ("hello", "world=foobar")
-// strings::cut("hello world", "=") // void
+// strings::cut("hello world", "=") // ("hello world", "")
//
-// The return value is borrowed from the 'in' parameter.
-export fn cut(in: str, delim: str) ((str, str) | void) = {
- match (bytes::cut(toutf8(in), toutf8(delim))) {
- case void => return void;
- case let bs: ([]u8, []u8) =>
- return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
- };
+// The return value is borrowed from the 'in' parameter. The caller must ensure
+// that 'delim' is not an empty string.
+export fn cut(in: str, delim: str) (str, str) = {
+ let c = bytes::cut(toutf8(in), toutf8(delim));
+ return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
};
-// Returns a string "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, the first result
-// will be void.
+// Returns a string "cut" along the last instance of a delimiter, returning
+// everything up to the delimiter, and everything after the delimiter, in a
+// tuple.
//
// strings::rcut("hello=world=foobar", "=") // ("hello=world", "foobar")
-// strings::rcut("hello world", "=") // void
+// strings::rcut("hello world", "=") // ("hello world", "")
//
-// The return value is borrowed from the 'in' parameter.
-export fn rcut(in: str, delim: str) ((str, str) | void) = {
- match (bytes::rcut(toutf8(in), toutf8(delim))) {
- case void => return void;
- case let bs: ([]u8, []u8) =>
- return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
- };
+// The return value is borrowed from the 'in' parameter. The caller must ensure
+// that 'delim' is not an empty string.
+export fn rcut(in: str, delim: str) (str, str) = {
+ let c = bytes::rcut(toutf8(in), toutf8(delim));
+ return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
};
@test fn cut() void = {
- const sample = cut("hello=world", "=") as (str, str);
+ const sample = cut("hello=world", "=");
assert(sample.0 == "hello" && sample.1 == "world");
- const sample = cut("hello=world=foobar", "=") as (str, str);
+ const sample = cut("hello=world=foobar", "=");
assert(sample.0 == "hello" && sample.1 == "world=foobar");
- assert(cut("hello world", "=") is void);
- assert(cut("", "=") is void);
+ const sample = cut("hello world", "=");
+ assert(sample.0 == "hello world" && sample.1 == "");
+ const sample = cut("", "=");
+ assert(sample.0 == "" && sample.1 == "");
- const sample = rcut("hello=world=foobar", "=") as (str, str);
+ const sample = rcut("hello=world=foobar", "=");
assert(sample.0 == "hello=world" && sample.1 == "foobar");
};