Return tuple directly from strings,bytes::cut,rcut - hare - [hare] The Hare programming language

commit 4df4835125b9f78e0aedec704cca4c6c1ea231e5
parent c22db267b94ff4a5524e8a0b38adb70924e9447d
Author: Drew DeVault <sir@cmpwn.com>
Date:   Mon, 15 May 2023 10:01:12 +0200

Return tuple directly from strings,bytes::cut,rcut

strings::cut et al are convenience functions which aim to address the
common 95% of cases, an approach which is common to much of the standard
library's design. It is not important for this interface to be
exhaustive; other tools are available for those who need to treat the
presence or absence of the delimiter differently. The convenience of
this convenience function is greatly diminished should the 95% of users
who do not need to distinguish these cases be required to add `as (str,
str)` -- a full 25% of the 80-character line width budget -- for every
call.

This reverts commit da442e0bf76cac19a137a3f779b5e0d838b94c8a.
This reverts commit aa9d6b57fed162be8d5d1c59ef3fb0614e504bba.

Diffstat:
M bytes/tokenize.ha  | 54 +++++++++++++++++++++++++++++++++++-------------------
M cmd/haredoc/env.ha  | 11 ++++-------
M mime/parse.ha  | 17 ++++++-----------
M mime/system.ha  | 5 +----
M net/uri/query.ha  | 5 +----
M regex/regex.ha  | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M strings/tokenize.ha  | 53 +++++++++++++++++++++++++----------------------------

7 files changed, 122 insertions(+), 93 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -184,45 +184,61 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
 };
 
 // Returns the input slice "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
-// The contents are borrowed from the input slice.
-export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
-	let ln = if (delim is u8) 1z else len(delim: []u8);
+// returning everything up to the delimiter, and everything after the delimiter,
+// in a tuple. The contents are borrowed from the input slice.
+//
+// The caller must ensure that 'delim' is not an empty slice.
+export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
+	let ln = if (delim is u8) {
+		yield 1z;
+	} else {
+		let ln = len(delim: []u8);
+		assert(ln > 0, "bytes::cut called with empty delimiter");
+		yield ln;
+	};
 	match (index(in, delim)) {
 	case let i: size =>
 		return (in[..i], in[i + ln..]);
 	case void =>
-		return void;
+		return (in, []);
 	};
 };
 
 // Returns the input slice "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
-// The contents are borrowed from the input slice.
-export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
-	let ln = if (delim is u8) 1z else len(delim: []u8);
+// returning everything up to the delimiter, and everything after the delimiter,
+// in a tuple. The contents are borrowed from the input slice.
+//
+// The caller must ensure that 'delim' is not an empty slice.
+export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
+	let ln = if (delim is u8) {
+		yield 1z;
+	} else {
+		let ln = len(delim: []u8);
+		assert(ln > 0, "bytes::rcut called with empty delimiter");
+		yield ln;
+	};
 	match (rindex(in, delim)) {
 	case let i: size =>
 		return (in[..i], in[i + ln..]);
 	case void =>
-		return void;
+		return (in, []);
 	};
 };
 
 @test fn cut() void = {
-	const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
+	const c = cut(['a', 'b', 'c'], ['b']);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8);
+	const c = cut(['a', 'b', 'c'], 'b');
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
+	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
 	assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
-	assert(cut(['a', 'b', 'c'], 'x') is void);
-	assert(cut([], 'x') is void);
+	const c = cut(['a', 'b', 'c'], 'x');
+	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
+	const c = cut([], 'x');
+	assert(equal(c.0, []) && equal(c.1, []));
 
-	const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
+	const c = rcut(['a', 'b', 'c'], ['b']);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
+	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b');
 	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a']));
 };
diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha
@@ -42,13 +42,10 @@ fn default_tags() ([]module::tag | error) = {
 	for (true) match (bufio::scanline(pipe.0)?) {
 	case let b: []u8 =>
 		defer free(b);
-		match (strings::cut(strings::fromutf8(b)!, "\t")) {
-		case void => void;
-		case let s: (str, str) =>
-			if (s.0 == "Build tags") {
-				tags = module::parsetags(s.1) as []module::tag;
-				break;
-			};
+		const (k, v) = strings::cut(strings::fromutf8(b)!, "\t");
+		if (k == "Build tags") {
+			tags = module::parsetags(v) as []module::tag;
+			break;
 		};
 	case io::EOF =>
 		// process exited with failure; handled below
diff --git a/mime/parse.ha b/mime/parse.ha
@@ -17,13 +17,11 @@ export type type_params = strings::tokenizer;
 // cause [[errors::invalid]] to be returned unless [[next_param]] is used to
 // enumerate all of the parameters.
 export fn parse(in: str) ((str, type_params) | errors::invalid) = {
-	const (mtype, params) = match (strings::cut(in, ";")) {
-	case void => yield (in, "");
-	case let items: (str, str) => yield items;
-	};
-	const items = match (strings::cut(mtype, "/")) {
-	case void => return errors::invalid;
-	case let items: (str, str) => yield items;
+	const items = strings::cut(in, ";");
+	const mtype = items.0, params = items.1;
+	const items = strings::cut(mtype, "/");
+	if (len(items.0) < 1 || len(items.1) < 1) {
+		return errors::invalid;
 	};
 	typevalid(items.0)?;
 	typevalid(items.1)?;
@@ -45,10 +43,7 @@ export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
 		return;
 	};
 
-	const items = match (strings::cut(tok, "=")) {
-	case void => return errors::invalid;
-	case let items: (str, str) => yield items;
-	};
+	const items = strings::cut(tok, "=");
 	// The RFC does not permit whitespace here, but whitespace is very
 	// common in the wild. ¯\_(ツ)_/¯
 	items.0 = strings::trim(items.0);
diff --git a/mime/system.ha b/mime/system.ha
@@ -49,10 +49,7 @@ fn load_systemdb() (void | fs::error | io::error) = {
 			continue;
 		};
 
-		const items = match (strings::cut(line, "\t")) {
-		case void => continue;
-		case let items: (str, str) => yield items;
-		};
+		const items = strings::cut(line, "\t");
 		const mime = strings::trim(items.0),
 			exts = strings::trim(items.1);
 		if (len(exts) == 0) {
diff --git a/net/uri/query.ha b/net/uri/query.ha
@@ -30,10 +30,7 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = {
 	case => return;
 	};
 
-	const raw: (str, str) = match (strings::cut(tok, "=")) {
-	case let s: (str, str) => yield s;
-	case void => yield (tok, "");
-	};
+	const raw = strings::cut(tok, "=");
 	strio::reset(&dec.bufs.0);
 	percent_decode_static(&dec.bufs.0, raw.0)?;
 	strio::reset(&dec.bufs.1);
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -401,35 +401,65 @@ export fn compile(expr: str) (regex | error) = {
 	};
 };
 
-// returns min, max, and length of string matched
 fn parse_repetition(
 	s: str
-) ((size, size, size) | error) = {
-	const brace_cut = match (strings::cut(s, "}")) {
-	case void => return `Repetition expression syntax error '{n}'`: error;
-	case let s: (str, str) => yield s;
+) (((void | size), (void | size), size) | error) = {
+	const first_comma = strings::index(s, ",");
+	const first_endbrace = strings::index(s, "}");
+	if (first_endbrace is void) {
+		return `Repetition expression syntax error '{n}'`: error;
+	};
+	const first_endbrace = first_endbrace as size;
+
+	let min_str = "";
+	let max_str = "";
+	let is_single_arg = false;
+	if (first_comma is void || first_endbrace < first_comma as size) {
+		const cut = strings::cut(s, "}");
+		min_str = cut.0;
+		max_str = cut.0;
+		is_single_arg = true;
+	} else {
+		const cut = strings::cut(s, ",");
+		min_str = cut.0;
+		max_str = strings::cut(cut.1, "}").0;
 	};
 
-	const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) {
-	case void =>
-		let n = parse_repnum(brace_cut.0)?;
-		return (n, n, len(brace_cut.0));
-	case let s: (str, str) => yield s;
+	let min: (void | size) = void;
+	let max: (void | size) = void;
+
+	if (len(min_str) > 0) {
+		min = match (strconv::stoi(min_str)) {
+		case let res: int =>
+			yield if (res < 0) {
+				return `Negative repitition count '{-n}'`: error;
+			} else {
+				yield res: size;
+			};
+		case => return `Repetition expression syntax error '{n}'`: error;
+		};
+	} else {
+		min = 0;
 	};
 
-	const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?;
-	const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?;
-	return (min, max, len(brace_cut.0));
-};
+	if (len(max_str) > 0) {
+		max = match (strconv::stoi(max_str)) {
+		case let res: int =>
+			yield if (res < 0) {
+				return `Negative repitition count '{-n}'`: error;
+			} else {
+				yield res: size;
+			};
+		case => return `Repetition expression syntax error '{n}'`: error;
+		};
+	};
 
-fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) {
-case let res: int =>
-	if (res < 0) {
-		return `Negative repetition count '{-n}'`: error;
+	const rep_len = if (is_single_arg) {
+		yield len(min_str);
 	} else {
-		return res: size;
+		yield len(min_str) + 1 + len(max_str);
 	};
-case => return `Repetition expression syntax error '{n}'`: error;
+	return (min, max, rep_len);
 };
 
 fn delete_thread(i: size, threads: *[]thread) void = {
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
@@ -179,47 +179,44 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
 	};
 };
 
-// Returns a string "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, returns void.
+// Returns a string "cut" along the first instance of a delimiter, returning
+// everything up to the delimiter, and everything after the delimiter, in a
+// tuple.
 //
 // 	strings::cut("hello=world=foobar", "=")	// ("hello", "world=foobar")
-// 	strings::cut("hello world", "=")	// void
+// 	strings::cut("hello world", "=")	// ("hello world", "")
 //
-// The return value is borrowed from the 'in' parameter.
-export fn cut(in: str, delim: str) ((str, str) | void) = {
-	match (bytes::cut(toutf8(in), toutf8(delim))) {
-	case void => return void;
-	case let bs: ([]u8, []u8) =>
-		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
-	};
+// The return value is borrowed from the 'in' parameter.  The caller must ensure
+// that 'delim' is not an empty string.
+export fn cut(in: str, delim: str) (str, str) = {
+	let c = bytes::cut(toutf8(in), toutf8(delim));
+	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
 };
 
-// Returns a string "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the
-// delimiter, in a tuple. If the delimiter is not found, the first result
-// will be void.
+// Returns a string "cut" along the last instance of a delimiter, returning
+// everything up to the delimiter, and everything after the delimiter, in a
+// tuple.
 //
 // 	strings::rcut("hello=world=foobar", "=")	// ("hello=world", "foobar")
-// 	strings::rcut("hello world", "=")	// void
+// 	strings::rcut("hello world", "=")	// ("hello world", "")
 //
-// The return value is borrowed from the 'in' parameter.
-export fn rcut(in: str, delim: str) ((str, str) | void) = {
-	match (bytes::rcut(toutf8(in), toutf8(delim))) {
-	case void => return void;
-	case let bs: ([]u8, []u8) =>
-		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
-	};
+// The return value is borrowed from the 'in' parameter.  The caller must ensure
+// that 'delim' is not an empty string.
+export fn rcut(in: str, delim: str) (str, str) = {
+	let c = bytes::rcut(toutf8(in), toutf8(delim));
+	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
 };
 
 @test fn cut() void = {
-	const sample = cut("hello=world", "=") as (str, str);
+	const sample = cut("hello=world", "=");
 	assert(sample.0 == "hello" && sample.1 == "world");
-	const sample = cut("hello=world=foobar", "=") as (str, str);
+	const sample = cut("hello=world=foobar", "=");
 	assert(sample.0 == "hello" && sample.1 == "world=foobar");
-	assert(cut("hello world", "=") is void);
-	assert(cut("", "=") is void);
+	const sample = cut("hello world", "=");
+	assert(sample.0 == "hello world" && sample.1 == "");
+	const sample = cut("", "=");
+	assert(sample.0 == "" && sample.1 == "");
 
-	const sample = rcut("hello=world=foobar", "=") as (str, str);
+	const sample = rcut("hello=world=foobar", "=");
 	assert(sample.0 == "hello=world" && sample.1 == "foobar");
 };

	hare [hare] The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE

M	bytes/tokenize.ha	\|	54	+++++++++++++++++++++++++++++++++++-------------------
M	cmd/haredoc/env.ha	\|	11	++++-------
M	mime/parse.ha	\|	17	++++++-----------
M	mime/system.ha	\|	5	+----
M	net/uri/query.ha	\|	5	+----
M	regex/regex.ha	\|	70	++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M	strings/tokenize.ha	\|	53	+++++++++++++++++++++++++----------------------------