bytes,strings: make {cut,rcut} return a tagged union value - hare

commit da442e0bf76cac19a137a3f779b5e0d838b94c8a
parent ee6bbf85490c236f90e4eb789a8e9eb2eb77719d
Author: Autumn! <autumnull@posteo.net>
Date:   Fri, 21 Apr 2023 12:39:08 +0000

bytes,strings: make {cut,rcut} return a tagged union value

additionally removes the requirement that the delimiter can't be empty.

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
M bytes/tokenize.ha  | 54 +++++++++++++++++++-----------------------------------
M cmd/haredoc/env.ha  | 11 +++++++----
M net/uri/query.ha  | 5 ++++-
M regex/regex.ha  | 70 ++++++++++++++++++++--------------------------------------------------
M strings/tokenize.ha  | 53 ++++++++++++++++++++++++++++-------------------------

5 files changed, 78 insertions(+), 115 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -184,61 +184,45 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
 };
 
 // Returns the input slice "cut" along the first instance of a delimiter,
-// returning everything up to the delimiter, and everything after the delimiter,
-// in a tuple. The contents are borrowed from the input slice.
-//
-// The caller must ensure that 'delimiter' is not an empty slice.
-export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
-	let ln = if (delim is u8) {
-		yield 1z;
-	} else {
-		let ln = len(delim: []u8);
-		assert(ln > 0, "bytes::cut called with empty delimiter");
-		yield ln;
-	};
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
+// The contents are borrowed from the input slice.
+export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
+	let ln = if (delim is u8) 1z else len(delim: []u8);
 	match (index(in, delim)) {
 	case let i: size =>
 		return (in[..i], in[i + ln..]);
 	case void =>
-		return (in, []);
+		return void;
 	};
 };
 
 // Returns the input slice "cut" along the last instance of a delimiter,
-// returning everything up to the delimiter, and everything after the delimiter,
-// in a tuple. The contents are borrowed from the input slice.
-//
-// The caller must ensure that 'delimiter' is not an empty slice.
-export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = {
-	let ln = if (delim is u8) {
-		yield 1z;
-	} else {
-		let ln = len(delim: []u8);
-		assert(ln > 0, "bytes::rcut called with empty delimiter");
-		yield ln;
-	};
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
+// The contents are borrowed from the input slice.
+export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = {
+	let ln = if (delim is u8) 1z else len(delim: []u8);
 	match (rindex(in, delim)) {
 	case let i: size =>
 		return (in[..i], in[i + ln..]);
 	case void =>
-		return (in, []);
+		return void;
 	};
 };
 
 @test fn cut() void = {
-	const c = cut(['a', 'b', 'c'], ['b']);
+	const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = cut(['a', 'b', 'c'], 'b');
+	const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b');
+	const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a']));
-	const c = cut(['a', 'b', 'c'], 'x');
-	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, []));
-	const c = cut([], 'x');
-	assert(equal(c.0, []) && equal(c.1, []));
+	assert(cut(['a', 'b', 'c'], 'x') is void);
+	assert(cut([], 'x') is void);
 
-	const c = rcut(['a', 'b', 'c'], ['b']);
+	const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8);
 	assert(equal(c.0, ['a']) && equal(c.1, ['c']));
-	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b');
+	const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8);
 	assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a']));
 };
diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha
@@ -42,10 +42,13 @@ fn default_tags() ([]module::tag | error) = {
 	for (true) match (bufio::scanline(pipe.0)?) {
 	case let b: []u8 =>
 		defer free(b);
-		const (k, v) = strings::cut(strings::fromutf8(b)!, "\t");
-		if (k == "Build tags") {
-			tags = module::parsetags(v) as []module::tag;
-			break;
+		match (strings::cut(strings::fromutf8(b)!, "\t")) {
+		case void => void;
+		case let s: (str, str) =>
+			if (s.0 == "Build tags") {
+				tags = module::parsetags(s.1) as []module::tag;
+				break;
+			};
 		};
 	case io::EOF =>
 		// process exited with failure; handled below
diff --git a/net/uri/query.ha b/net/uri/query.ha
@@ -30,7 +30,10 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = {
 	case => return;
 	};
 
-	const raw = strings::cut(tok, "=");
+	const raw: (str, str) = match (strings::cut(tok, "=")) {
+	case let s: (str, str) => yield s;
+	case void => yield (tok, "");
+	};
 	strio::reset(&dec.bufs.0);
 	percent_decode_static(&dec.bufs.0, raw.0)?;
 	strio::reset(&dec.bufs.1);
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -401,65 +401,35 @@ export fn compile(expr: str) (regex | error) = {
 	};
 };
 
+// returns min, max, and the length of the text before the closing '}'
 fn parse_repetition(
 	s: str
-) (((void | size), (void | size), size) | error) = {
-	const first_comma = strings::index(s, ",");
-	const first_endbrace = strings::index(s, "}");
-	if (first_endbrace is void) {
-		return `Repetition expression syntax error '{n}'`: error;
-	};
-	const first_endbrace = first_endbrace as size;
-
-	let min_str = "";
-	let max_str = "";
-	let is_single_arg = false;
-	if (first_comma is void || first_endbrace < first_comma as size) {
-		const cut = strings::cut(s, "}");
-		min_str = cut.0;
-		max_str = cut.0;
-		is_single_arg = true;
-	} else {
-		const cut = strings::cut(s, ",");
-		min_str = cut.0;
-		max_str = strings::cut(cut.1, "}").0;
+) ((size, size, size) | error) = {
+	const brace_cut = match (strings::cut(s, "}")) {
+	case void => return `Repetition expression syntax error '{n}'`: error;
+	case let s: (str, str) => yield s;
 	};
 
-	let min: (void | size) = void;
-	let max: (void | size) = void;
-
-	if (len(min_str) > 0) {
-		min = match (strconv::stoi(min_str)) {
-		case let res: int =>
-			yield if (res < 0) {
-				return `Negative repitition count '{-n}'`: error;
-			} else {
-				yield res: size;
-			};
-		case => return `Repetition expression syntax error '{n}'`: error;
-		};
-	} else {
-		min = 0;
+	const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) {
+	case void =>
+		let n = parse_repnum(brace_cut.0)?;
+		return (n, n, len(brace_cut.0));
+	case let s: (str, str) => yield s;
 	};
 
-	if (len(max_str) > 0) {
-		max = match (strconv::stoi(max_str)) {
-		case let res: int =>
-			yield if (res < 0) {
-				return `Negative repitition count '{-n}'`: error;
-			} else {
-				yield res: size;
-			};
-		case => return `Repetition expression syntax error '{n}'`: error;
-		};
-	};
+	const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?;
+	const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?;
+	return (min, max, len(brace_cut.0));
+};
 
-	const rep_len = if (is_single_arg) {
-		yield len(min_str);
+fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) {
+case let res: int =>
+	if (res < 0) {
+		return `Negative repetition count '{-n}'`: error;
 	} else {
-		yield len(min_str) + 1 + len(max_str);
+		return res: size;
 	};
-	return (min, max, rep_len);
+case => return `Repetition expression syntax error '{n}'`: error;
 };
 
 fn delete_thread(i: size, threads: *[]thread) void = {
diff --git a/strings/tokenize.ha b/strings/tokenize.ha
@@ -179,44 +179,47 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX);
 	};
 };
 
-// Returns a string "cut" along the first instance of a delimiter, returning
-// everything up to the delimiter, and everything after the delimiter, in a
-// tuple.
+// Returns a string "cut" along the first instance of a delimiter,
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void.
 //
 // 	strings::cut("hello=world=foobar", "=")	// ("hello", "world=foobar")
-// 	strings::cut("hello world", "=")	// ("hello world", "")
+// 	strings::cut("hello world", "=")	// void
 //
-// The return value is borrowed from the 'in' parameter.  The caller must ensure
-// that 'delimiter' is not an empty string.
-export fn cut(in: str, delim: str) (str, str) = {
-	let c = bytes::cut(toutf8(in), toutf8(delim));
-	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
+// The return value is borrowed from the 'in' parameter.
+export fn cut(in: str, delim: str) ((str, str) | void) = {
+	match (bytes::cut(toutf8(in), toutf8(delim))) {
+	case void => return void;
+	case let bs: ([]u8, []u8) =>
+		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
+	};
 };
 
-// Returns a string "cut" along the last instance of a delimiter, returning
-// everything up to the delimiter, and everything after the delimiter, in a
-// tuple.
+// Returns a string "cut" along the last instance of a delimiter,
+// returning everything up to the delimiter, and everything after the
+// delimiter, in a tuple. If the delimiter is not found, returns void
+// instead of a tuple.
 //
 // 	strings::rcut("hello=world=foobar", "=")	// ("hello=world", "foobar")
-// 	strings::rcut("hello world", "=")	// ("hello world", "")
+// 	strings::rcut("hello world", "=")	// void
 //
-// The return value is borrowed from the 'in' parameter.  The caller must ensure
-// that 'delimiter' is not an empty string.
-export fn rcut(in: str, delim: str) (str, str) = {
-	let c = bytes::rcut(toutf8(in), toutf8(delim));
-	return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1));
+// The return value is borrowed from the 'in' parameter.
+export fn rcut(in: str, delim: str) ((str, str) | void) = {
+	match (bytes::rcut(toutf8(in), toutf8(delim))) {
+	case void => return void;
+	case let bs: ([]u8, []u8) =>
+		return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1));
+	};
 };
 
 @test fn cut() void = {
-	const sample = cut("hello=world", "=");
+	const sample = cut("hello=world", "=") as (str, str);
 	assert(sample.0 == "hello" && sample.1 == "world");
-	const sample = cut("hello=world=foobar", "=");
+	const sample = cut("hello=world=foobar", "=") as (str, str);
 	assert(sample.0 == "hello" && sample.1 == "world=foobar");
-	const sample = cut("hello world", "=");
-	assert(sample.0 == "hello world" && sample.1 == "");
-	const sample = cut("", "=");
-	assert(sample.0 == "" && sample.1 == "");
+	assert(cut("hello world", "=") is void);
+	assert(cut("", "=") is void);
 
-	const sample = rcut("hello=world=foobar", "=");
+	const sample = rcut("hello=world=foobar", "=") as (str, str);
 	assert(sample.0 == "hello=world" && sample.1 == "foobar");
 };

	hare [hare] The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE

M	bytes/tokenize.ha	\|	54	+++++++++++++++++++-----------------------------------
M	cmd/haredoc/env.ha	\|	11	+++++++----
M	net/uri/query.ha	\|	5	++++-
M	regex/regex.ha	\|	70	++++++++++++++++++++--------------------------------------------------
M	strings/tokenize.ha	\|	53	++++++++++++++++++++++++++++-------------------------