hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 4df4835125b9f78e0aedec704cca4c6c1ea231e5
parent c22db267b94ff4a5524e8a0b38adb70924e9447d
Author: Drew DeVault <sir@cmpwn.com>
Date:   Mon, 15 May 2023 10:01:12 +0200

Return tuple directly from strings,bytes::cut,rcut

strings::cut et al are convenience functions which aim to address the
common 95% of cases, an approach which is common to much of the standard
library's design. It is not important for this interface to be
exhaustive; other tools are available for those who need to treat the
presence or absence of the delimiter differently. The convenience of
this convenience function is greatly diminished should the 95% of users
who do not need to distinguish these cases be required to add `as (str,
str)` -- a full 25% of the 80-character line width budget -- for every
call.

This reverts commit da442e0bf76cac19a137a3f779b5e0d838b94c8a.
This reverts commit aa9d6b57fed162be8d5d1c59ef3fb0614e504bba.

Diffstat:
M bytes/tokenize.ha | 54 +++++++++++++++++++++++++++++++++++-------------------
M cmd/haredoc/env.ha | 11 ++++-------
M mime/parse.ha | 17 ++++++-----------
M mime/system.ha | 5 +----
M net/uri/query.ha | 5 +----
M regex/regex.ha | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M strings/tokenize.ha | 53 +++++++++++++++++++++++++----------------------------
7 files changed, 122 insertions(+), 93 deletions(-)

diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha @@ -184,45 +184,61 @@ export fn remaining_tokens(s: *tokenizer) []u8 = { }; // Returns the input slice "cut" along the first instance of a delimiter, -// returning everything up to the delimiter, and everything after the -// delimiter, in a tuple. If the delimiter is not found, returns void. -// The contents are borrowed from the input slice. -export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = { - let ln = if (delim is u8) 1z else len(delim: []u8); +// returning everything up to the delimiter, and everything after the delimiter, +// in a tuple. The contents are borrowed from the input slice. +// +// The caller must ensure that 'delimiter' is not an empty slice. +export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = { + let ln = if (delim is u8) { + yield 1z; + } else { + let ln = len(delim: []u8); + assert(ln > 0, "bytes::cut called with empty delimiter"); + yield ln; + }; match (index(in, delim)) { case let i: size => return (in[..i], in[i + ln..]); case void => - return void; + return (in, []); }; }; // Returns the input slice "cut" along the last instance of a delimiter, -// returning everything up to the delimiter, and everything after the -// delimiter, in a tuple. If the delimiter is not found, returns void. -// The contents are borrowed from the input slice. -export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = { - let ln = if (delim is u8) 1z else len(delim: []u8); +// returning everything up to the delimiter, and everything after the delimiter, +// in a tuple. The contents are borrowed from the input slice. +// +// The caller must ensure that 'delimiter' is not an empty slice. 
+export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = { + let ln = if (delim is u8) { + yield 1z; + } else { + let ln = len(delim: []u8); + assert(ln > 0, "bytes::rcut called with empty delimiter"); + yield ln; + }; match (rindex(in, delim)) { case let i: size => return (in[..i], in[i + ln..]); case void => - return void; + return (in, []); }; }; @test fn cut() void = { - const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8); + const c = cut(['a', 'b', 'c'], ['b']); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8); + const c = cut(['a', 'b', 'c'], 'b'); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8); + const c = cut(['a', 'b', 'c', 'b', 'a'], 'b'); assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a'])); - assert(cut(['a', 'b', 'c'], 'x') is void); - assert(cut([], 'x') is void); + const c = cut(['a', 'b', 'c'], 'x'); + assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, [])); + const c = cut([], 'x'); + assert(equal(c.0, []) && equal(c.1, [])); - const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8); + const c = rcut(['a', 'b', 'c'], ['b']); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8); + const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b'); assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a'])); }; diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha @@ -42,13 +42,10 @@ fn default_tags() ([]module::tag | error) = { for (true) match (bufio::scanline(pipe.0)?) 
{ case let b: []u8 => defer free(b); - match (strings::cut(strings::fromutf8(b)!, "\t")) { - case void => void; - case let s: (str, str) => - if (s.0 == "Build tags") { - tags = module::parsetags(s.1) as []module::tag; - break; - }; + const (k, v) = strings::cut(strings::fromutf8(b)!, "\t"); + if (k == "Build tags") { + tags = module::parsetags(v) as []module::tag; + break; }; case io::EOF => // process exited with failure; handled below diff --git a/mime/parse.ha b/mime/parse.ha @@ -17,13 +17,11 @@ export type type_params = strings::tokenizer; // cause [[errors::invalid]] to be returned unless [[next_param]] is used to // enumerate all of the parameters. export fn parse(in: str) ((str, type_params) | errors::invalid) = { - const (mtype, params) = match (strings::cut(in, ";")) { - case void => yield (in, ""); - case let items: (str, str) => yield items; - }; - const items = match (strings::cut(mtype, "/")) { - case void => return errors::invalid; - case let items: (str, str) => yield items; + const items = strings::cut(in, ";"); + const mtype = items.0, params = items.1; + const items = strings::cut(mtype, "/"); + if (len(items.0) < 1 || len(items.1) < 1) { + return errors::invalid; }; typevalid(items.0)?; typevalid(items.1)?; @@ -45,10 +43,7 @@ export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = { return; }; - const items = match (strings::cut(tok, "=")) { - case void => return errors::invalid; - case let items: (str, str) => yield items; - }; + const items = strings::cut(tok, "="); // The RFC does not permit whitespace here, but whitespace is very // common in the wild. 
¯\_(ツ)_/¯ items.0 = strings::trim(items.0); diff --git a/mime/system.ha b/mime/system.ha @@ -49,10 +49,7 @@ fn load_systemdb() (void | fs::error | io::error) = { continue; }; - const items = match (strings::cut(line, "\t")) { - case void => continue; - case let items: (str, str) => yield items; - }; + const items = strings::cut(line, "\t"); const mime = strings::trim(items.0), exts = strings::trim(items.1); if (len(exts) == 0) { diff --git a/net/uri/query.ha b/net/uri/query.ha @@ -30,10 +30,7 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = { case => return; }; - const raw: (str, str) = match (strings::cut(tok, "=")) { - case let s: (str, str) => yield s; - case void => yield (tok, ""); - }; + const raw = strings::cut(tok, "="); strio::reset(&dec.bufs.0); percent_decode_static(&dec.bufs.0, raw.0)?; strio::reset(&dec.bufs.1); diff --git a/regex/regex.ha b/regex/regex.ha @@ -401,35 +401,65 @@ export fn compile(expr: str) (regex | error) = { }; }; -// returns min, max, and length of string matched fn parse_repetition( s: str -) ((size, size, size) | error) = { - const brace_cut = match (strings::cut(s, "}")) { - case void => return `Repetition expression syntax error '{n}'`: error; - case let s: (str, str) => yield s; +) (((void | size), (void | size), size) | error) = { + const first_comma = strings::index(s, ","); + const first_endbrace = strings::index(s, "}"); + if (first_endbrace is void) { + return `Repetition expression syntax error '{n}'`: error; + }; + const first_endbrace = first_endbrace as size; + + let min_str = ""; + let max_str = ""; + let is_single_arg = false; + if (first_comma is void || first_endbrace < first_comma as size) { + const cut = strings::cut(s, "}"); + min_str = cut.0; + max_str = cut.0; + is_single_arg = true; + } else { + const cut = strings::cut(s, ","); + min_str = cut.0; + max_str = strings::cut(cut.1, "}").0; }; - const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) { - case void => - let n = 
parse_repnum(brace_cut.0)?; - return (n, n, len(brace_cut.0)); - case let s: (str, str) => yield s; + let min: (void | size) = void; + let max: (void | size) = void; + + if (len(min_str) > 0) { + min = match (strconv::stoi(min_str)) { + case let res: int => + yield if (res < 0) { + return `Negative repitition count '{-n}'`: error; + } else { + yield res: size; + }; + case => return `Repetition expression syntax error '{n}'`: error; + }; + } else { + min = 0; }; - const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?; - const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?; - return (min, max, len(brace_cut.0)); -}; + if (len(max_str) > 0) { + max = match (strconv::stoi(max_str)) { + case let res: int => + yield if (res < 0) { + return `Negative repitition count '{-n}'`: error; + } else { + yield res: size; + }; + case => return `Repetition expression syntax error '{n}'`: error; + }; + }; -fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) { -case let res: int => - if (res < 0) { - return `Negative repetition count '{-n}'`: error; + const rep_len = if (is_single_arg) { + yield len(min_str); } else { - return res: size; + yield len(min_str) + 1 + len(max_str); }; -case => return `Repetition expression syntax error '{n}'`: error; + return (min, max, rep_len); }; fn delete_thread(i: size, threads: *[]thread) void = { diff --git a/strings/tokenize.ha b/strings/tokenize.ha @@ -179,47 +179,44 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX); }; }; -// Returns a string "cut" along the first instance of a delimiter, -// returning everything up to the delimiter, and everything after the -// delimiter, in a tuple. If the delimiter is not found, returns void. +// Returns a string "cut" along the first instance of a delimiter, returning +// everything up to the delimiter, and everything after the delimiter, in a +// tuple. 
// // strings::cut("hello=world=foobar", "=") // ("hello", "world=foobar") -// strings::cut("hello world", "=") // void +// strings::cut("hello world", "=") // ("hello world", "") // -// The return value is borrowed from the 'in' parameter. -export fn cut(in: str, delim: str) ((str, str) | void) = { - match (bytes::cut(toutf8(in), toutf8(delim))) { - case void => return void; - case let bs: ([]u8, []u8) => - return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1)); - }; +// The return value is borrowed from the 'in' parameter. The caller must ensure +// that 'delimiter' is not an empty string. +export fn cut(in: str, delim: str) (str, str) = { + let c = bytes::cut(toutf8(in), toutf8(delim)); + return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1)); }; -// Returns a string "cut" along the last instance of a delimiter, -// returning everything up to the delimiter, and everything after the -// delimiter, in a tuple. If the delimiter is not found, the first result -// will be void. +// Returns a string "cut" along the last instance of a delimiter, returning +// everything up to the delimiter, and everything after the delimiter, in a +// tuple. // // strings::rcut("hello=world=foobar", "=") // ("hello=world", "foobar") -// strings::rcut("hello world", "=") // void +// strings::rcut("hello world", "=") // ("hello world", "") // -// The return value is borrowed from the 'in' parameter. -export fn rcut(in: str, delim: str) ((str, str) | void) = { - match (bytes::rcut(toutf8(in), toutf8(delim))) { - case void => return void; - case let bs: ([]u8, []u8) => - return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1)); - }; +// The return value is borrowed from the 'in' parameter. The caller must ensure +// that 'delimiter' is not an empty string. 
+export fn rcut(in: str, delim: str) (str, str) = { + let c = bytes::rcut(toutf8(in), toutf8(delim)); + return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1)); }; @test fn cut() void = { - const sample = cut("hello=world", "=") as (str, str); + const sample = cut("hello=world", "="); assert(sample.0 == "hello" && sample.1 == "world"); - const sample = cut("hello=world=foobar", "=") as (str, str); + const sample = cut("hello=world=foobar", "="); assert(sample.0 == "hello" && sample.1 == "world=foobar"); - assert(cut("hello world", "=") is void); - assert(cut("", "=") is void); + const sample = cut("hello world", "="); + assert(sample.0 == "hello world" && sample.1 == ""); + const sample = cut("", "="); + assert(sample.0 == "" && sample.1 == ""); - const sample = rcut("hello=world=foobar", "=") as (str, str); + const sample = rcut("hello=world=foobar", "="); assert(sample.0 == "hello=world" && sample.1 == "foobar"); };