hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit da442e0bf76cac19a137a3f779b5e0d838b94c8a
parent ee6bbf85490c236f90e4eb789a8e9eb2eb77719d
Author: Autumn! <autumnull@posteo.net>
Date:   Fri, 21 Apr 2023 12:39:08 +0000

bytes,strings: make {cut,rcut} return a tagged union value

Additionally removes the requirement that the delimiter be non-empty.

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
Mbytes/tokenize.ha | 54+++++++++++++++++++-----------------------------------
Mcmd/haredoc/env.ha | 11+++++++----
Mnet/uri/query.ha | 5++++-
Mregex/regex.ha | 70++++++++++++++++++++--------------------------------------------------
Mstrings/tokenize.ha | 53++++++++++++++++++++++++++++-------------------------
5 files changed, 78 insertions(+), 115 deletions(-)

diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha @@ -184,61 +184,45 @@ export fn remaining_tokens(s: *tokenizer) []u8 = { }; // Returns the input slice "cut" along the first instance of a delimiter, -// returning everything up to the delimiter, and everything after the delimiter, -// in a tuple. The contents are borrowed from the input slice. -// -// The caller must ensure that 'delimiter' is not an empty slice. -export fn cut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = { - let ln = if (delim is u8) { - yield 1z; - } else { - let ln = len(delim: []u8); - assert(ln > 0, "bytes::cut called with empty delimiter"); - yield ln; - }; +// returning everything up to the delimiter, and everything after the +// delimiter, in a tuple. If the delimiter is not found, returns void. +// The contents are borrowed from the input slice. +export fn cut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = { + let ln = if (delim is u8) 1z else len(delim: []u8); match (index(in, delim)) { case let i: size => return (in[..i], in[i + ln..]); case void => - return (in, []); + return void; }; }; // Returns the input slice "cut" along the last instance of a delimiter, -// returning everything up to the delimiter, and everything after the delimiter, -// in a tuple. The contents are borrowed from the input slice. -// -// The caller must ensure that 'delimiter' is not an empty slice. -export fn rcut(in: []u8, delim: ([]u8 | u8)) ([]u8, []u8) = { - let ln = if (delim is u8) { - yield 1z; - } else { - let ln = len(delim: []u8); - assert(ln > 0, "bytes::rcut called with empty delimiter"); - yield ln; - }; +// returning everything up to the delimiter, and everything after the +// delimiter, in a tuple. If the delimiter is not found, returns void. +// The contents are borrowed from the input slice. 
+export fn rcut(in: []u8, delim: ([]u8 | u8)) (([]u8, []u8) | void) = { + let ln = if (delim is u8) 1z else len(delim: []u8); match (rindex(in, delim)) { case let i: size => return (in[..i], in[i + ln..]); case void => - return (in, []); + return void; }; }; @test fn cut() void = { - const c = cut(['a', 'b', 'c'], ['b']); + const c = cut(['a', 'b', 'c'], ['b']) as ([]u8, []u8); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = cut(['a', 'b', 'c'], 'b'); + const c = cut(['a', 'b', 'c'], 'b') as ([]u8, []u8); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = cut(['a', 'b', 'c', 'b', 'a'], 'b'); + const c = cut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8); assert(equal(c.0, ['a']) && equal(c.1, ['c', 'b', 'a'])); - const c = cut(['a', 'b', 'c'], 'x'); - assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, [])); - const c = cut([], 'x'); - assert(equal(c.0, []) && equal(c.1, [])); + assert(cut(['a', 'b', 'c'], 'x') is void); + assert(cut([], 'x') is void); - const c = rcut(['a', 'b', 'c'], ['b']); + const c = rcut(['a', 'b', 'c'], ['b']) as ([]u8, []u8); assert(equal(c.0, ['a']) && equal(c.1, ['c'])); - const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b'); + const c = rcut(['a', 'b', 'c', 'b', 'a'], 'b') as ([]u8, []u8); assert(equal(c.0, ['a', 'b', 'c']) && equal(c.1, ['a'])); }; diff --git a/cmd/haredoc/env.ha b/cmd/haredoc/env.ha @@ -42,10 +42,13 @@ fn default_tags() ([]module::tag | error) = { for (true) match (bufio::scanline(pipe.0)?) 
{ case let b: []u8 => defer free(b); - const (k, v) = strings::cut(strings::fromutf8(b)!, "\t"); - if (k == "Build tags") { - tags = module::parsetags(v) as []module::tag; - break; + match (strings::cut(strings::fromutf8(b)!, "\t")) { + case void => void; + case let s: (str, str) => + if (s.0 == "Build tags") { + tags = module::parsetags(s.1) as []module::tag; + break; + }; }; case io::EOF => // process exited with failure; handled below diff --git a/net/uri/query.ha b/net/uri/query.ha @@ -30,7 +30,10 @@ export fn query_next(dec: *query_decoder) ((str, str) | invalid | void) = { case => return; }; - const raw = strings::cut(tok, "="); + const raw: (str, str) = match (strings::cut(tok, "=")) { + case let s: (str, str) => yield s; + case void => yield (tok, ""); + }; strio::reset(&dec.bufs.0); percent_decode_static(&dec.bufs.0, raw.0)?; strio::reset(&dec.bufs.1); diff --git a/regex/regex.ha b/regex/regex.ha @@ -401,65 +401,35 @@ export fn compile(expr: str) (regex | error) = { }; }; +// returns min, max, and length of string matched fn parse_repetition( s: str -) (((void | size), (void | size), size) | error) = { - const first_comma = strings::index(s, ","); - const first_endbrace = strings::index(s, "}"); - if (first_endbrace is void) { - return `Repetition expression syntax error '{n}'`: error; - }; - const first_endbrace = first_endbrace as size; - - let min_str = ""; - let max_str = ""; - let is_single_arg = false; - if (first_comma is void || first_endbrace < first_comma as size) { - const cut = strings::cut(s, "}"); - min_str = cut.0; - max_str = cut.0; - is_single_arg = true; - } else { - const cut = strings::cut(s, ","); - min_str = cut.0; - max_str = strings::cut(cut.1, "}").0; +) ((size, size, size) | error) = { + const brace_cut = match (strings::cut(s, "}")) { + case void => return `Repetition expression syntax error '{n}'`: error; + case let s: (str, str) => yield s; }; - let min: (void | size) = void; - let max: (void | size) = void; - - if 
(len(min_str) > 0) { - min = match (strconv::stoi(min_str)) { - case let res: int => - yield if (res < 0) { - return `Negative repitition count '{-n}'`: error; - } else { - yield res: size; - }; - case => return `Repetition expression syntax error '{n}'`: error; - }; - } else { - min = 0; + const (min_str, max_str) = match (strings::cut(brace_cut.0, ",")) { + case void => + let n = parse_repnum(brace_cut.0)?; + return (n, n, len(brace_cut.0)); + case let s: (str, str) => yield s; }; - if (len(max_str) > 0) { - max = match (strconv::stoi(max_str)) { - case let res: int => - yield if (res < 0) { - return `Negative repitition count '{-n}'`: error; - } else { - yield res: size; - }; - case => return `Repetition expression syntax error '{n}'`: error; - }; - }; + const min = if (len(min_str) == 0) 0: size else parse_repnum(min_str)?; + const max = if (len(max_str) == 0) -1: size else parse_repnum(max_str)?; + return (min, max, len(brace_cut.0)); +}; - const rep_len = if (is_single_arg) { - yield len(min_str); +fn parse_repnum(s: str) (size | error) = match (strconv::stoi(s)) { +case let res: int => + if (res < 0) { + return `Negative repetition count '{-n}'`: error; } else { - yield len(min_str) + 1 + len(max_str); + return res: size; }; - return (min, max, rep_len); +case => return `Repetition expression syntax error '{n}'`: error; }; fn delete_thread(i: size, threads: *[]thread) void = { diff --git a/strings/tokenize.ha b/strings/tokenize.ha @@ -179,44 +179,47 @@ export fn split(in: str, delim: str) []str = splitn(in, delim, types::SIZE_MAX); }; }; -// Returns a string "cut" along the first instance of a delimiter, returning -// everything up to the delimiter, and everything after the delimiter, in a -// tuple. +// Returns a string "cut" along the first instance of a delimiter, +// returning everything up to the delimiter, and everything after the +// delimiter, in a tuple. If the delimiter is not found, returns void. 
// // strings::cut("hello=world=foobar", "=") // ("hello", "world=foobar") -// strings::cut("hello world", "=") // ("hello world", "") +// strings::cut("hello world", "=") // void // -// The return value is borrowed from the 'in' parameter. The caller must ensure -// that 'delimiter' is not an empty string. -export fn cut(in: str, delim: str) (str, str) = { - let c = bytes::cut(toutf8(in), toutf8(delim)); - return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1)); +// The return value is borrowed from the 'in' parameter. +export fn cut(in: str, delim: str) ((str, str) | void) = { + match (bytes::cut(toutf8(in), toutf8(delim))) { + case void => return void; + case let bs: ([]u8, []u8) => + return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1)); + }; }; -// Returns a string "cut" along the last instance of a delimiter, returning -// everything up to the delimiter, and everything after the delimiter, in a -// tuple. +// Returns a string "cut" along the last instance of a delimiter, +// returning everything up to the delimiter, and everything after the +// delimiter, in a tuple. If the delimiter is not found, returns +// void. // // strings::rcut("hello=world=foobar", "=") // ("hello=world", "foobar") -// strings::rcut("hello world", "=") // ("hello world", "") +// strings::rcut("hello world", "=") // void // -// The return value is borrowed from the 'in' parameter. The caller must ensure -// that 'delimiter' is not an empty string. -export fn rcut(in: str, delim: str) (str, str) = { - let c = bytes::rcut(toutf8(in), toutf8(delim)); - return (fromutf8_unsafe(c.0), fromutf8_unsafe(c.1)); +// The return value is borrowed from the 'in' parameter. 
+export fn rcut(in: str, delim: str) ((str, str) | void) = { + match (bytes::rcut(toutf8(in), toutf8(delim))) { + case void => return void; + case let bs: ([]u8, []u8) => + return (fromutf8_unsafe(bs.0), fromutf8_unsafe(bs.1)); + }; }; @test fn cut() void = { - const sample = cut("hello=world", "="); + const sample = cut("hello=world", "=") as (str, str); assert(sample.0 == "hello" && sample.1 == "world"); - const sample = cut("hello=world=foobar", "="); + const sample = cut("hello=world=foobar", "=") as (str, str); assert(sample.0 == "hello" && sample.1 == "world=foobar"); - const sample = cut("hello world", "="); - assert(sample.0 == "hello world" && sample.1 == ""); - const sample = cut("", "="); - assert(sample.0 == "" && sample.1 == ""); + assert(cut("hello world", "=") is void); + assert(cut("", "=") is void); - const sample = rcut("hello=world=foobar", "="); + const sample = rcut("hello=world=foobar", "=") as (str, str); assert(sample.0 == "hello=world" && sample.1 == "foobar"); };