hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit c6c03614587cb0626e32a4cc0c52161c52182285
parent fc8dc2d78e55b53494e5df248bfd1b9e2a3d5372
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date:   Tue, 16 Feb 2021 01:36:05 +0100

bytes::tokenize: handle the case where delimiter is whole array

Diffstat:
M bytes/tokenize.ha | 32 +++++++++++++++++++++++++-------
1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -1,10 +1,11 @@
 // The state for a tokenizer.
-export type tokenizer = struct { s: []u8, d: []u8 };
+export type tokenizer = struct { s: []u8, d: []u8, end: bool };
 
 // Returns a tokenizer which yields sub-slices tokenized by a delimiter.
 export fn tokenize(s: []u8, delim: []u8) tokenizer = tokenizer {
 	s = s,
 	d = delim,
+	end = false,
 };
 
 // Returns the next slice from a tokenizer, and advances the cursor. Returns
@@ -12,21 +13,18 @@ export fn tokenize(s: []u8, delim: []u8) tokenizer = tokenizer {
 // string starts with, or ends with, a token, an empty slice is returned at the
 // beginning or end of the sequence, respectively.
 export fn next_token(s: *tokenizer) ([]u8 | void) = {
-	if (len(s.s) == 0) {
+	if (s.end) {
 		return;
 	};
 
 	match (index(s.s, s.d)) {
 		i: size => {
 			let tok = s.s[..i];
-			if (len(tok) + len(s.d) == len(s.s) && len(tok) != 0) {
-				s.s = s.s[i..];
-			} else {
-				s.s = s.s[i+len(s.d)..];
-			};
+			s.s = s.s[i+len(s.d)..];
 			return tok;
 		},
 		void => {
+			s.end = true;
 			let tok = s.s[..];
 			s.s = s.s[..0];
 			return tok;
@@ -101,4 +99,24 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
 	};
 
 	assert(next_token(&t) is void);
+
+	const input4: [_]u8 = [1, 2];
+	t = tokenize(input4, [1, 2]);
+
+	match (next_token(&t)) {
+		b: []u8 => assert(equal([], b)),
+		void => abort(),
+	};
+
+	match (next_token(&t)) {
+		b: []u8 => assert(equal([], b)),
+		void => abort(),
+	};
+
+	match (next_token(&t)) {
+		b: []u8 => abort(),
+		void => void,
+	};
+
+	assert(next_token(&t) is void);
 };