commit c6c03614587cb0626e32a4cc0c52161c52182285
parent fc8dc2d78e55b53494e5df248bfd1b9e2a3d5372
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date: Tue, 16 Feb 2021 01:36:05 +0100
bytes::tokenize: handle the case where the delimiter is the whole array

next_token detected the end of input with len(s.s) == 0, so when the
delimiter matched the entire remaining input, the empty token that should
follow the final delimiter was never yielded: tokenizing [1, 2] by [1, 2]
produced one empty slice instead of two. Track exhaustion with an explicit
end flag instead; the empty remainder then yields its own token through the
ordinary path, which also removes the special case that previously handled
inputs ending with the delimiter.
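To illustrate (a hypothetical snippet in the style of the new test below,
not part of the patch), the second call now yields an empty slice where it
previously returned void:

	const input: [_]u8 = [1, 2];
	let t = tokenize(input, [1, 2]);
	assert(next_token(&t) is []u8);	// leading empty token
	assert(next_token(&t) is []u8);	// trailing empty token
	assert(next_token(&t) is void);	// tokenizer exhausted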
Diffstat:
1 file changed, 25 insertions(+), 7 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -1,10 +1,11 @@
// The state for a tokenizer.
-export type tokenizer = struct { s: []u8, d: []u8 };
+export type tokenizer = struct { s: []u8, d: []u8, end: bool };
// Returns a tokenizer which yields sub-slices tokenized by a delimiter.
export fn tokenize(s: []u8, delim: []u8) tokenizer = tokenizer {
s = s,
d = delim,
+ end = false,
};
// Returns the next slice from a tokenizer, and advances the cursor. Returns
@@ -12,21 +13,18 @@ export fn tokenize(s: []u8, delim: []u8) tokenizer = tokenizer {
// string starts with, or ends with, a token, an empty slice is returned at the
// beginning or end of the sequence, respectively.
export fn next_token(s: *tokenizer) ([]u8 | void) = {
- if (len(s.s) == 0) {
+ if (s.end) {
return;
};
match (index(s.s, s.d)) {
i: size => {
let tok = s.s[..i];
- if (len(tok) + len(s.d) == len(s.s) && len(tok) != 0) {
- s.s = s.s[i..];
- } else {
- s.s = s.s[i+len(s.d)..];
- };
+ s.s = s.s[i+len(s.d)..];
return tok;
},
void => {
+ s.end = true;
let tok = s.s[..];
s.s = s.s[..0];
return tok;
@@ -101,4 +99,24 @@ export fn remaining_tokens(s: *tokenizer) []u8 = {
};
assert(next_token(&t) is void);
+
+ const input4: [_]u8 = [1, 2];
+ t = tokenize(input4, [1, 2]);
+
+ match (next_token(&t)) {
+ b: []u8 => assert(equal([], b)),
+ void => abort(),
+ };
+
+ match (next_token(&t)) {
+ b: []u8 => assert(equal([], b)),
+ void => abort(),
+ };
+
+ match (next_token(&t)) {
+ b: []u8 => abort(),
+ void => void,
+ };
+
+ assert(next_token(&t) is void);
};
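With the branch removed above, an input that ends with the delimiter flows
through the same end-flag path as the whole-array case. A sketch in the
style of the test above (input5 is a hypothetical name, not part of the
patch):

	const input5: [_]u8 = [1, 0];
	let t = tokenize(input5, [0]);

	match (next_token(&t)) {
		b: []u8 => assert(equal([1], b)),	// token before the delimiter
		void => abort(),
	};

	match (next_token(&t)) {
		b: []u8 => assert(equal([], b)),	// trailing empty token
		void => abort(),
	};

	assert(next_token(&t) is void);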