commit f3bd0a8b6b24d788f5fcd090a937fbb8ebd4b926
parent 5d6c8071e375856f60d5ff741caf65b5a221e2f2
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date: Tue, 9 Feb 2021 21:17:11 +0100
bytes/tokenize.ha: use bytes::index
The old tokenize implementation had the same bug that bytes::index had
before ddfebe851bf. This is now fixed. No new tests are necessary
because the issue is already covered by the bytes::index tests.
Diffstat:
1 file changed, 15 insertions(+), 30 deletions(-)
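
For context, a minimal usage sketch of the tokenizer API touched by this
patch (not part of the commit; the input values, the main function, and the
asserts are hypothetical, and the match-arm syntax follows the era of the
patch):

use bytes;

export fn main() void = {
	// Split [1, 0, 2, 0, 3] on the single-byte delimiter [0],
	// expecting the three tokens [1], [2] and [3].
	let buf: [_]u8 = [1, 0, 2, 0, 3];
	let delim: [_]u8 = [0];
	let t = bytes::tokenize(buf[..], delim[..]);
	let ntok = 0z;
	for (true) {
		match (bytes::next_token(&t)) {
			tok: []u8 => {
				assert(len(tok) == 1z);
				ntok += 1z;
			},
			void => break,
		};
	};
	assert(ntok == 3z);
};

Both the old and the new implementation are expected to yield the same three
tokens here; the change only affects how the delimiter is located.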
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -7,26 +7,17 @@ export fn tokenize(s: []u8, delim: []u8) tokenizer = tokenizer {
d = delim,
};
-// Returns the number of bytes in a which are equal to bytes in b.
-fn nequal(a: []u8, b: []u8) size = {
- let i = 0z;
- for (i < len(a) && i < len(b); i += 1z) {
- if (a[i] != b[i]) {
- break;
- };
- };
- return i;
-};
-
// Returns the next slice from a tokenizer, and advances the cursor. Returns
-// void if there are no tokens left. If a string starts with, or ends with, a
-// token, an empty slice is returned at the beginning or end of the sequence,
-// respectively.
+// void if there are no tokens left and on all subsequent invocations. If a
+// string starts with, or ends with, a token, an empty slice is returned at the
+// beginning or end of the sequence, respectively.
export fn next_token(s: *tokenizer) ([]u8 | void) = {
- let i = 0z;
- for (i < len(s.s)) {
- let n = nequal(s.s[i..], s.d);
- if (n == len(s.d)) {
+ if (len(s.s) == 0z) {
+ return void;
+ };
+
+ match (index(s.s, s.d)) {
+ i: size => {
let tok = s.s[..i];
if (len(tok) + len(s.d) == len(s.s) && len(tok) != 0z) {
s.s = s.s[i..];
@@ -34,20 +25,14 @@ export fn next_token(s: *tokenizer) ([]u8 | void) = {
s.s = s.s[i+len(s.d)..];
};
return tok;
- } else if (n != 0z) {
- i += n;
- } else {
- i += 1z;
- };
- };
-
- if (len(s.s) != 0z) {
- let tok = s.s[..];
- s.s = s.s[..0];
- return tok;
+ },
+ void => {
+ let tok = s.s[..];
+ s.s = s.s[..0];
+ return tok;
+ },
};
- return void;
};
// Returns the remainder of the slice associated with a tokenizer, without doing