commit c18b4b62ea5bfb54af1e470946189ef004102add
parent 0f753c9c828d6073282bb05fac356d62899f029e
Author: Drew DeVault <sir@cmpwn.com>
Date: Wed, 13 Mar 2024 19:39:22 +0100
bytes: refactor tokenize
Should be about the same performance, but it's much more readable.
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
1 file changed, 37 insertions(+), 18 deletions(-)
diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
@@ -46,8 +46,13 @@ export fn rtokenize(s: []u8, delim: []u8) tokenizer = {
// void if there are no tokens left and on all subsequent invocations. If a
// string starts with, or ends with, a token, an empty slice is returned at the
// beginning or end of the sequence, respectively.
-export fn next_token(s: *tokenizer) ([]u8 | void) = match (peek_token(s)) {
-case let b: []u8 =>
+export fn next_token(s: *tokenizer) ([]u8 | void) = {
+ const b = match (peek_token(s)) {
+ case let b: []u8 =>
+ yield b;
+ case => return;
+ };
+
if (s.p < 0) { // reverse
if (len(s.s): i64 + s.p + 1 == 0) {
s.d = s.d[..0];
@@ -66,8 +71,8 @@ case let b: []u8 =>
};
s.p = types::I64_MAX;
};
+
return b;
-case => void;
};
// Same as [[next_token]], but does not advance the cursor
@@ -75,25 +80,39 @@ export fn peek_token(s: *tokenizer) ([]u8 | void) = {
if (len(s.d) == 0) {
return;
};
- if (s.p < 0) { // reverse
- if (s.p == types::I64_MIN) {
- s.p = match (rindex(s.s, s.d)) {
- case let i: size =>
- yield (i + len(s.d)): i64 - len(s.s): i64 - 1;
- case void =>
- yield -(len(s.s): i64 + 1);
+
+ const reverse = s.p < 0;
+ const ifunc = if (reverse) &rindex else &index;
+
+ const known = ((s.p < 0 && s.p != types::I64_MIN) ||
+ (s.p >= 0 && s.p != types::I64_MAX));
+ if (!known) {
+ let i = 0i64;
+ let dlen = 0i64;
+ let slen = len(s.s): i64;
+
+ match (ifunc(s.s, s.d)) {
+ case let ix: size =>
+ dlen = len(s.d): i64;
+ i = ix: i64;
+ case void =>
+ i = slen;
+ };
+
+ if (reverse) {
+ if (i == slen) {
+ s.p = -(slen + 1);
+ } else {
+ s.p = i + dlen - slen - 1;
};
+ } else {
+ s.p = i;
};
+ };
+
+ if (reverse) {
return s.s[len(s.s) + s.p: size + 1..];
} else {
- if (s.p == types::I64_MAX) {
- s.p = match (index(s.s, s.d)) {
- case let i: size =>
- yield i: i64;
- case void =>
- yield len(s.s): i64;
- };
- };
return s.s[..s.p: size];
};
};