[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git

commit 9409fd01c1140b3786e2e7538ba0ee350be4010d
parent 8b3ed6ce7deb14357c21e695d9fea78a527898d1
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sun,  2 Jun 2024 11:35:54 +0200

bytes::tokenize: clean up code

Better names for struct fields and some minor re-arranging of things.

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
M bytes/tokenize.ha | 72 +++++++++++++++++++++++++++++++++++++++---------------------------------
1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha
--- a/bytes/tokenize.ha
+++ b/bytes/tokenize.ha
@@ -4,22 +4,23 @@
 use types;
 
 export type tokenizer = struct {
-	s: []u8, // string being tokenized
-	d: []u8, // delimiter
-	p: i64, // p < 0 for reverse tokenizers, 0 <= p for forward ones.
+	in: []u8, // string being tokenized
+	delim: []u8, // delimiter
+	p: i64, // p < 0 for reverse tokenizers, 0 <= p for forward ones.
 };
 
 // Returns a tokenizer which yields sub-slices tokenized by a delimiter, starting
 // at the beginning of the slice. The caller must ensure that 'delim' is not an
 // empty slice. Can tokenize a slice of length less than [[types::I64_MAX]].
-export fn tokenize(s: []u8, delim: []u8) tokenizer = {
+export fn tokenize(in: []u8, delim: []u8) tokenizer = {
 	assert(len(delim) > 0, "bytes::tokenize called with empty slice");
-	if (len(s) == 0) {
+	if (len(in) == 0) {
 		delim = [];
 	};
+
 	return tokenizer {
-		s = s,
-		d = delim,
+		in = in,
+		delim = delim,
 		p = types::I64_MAX, // I64_MAX means we haven't peeked the next token yet.
 	};
 };
@@ -28,17 +29,19 @@ export fn tokenize(s: []u8, delim: []u8) tokenizer = {
 // Returns a tokenizer which yields sub-slices tokenized by a delimiter, starting at
 // the end of the slice and moving backwards with each call to [[next_token]]. The
 // caller must ensure that 'delimiter' is not an empty slice. Can tokenize a slice
 // of length less than [[types::I64_MAX]].
-export fn rtokenize(s: []u8, delim: []u8) tokenizer = {
+export fn rtokenize(in: []u8, delim: []u8) tokenizer = {
 	assert(len(delim) > 0, "bytes::rtokenize called with empty slice");
-	if (len(s) == 0) {
+	if (len(in) == 0) {
 		delim = [];
 	};
+
 	return tokenizer {
-		s = s,
-		d = delim,
-		p = types::I64_MIN, // I64_MIN means we haven't peeked the next token yet.
-		// also note that p == -1 corresponds to an index of len(s),
-		// and p == -(1 - len(s)) corresponds to an index of 0.
+		in = in,
+		delim = delim,
+		// I64_MIN means we haven't peeked the next token yet. Note that
+		// p == -1 corresponds to an index of len(s), and
+		// p == -(1 - len(s)) corresponds to an index of 0.
+		p = types::I64_MIN,
 	};
 };
@@ -53,21 +56,24 @@ export fn next_token(s: *tokenizer) ([]u8 | done) = {
 	case done =>
 		return done;
 	};
-	if (s.p < 0) { // reverse
-		if (len(s.s): i64 + s.p + 1 == 0) {
-			s.d = s.d[..0];
-			s.s = s.s[..0];
+	const slen = len(s.in): i64;
+	const dlen = len(s.delim);
+	const reverse = s.p < 0;
+	if (reverse) {
+		if (slen + s.p + 1 == 0) {
+			s.delim = s.delim[..0];
+			s.in = s.in[..0];
 		} else {
-			const end = (len(s.s): i64 + s.p + 1): size - len(s.d);
-			s.s = s.s[..end];
+			const end = (slen + s.p + 1): size - dlen;
+			s.in = s.in[..end];
 		};
 		s.p = types::I64_MIN;
 	} else {
-		if (s.p == len(s.s): i64) {
-			s.d = s.d[..0];
-			s.s = s.s[..0];
+		if (s.p == slen) {
+			s.delim = s.delim[..0];
+			s.in = s.in[..0];
 		} else {
-			s.s = s.s[s.p: size + len(s.d)..];
+			s.in = s.in[s.p: size + dlen..];
 		};
 		s.p = types::I64_MAX;
 	};
@@ -77,23 +83,23 @@ export fn next_token(s: *tokenizer) ([]u8 | done) = {
 
 // Same as [[next_token]], but does not advance the cursor
 export fn peek_token(s: *tokenizer) ([]u8 | done) = {
-	if (len(s.d) == 0) {
+	if (len(s.delim) == 0) {
 		return done;
 	};
 
 	const reverse = s.p < 0;
 	const ifunc = if (reverse) &rindex else &index;
-	const known = ((s.p < 0 && s.p != types::I64_MIN) ||
-		(s.p >= 0 && s.p != types::I64_MAX));
+	const known = ((reverse && s.p != types::I64_MIN) ||
+		(!reverse && s.p != types::I64_MAX));
 
 	if (!known) {
 		let i = 0i64;
 		let dlen = 0i64;
-		let slen = len(s.s): i64;
+		const slen = len(s.in): i64;
 
-		match (ifunc(s.s, s.d)) {
+		match (ifunc(s.in, s.delim)) {
 		case let ix: size =>
-			dlen = len(s.d): i64;
+			dlen = len(s.delim): i64;
 			i = ix: i64;
 		case void =>
 			i = slen;
@@ -111,9 +117,9 @@ export fn peek_token(s: *tokenizer) ([]u8 | done) = {
 	};
 
 	if (reverse) {
-		return s.s[len(s.s) + s.p: size + 1..];
+		return s.in[len(s.in) + s.p: size + 1..];
 	} else {
-		return s.s[..s.p: size];
+		return s.in[..s.p: size];
 	};
 };
 
@@ -121,7 +127,7 @@ export fn peek_token(s: *tokenizer) ([]u8 | done) = {
 // Returns the remainder of the slice associated with a tokenizer, without doing
 // any further tokenization.
 export fn remaining_tokens(s: *tokenizer) []u8 = {
-	return s.s;
+	return s.in;
 };
 
 @test fn tokenize() void = {
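
The renamed fields are internal to the tokenizer struct; the exported
tokenize/rtokenize/next_token/peek_token interface is unchanged. As a usage
sketch of the API touched here (illustration only, with a made-up input
string; not part of this commit), forward tokenization is typically driven
like so:

use bytes;
use fmt;
use strings;

export fn main() void = {
	// Split "ab:cd:e" on ':' and print each token. The tokenizer
	// yields sub-slices of its input; done signals exhaustion.
	let tok = bytes::tokenize(strings::toutf8("ab:cd:e"),
		strings::toutf8(":"));
	for (true) {
		match (bytes::next_token(&tok)) {
		case let t: []u8 =>
			fmt::println(strings::fromutf8(t)!)!;
		case done =>
			break;
		};
	};
};

This prints ab, cd and e, one per line; rtokenize walks the same input from
the end instead, and peek_token inspects the next token without advancing.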