commit 002c60ab88bbb815f59234790b81f89ff3c78f8b
parent 59600480be9409ef17b86ddc650ef8052a7040db
Author: Tilman Sauerbeck <tilman@code-monkey.de>
Date: Sat, 25 Feb 2023 14:53:43 +0100
bufio/scanner: Decode all bytes in the input buffer
This fixes a problem where we stopped decoding once the input stream
no longer had four or more bytes available.
Signed-off-by: Tilman Sauerbeck <tilman@code-monkey.de>
Diffstat:
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/bufio/scanner.ha b/bufio/scanner.ha
@@ -169,7 +169,7 @@ export fn scan_bytes(
export fn scan_rune(
scan: *scanner,
) (rune | io::EOF | io::error | utf8::invalid) = {
- if (scan.pending < 4) {
+ if (scan.pending == 0) {
match (scan_readahead(scan)?) {
case io::EOF =>
return io::EOF;
@@ -184,6 +184,15 @@ export fn scan_rune(
return utf8::invalid;
};
+ for (scan.pending < sz) {
+ match (scan_readahead(scan)?) {
+ case io::EOF =>
+ return utf8::invalid;
+ case size =>
+ yield;
+ };
+ };
+
// Consume previous read, if any
scan_shift(scan);
// Consume this read right away
@@ -375,3 +384,49 @@ export fn scanrune(
};
};
};
+
+@test fn scan_rune() void = { // Checks that scan_rune yields every rune of a short ASCII input, then io::EOF.
+ let in = fixed(strings::toutf8("hello"), io::mode::READ); // input: the UTF-8 bytes of "hello", opened in READ mode
+ let scanner = newscanner(&in, 32); // NOTE(review): 32 is presumably the scanner buffer size; confirm against newscanner
+
+ const expected: [_](rune | utf8::invalid | io::EOF | io::error) = [ // results scan_rune must produce, in call order
+ 'h', 'e', 'l', 'l', 'o', io::EOF,
+ ];
+ for (let i = 0z; i < len(expected); i += 1) { // one scan_rune call per expected entry
+ let want = expected[i];
+
+ match (scan_rune(&scanner)) {
+ case let r: rune =>
+ assert(want is rune && want as rune == r); // must be a rune and the same one
+ case io::EOF =>
+ assert(want is io::EOF); // EOF is only acceptable where the table says so
+ case => // any other result (utf8::invalid or io::error) fails the test
+ abort();
+ };
+ };
+};
+
+@test fn scan_rune_cutoff() void = { // Checks that input ending in the middle of a multi-byte sequence yields utf8::invalid.
+ let in = fixed([
+ 'a', 0xE3, // 0xE3 is the lead byte of a three-byte UTF-8 sequence; the input ends right after it
+ ], io::mode::READ);
+ let scanner = newscanner(&in, 32); // NOTE(review): 32 is presumably the scanner buffer size; confirm against newscanner
+
+ const expected: [_](rune | utf8::invalid | io::EOF | io::error) = [ // results scan_rune must produce, in call order
+ 'a', utf8::invalid, // the truncated sequence must surface as utf8::invalid, not io::EOF
+ ];
+ for (let i = 0z; i < len(expected); i += 1) { // one scan_rune call per expected entry
+ let want = expected[i];
+
+ match (scan_rune(&scanner)) {
+ case let r: rune =>
+ assert(want is rune && want as rune == r); // must be a rune and the same one
+ case io::EOF =>
+ assert(want is io::EOF); // expected holds no io::EOF entry, so reaching this branch fails the assert
+ case utf8::invalid =>
+ assert(want is utf8::invalid);
+ case => // any remaining result (io::error) fails the test
+ abort();
+ };
+ };
+};