commit 87b46ba774b65122d4b642dfa13feec3886f6ee0
parent 19872fd59f02f7f42f63b2488d2c516876593342
Author: Alexey Yerin <yyp@disroot.org>
Date: Mon, 22 Mar 2021 21:21:21 +0300
bufio: add scan* functions
They are used to read data from io::stream more easily and should
replace io::getrune and friends.
Diffstat:
3 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/bufio/scanner.ha b/bufio/scanner.ha
@@ -0,0 +1,115 @@
+use bytes;
+use encoding::utf8;
+use io;
+use strings;
+use types;
+
+// Reads exactly one byte from the stream. Yields io::EOF when the stream is
+// exhausted or when the underlying read reports that zero bytes were read.
+// Read errors are propagated to the caller.
+export fn scanbyte(stream: *io::stream) (u8 | io::EOF | io::error) = {
+	let cell: [1]u8 = [0...];
+
+	match (io::read(stream, cell)?) {
+		n: size => {
+			// A zero-length read is reported the same way as end of
+			// stream.
+			if (n == 0) {
+				return io::EOF;
+			};
+		},
+		io::EOF => return io::EOF,
+	};
+
+	return cell[0];
+};
+
+// Reads bytes from the stream until the delimiter is found or the stream
+// ends. The delimiter is consumed but not included in the result. The result
+// is grown with append, so the caller is expected to free it when done.
+//
+// Reaching the end of the stream terminates the token instead of yielding
+// io::EOF, so a stream that is already exhausted produces an empty slice.
+// If a read fails, the partially collected token is freed and the error is
+// returned.
+export fn scantok(stream: *io::stream, delim: u8) ([]u8 | io::EOF | io::error) = {
+	let tok: []u8 = [];
+
+	for (true) {
+		match (scanbyte(stream)) {
+			b: u8 => {
+				if (b == delim) {
+					break;
+				};
+				append(tok, b);
+			},
+			io::EOF => break,
+			err: io::error => {
+				// Do not leak the partial token when the read
+				// fails part-way through.
+				free(tok);
+				return err;
+			},
+		};
+	};
+
+	return tok;
+};
+
+// Reads a slice of bytes up to the next newline character ('\n', 0x0A). The
+// newline itself is consumed but not included in the result. This is scantok
+// with the newline byte as the delimiter.
+export fn scanline(stream: *io::stream) ([]u8 | io::EOF | io::error) = {
+	const newline = '\n': u32: u8;
+	return scantok(stream, newline);
+};
+
+// Reads a single rune from a UTF-8 encoded stream.
+//
+// The first byte determines the length of the sequence. utf8::invalid is
+// returned if that byte cannot start a sequence or if the collected bytes do
+// not decode to a rune. io::EOF is returned if the stream ends before the
+// first byte or in the middle of a multi-byte sequence. Read errors are
+// propagated to the caller.
+export fn scanrune(stream: *io::stream) (rune | utf8::invalid | io::EOF | io::error) = {
+	let b: [4]u8 = [0...];
+	match (io::read(stream, b[..1])?) {
+		n: size => {
+			// Treat a zero-length read as end of stream, the same
+			// way scanbyte does, instead of aborting.
+			if (n == 0) {
+				return io::EOF;
+			};
+		},
+		io::EOF => return io::EOF,
+	};
+
+	const sz = utf8::utf8sz(b[0]);
+	if (sz == types::SIZE_MAX) {
+		return utf8::invalid;
+	};
+
+	if (sz == 1) {
+		return b[0]: u32: rune;
+	};
+
+	// A read may deliver fewer bytes than requested, so keep reading until
+	// the whole sequence has arrived rather than asserting on a single
+	// read.
+	let have = 1z;
+	for (have < sz) {
+		match (io::read(stream, b[have..sz])?) {
+			n: size => {
+				if (n == 0) {
+					return io::EOF;
+				};
+				have += n;
+			},
+			io::EOF => return io::EOF,
+		};
+	};
+
+	let dec = utf8::decode(b[..sz]);
+	return match (utf8::next(&dec)) {
+		r: rune => r,
+		utf8::invalid => utf8::invalid,
+		(void | utf8::more) => io::EOF,
+	};
+};
+
+// Checks that scanbyte yields each byte of a fixed buffer in order and then
+// reports io::EOF.
+@test fn scanbyte() void = {
+	const want: [_]u8 = [1, 3, 3, 7];
+	let src = fixed([1, 3, 3, 7], io::mode::READ);
+
+	for (let i = 0z; i < len(want); i += 1) {
+		assert(scanbyte(src) as u8 == want[i]);
+	};
+	assert(scanbyte(src) is io::EOF);
+};
+
+// Checks that scantok splits on the given delimiter, drops the delimiter, and
+// yields an empty slice once the buffer has been fully consumed.
+@test fn scantok() void = {
+	let src = fixed([1, 3, 4, 5, 3, 7], io::mode::READ);
+
+	const first = scantok(src, 4) as []u8;
+	assert(bytes::equal(first, [1, 3]));
+
+	const second = scantok(src, 7) as []u8;
+	assert(bytes::equal(second, [5, 3]));
+
+	// Nothing is left, so the token is empty.
+	const third = scantok(src, 1) as []u8;
+	assert(bytes::equal(third, []));
+};
+
+// Checks that scanline splits "hello\nworld" into its two lines and yields
+// an empty slice once the input is exhausted.
+@test fn scanline() void = {
+	let src = fixed(strings::to_utf8("hello\nworld"), io::mode::READ);
+
+	const first = scanline(src) as []u8;
+	assert(bytes::equal(first, strings::to_utf8("hello")));
+
+	// The last line has no trailing newline; end of stream ends it.
+	const second = scanline(src) as []u8;
+	assert(bytes::equal(second, strings::to_utf8("world")));
+
+	const third = scanline(src) as []u8;
+	assert(bytes::equal(third, []));
+};
+
+// Checks that scanrune decodes a run of three-byte sequences followed by a
+// NUL byte, and then reports io::EOF.
+@test fn scanrune() void = {
+	let src = fixed([
+		0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81,
+		0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00,
+	], io::mode::READ);
+
+	const expected: [_](rune | utf8::invalid | io::EOF | io::error) = [
+		'こ', 'ん', 'に', 'ち', 'は', '\0', io::EOF,
+	];
+	for (let idx = 0z; idx < len(expected); idx += 1) {
+		const want = expected[idx];
+
+		match (scanrune(src)) {
+			got: rune => {
+				assert(want is rune);
+				assert(want as rune == got);
+			},
+			io::EOF => assert(want is io::EOF),
+			// Any other outcome (invalid sequence or I/O error) is
+			// a test failure.
+			* => abort(),
+		};
+	};
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -110,8 +110,9 @@ bufio() {
gen_srcs bufio \
buffered.ha \
dynamic.ha \
- fixed.ha
- gen_ssa bufio io bytes strings
+ fixed.ha \
+ scanner.ha
+ gen_ssa bufio io bytes strings encoding::utf8
}
bytes() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -194,9 +194,10 @@ $(HARECACHE)/ascii/ascii.ssa: $(stdlib_ascii_srcs) $(stdlib_rt) $(stdlib_strings
stdlib_bufio_srcs= \
$(STDLIB)/bufio/buffered.ha \
$(STDLIB)/bufio/dynamic.ha \
- $(STDLIB)/bufio/fixed.ha
+ $(STDLIB)/bufio/fixed.ha \
+ $(STDLIB)/bufio/scanner.ha
-$(HARECACHE)/bufio/bufio.ssa: $(stdlib_bufio_srcs) $(stdlib_rt) $(stdlib_io) $(stdlib_bytes) $(stdlib_strings)
+$(HARECACHE)/bufio/bufio.ssa: $(stdlib_bufio_srcs) $(stdlib_rt) $(stdlib_io) $(stdlib_bytes) $(stdlib_strings) $(stdlib_encoding_utf8)
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/bufio
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nbufio \
@@ -815,9 +816,10 @@ $(TESTCACHE)/ascii/ascii.ssa: $(testlib_ascii_srcs) $(testlib_rt) $(testlib_stri
testlib_bufio_srcs= \
$(STDLIB)/bufio/buffered.ha \
$(STDLIB)/bufio/dynamic.ha \
- $(STDLIB)/bufio/fixed.ha
+ $(STDLIB)/bufio/fixed.ha \
+ $(STDLIB)/bufio/scanner.ha
-$(TESTCACHE)/bufio/bufio.ssa: $(testlib_bufio_srcs) $(testlib_rt) $(testlib_io) $(testlib_bytes) $(testlib_strings)
+$(TESTCACHE)/bufio/bufio.ssa: $(testlib_bufio_srcs) $(testlib_rt) $(testlib_io) $(testlib_bytes) $(testlib_strings) $(testlib_encoding_utf8)
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/bufio
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nbufio \