hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 87b46ba774b65122d4b642dfa13feec3886f6ee0
parent 19872fd59f02f7f42f63b2488d2c516876593342
Author: Alexey Yerin <yyp@disroot.org>
Date:   Mon, 22 Mar 2021 21:21:21 +0300

bufio: add scan* functions

They are used to read data from io::stream more easily and should
replace io::getrune and friends.

Diffstat:
A bufio/scanner.ha | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 5 +++--
M stdlib.mk | 10 ++++++----
3 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/bufio/scanner.ha b/bufio/scanner.ha @@ -0,0 +1,115 @@ +use bytes; +use encoding::utf8; +use io; +use strings; +use types; + +// Reads a single byte from the stream. +export fn scanbyte(stream: *io::stream) (u8 | io::EOF | io::error) = { + let buf: [1]u8 = [0...]; + + return match (io::read(stream, buf)?) { + read: size => if (read > 0) buf[0] else io::EOF, + io::EOF => io::EOF, + }; +}; + +// Reads a slice of bytes until the delimiter. Delimiter is not included. +export fn scantok(stream: *io::stream, delim: u8) ([]u8 | io::EOF | io::error) = { + let buf: []u8 = []; + + for (true) { + match (scanbyte(stream)?) { + res: u8 => { + if (res == delim) { + break; + }; + append(buf, res); + }, + io::EOF => break, + }; + }; + + return buf; +}; + +// Reads a slice of bytes until a newline character (\n, 0x0A). Newline itself +// is not included. +export fn scanline(stream: *io::stream) ([]u8 | io::EOF | io::error) = scantok(stream, '\n': u32: u8); + +// Reads a rune from a UTF-8 stream. +export fn scanrune(stream: *io::stream) (rune | utf8::invalid | io::EOF | io::error) = { + let b: [4]u8 = [0...]; + match (io::read(stream, b[..1])?) 
{ + n: size => assert(n == 1), + io::EOF => return io::EOF, + }; + + const sz = utf8::utf8sz(b[0]); + if (sz == types::SIZE_MAX) { + return utf8::invalid; + }; + + if (sz == 1) { + return b[0]: u32: rune; + }; + + match (io::read(stream, b[1..sz])) { + n: size => assert(n == sz - 1), + e: (io::error | io::EOF) => return e, + }; + + let dec = utf8::decode(b[..sz]); + return match (utf8::next(&dec)) { + r: rune => r, + utf8::invalid => utf8::invalid, + (void | utf8::more) => io::EOF, + }; +}; + +@test fn scanbyte() void = { + let buf = fixed([1, 3, 3, 7], io::mode::READ); + + assert(scanbyte(buf) as u8 == 1); + assert(scanbyte(buf) as u8 == 3); + assert(scanbyte(buf) as u8 == 3); + assert(scanbyte(buf) as u8 == 7); + assert(scanbyte(buf) is io::EOF); +}; + +@test fn scantok() void = { + let buf = fixed([1, 3, 4, 5, 3, 7], io::mode::READ); + + assert(bytes::equal(scantok(buf, 4) as []u8, [1, 3])); + assert(bytes::equal(scantok(buf, 7) as []u8, [5, 3])); + assert(bytes::equal(scantok(buf, 1) as []u8, [])); +}; + +@test fn scanline() void = { + let helloworld = strings::to_utf8("hello\nworld"); + let buf = fixed(helloworld, io::mode::READ); + + assert(bytes::equal(scanline(buf) as []u8, strings::to_utf8("hello"))); + assert(bytes::equal(scanline(buf) as []u8, strings::to_utf8("world"))); + assert(bytes::equal(scanline(buf) as []u8, [])); +}; + +@test fn scanrune() void = { + let in = fixed([ + 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, + 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00, + ], io::mode::READ); + + const expected: [_](rune | utf8::invalid | io::EOF | io::error) = [ + 'こ', 'ん', 'に', 'ち', 'は', '\0', io::EOF, + ]; + for (let i = 0z; i < len(expected); i += 1) { + let want = expected[i]; + + match (scanrune(in)) { + r: rune => assert(want is rune && want as rune == r), + io::EOF => assert(want is io::EOF), + * => abort(), + }; + }; +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -110,8 +110,9 @@ bufio() { gen_srcs bufio \ buffered.ha \ 
dynamic.ha \ - fixed.ha - gen_ssa bufio io bytes strings + fixed.ha \ + scanner.ha + gen_ssa bufio io bytes strings encoding::utf8 } bytes() { diff --git a/stdlib.mk b/stdlib.mk @@ -194,9 +194,10 @@ $(HARECACHE)/ascii/ascii.ssa: $(stdlib_ascii_srcs) $(stdlib_rt) $(stdlib_strings stdlib_bufio_srcs= \ $(STDLIB)/bufio/buffered.ha \ $(STDLIB)/bufio/dynamic.ha \ - $(STDLIB)/bufio/fixed.ha + $(STDLIB)/bufio/fixed.ha \ + $(STDLIB)/bufio/scanner.ha -$(HARECACHE)/bufio/bufio.ssa: $(stdlib_bufio_srcs) $(stdlib_rt) $(stdlib_io) $(stdlib_bytes) $(stdlib_strings) +$(HARECACHE)/bufio/bufio.ssa: $(stdlib_bufio_srcs) $(stdlib_rt) $(stdlib_io) $(stdlib_bytes) $(stdlib_strings) $(stdlib_encoding_utf8) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/bufio @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nbufio \ @@ -815,9 +816,10 @@ $(TESTCACHE)/ascii/ascii.ssa: $(testlib_ascii_srcs) $(testlib_rt) $(testlib_stri testlib_bufio_srcs= \ $(STDLIB)/bufio/buffered.ha \ $(STDLIB)/bufio/dynamic.ha \ - $(STDLIB)/bufio/fixed.ha + $(STDLIB)/bufio/fixed.ha \ + $(STDLIB)/bufio/scanner.ha -$(TESTCACHE)/bufio/bufio.ssa: $(testlib_bufio_srcs) $(testlib_rt) $(testlib_io) $(testlib_bytes) $(testlib_strings) +$(TESTCACHE)/bufio/bufio.ssa: $(testlib_bufio_srcs) $(testlib_rt) $(testlib_io) $(testlib_bytes) $(testlib_strings) $(testlib_encoding_utf8) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/bufio @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nbufio \