hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 8e99e59ae9f1e41efa432bfab6d2818c954bab07
parent c66756b8e192480a8d837f60fcd4dc9a57aeac79
Author: Drew DeVault <sir@cmpwn.com>
Date:   Wed, 10 Feb 2021 14:58:51 -0500

io: add io::getrune

Diffstat:
M encoding/utf8/decode.ha | 9 +++------
A io/+test/strings.ha | 45 +++++++++++++++++++++++++++++++++++++++++++++
A io/strings.ha | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -30,7 +30,7 @@ export fn next(d: *decoder) (rune | void | more | invalid) = {
 	};
 
 	// XXX: It would be faster if we decoded and measured at the same time.
-	const n = utf8sz(d.src[d.offs..]);
+	const n = utf8sz(d.src[d.offs]);
 	if (n == types::SIZE_MAX) {
 		return invalid;
 	} else if (d.offs + n > len(d.src)) {
@@ -97,15 +97,12 @@ export fn valid(src: (str | []u8)) bool = {
 	abort();
 };
 
-fn utf8sz(src: []u8) size = {
-	assert(len(src) > 0);
-
-	let c = src[0];
+// Returns the expected length of a UTF-8 character in bytes.
+export fn utf8sz(c: u8) size = {
 	for (let i = 0z; i < len(sizes); i += 1) {
 		if (c & sizes[i].mask == sizes[i].result) {
 			return sizes[i].octets;
 		};
 	};
-
 	return types::SIZE_MAX;
 };
diff --git a/io/+test/strings.ha b/io/+test/strings.ha
@@ -0,0 +1,45 @@
+use encoding::utf8;
+use rt;
+
+type bufstream = struct {
+	stream: stream,
+	buf: []u8,
+};
+
+@test fn getrune() void = {
+	let bs = bufstream {
+		stream = stream {
+			name = "buffer",
+			reader = &bs_read,
+			...
+		},
+		buf = [
+			0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81,
+			0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00,
+		],
+	};
+	let in = &bs.stream;
+	const expected: [_](rune | utf8::invalid | EOF | error) = [
+		'こ', 'ん', 'に', 'ち', 'は', '\0', EOF,
+	];
+	for (let i = 0z; i < len(expected); i += 1) {
+		let want = expected[i];
+		match (getrune(in)) {
+			r: rune => assert(want is rune && want as rune == r),
+			EOF => assert(want is EOF),
+			* => abort(),
+		};
+	};
+};
+
+fn bs_read(s: *stream, buf: []u8) (size | error | EOF) = {
+	let stream = s: *bufstream;
+	if (len(stream.buf) == 0) {
+		return EOF;
+	};
+	const n = if (len(buf) > len(stream.buf)) len(stream.buf) else len(buf);
+	// TODO: Fix me up once slice copying is in
+	rt::memcpy(buf: *[*]u8, stream.buf: *[*]u8, n);
+	stream.buf = stream.buf[n..];
+	return n;
+};
diff --git a/io/strings.ha b/io/strings.ha
@@ -0,0 +1,36 @@
+use encoding::utf8;
+use types;
+
+// TODO: Do we want some kind of io::text_stream?
+
+// Reads a rune from a UTF-8 stream.
+export fn getrune(in: *io::stream) (rune | utf8::invalid | EOF | error) = {
+	let b: [4]u8 = [0...];
+	match (read(in, b[..1])) {
+		n: size => assert(n == 1),
+		err: error => return err,
+		EOF => return EOF,
+	};
+
+	const sz = utf8::utf8sz(b[0]);
+	if (sz == types::SIZE_MAX) {
+		return utf8::invalid;
+	};
+
+	if (sz == 1) {
+		return b[0]: u32: rune;
+	};
+
+	match (read(in, b[1..sz])) {
+		n: size => assert(n == sz - 1),
+		err: error => return err,
+		EOF => return EOF,
+	};
+
+	let dec = utf8::decode(b[..sz]);
+	return match (utf8::next(&dec)) {
+		r: rune => r,
+		utf8::invalid => utf8::invalid,
+		(void | utf8::more) => EOF,
+	};
+};