hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 28f905de101d6656ec4dd5fcc763834f58468299
parent dbed6d0e75f18ae8128af4482df919f9e2255ca6
Author: Sebastian LaVine <mail@smlavine.com>
Date:   Thu, 28 Dec 2023 08:17:09 -0500

encoding::utf8: Rename valid() to validate() and change return type

As shown in the changes made to the stdlib to complement this change,
this allows for a common idiom to be expressed more concisely with the
`?` operator. This also aligns with the interface of the prev() and
next() iterators.

Signed-off-by: Sebastian LaVine <mail@smlavine.com>

Diffstat:
M encoding/utf8/decode.ha | 21 +++++++++++----------
M strings/utf8.ha         |  4 +---
M types/c/strings.ha      |  4 +---
3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -70,7 +70,7 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
 		0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81,
 		0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00,
 	];
-	assert(valid(input));
+	assert(validate(input) is void);
 	const expected = ['こ', 'ん', 'に', 'ち', 'は', '\0'];
 	let decoder = decode(input);
 	for (let i = 0z; i < len(expected); i += 1) {
@@ -98,39 +98,39 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
 	assert(next(&decoder) is invalid);
 	decoder.offs = 2;
 	assert(prev(&decoder) is more);
-	assert(!valid(inv));
+	assert(validate(inv) is invalid);

 	const incomplete: [_]u8 = [0xE3, 0x81];
 	decoder = decode(incomplete);
 	assert(next(&decoder) is more);
 	decoder.offs = 2;
 	assert(prev(&decoder) is invalid);
-	assert(!valid(incomplete));
+	assert(validate(incomplete) is invalid);

 	const surrogate: [_]u8 = [0xED, 0xA0, 0x80];
 	decoder = decode(surrogate);
 	assert(next(&decoder) is invalid);
 	decoder.offs = 3;
 	assert(prev(&decoder) is invalid);
-	assert(!valid(surrogate));
+	assert(validate(surrogate) is invalid);

 	const overlong: [_]u8 = [0xF0, 0x82, 0x82, 0xAC];
 	decoder = decode(overlong);
 	assert(next(&decoder) is invalid);
 	decoder.offs = 4;
 	assert(prev(&decoder) is invalid);
-	assert(!valid(overlong));
+	assert(validate(overlong) is invalid);

 	const badcont: [_]u8 = [0xC2, 0xFF];
 	decoder = decode(badcont);
 	assert(next(&decoder) is invalid);
-	assert(!valid(badcont));
+	assert(validate(badcont) is invalid);

 	const extracont: [_]u8 = [0xC2, 0xA3, 0x95];
 	decoder = decode(extracont);
 	decoder.offs = 3;
 	assert(prev(&decoder) is invalid);
-	assert(!valid(extracont));
+	assert(validate(extracont) is invalid);
 	const maxinrange: [_]u8 = [0xF4, 0x8F, 0xBF, 0xBF];
 	decoder = decode(maxinrange);
 	match (next(&decoder)) {
@@ -152,11 +152,12 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
 	assert(prev(&decoder) is invalid);
 };

-// Returns true if a given byte slice contains only valid UTF-8 sequences.
-export fn valid(src: []u8) bool = {
+// Returns void if a given byte slice contains only valid UTF-8 sequences,
+// otherwise returns invalid.
+export fn validate(src: []u8) (void | invalid) = {
 	let state = 0;
 	for (let i = 0z; i < len(src) && state >= 0; i += 1) {
 		state = table[state][src[i]];
 	};
-	return state == 0;
+	return if (state == 0) void else invalid;
 };

diff --git a/strings/utf8.ha b/strings/utf8.ha
@@ -21,9 +21,7 @@ export fn fromutf8_unsafe(in: []u8) str = {
 // [[encoding::utf8::invalid]] is returned instead.
 export fn fromutf8(in: []u8) (str | utf8::invalid) = {
 	let s = fromutf8_unsafe(in);
-	if (!utf8::valid(in)) {
-		return utf8::invalid;
-	};
+	utf8::validate(in)?;
 	return s;
 };

diff --git a/types/c/strings.ha b/types/c/strings.ha
@@ -44,9 +44,7 @@ export fn tostr(cstr: *const char) (const str | utf8::invalid) = {
 // Converts a C string with a given length to a Hare string. If the string is
 // not valid UTF-8, return [[encoding::utf8::invalid]].
 export fn tostrn(cstr: *const char, length: size) (const str | utf8::invalid) = {
-	if (!utf8::valid((cstr: *[*]u8)[..length])) {
-		return utf8::invalid;
-	};
+	utf8::validate((cstr: *[*]u8)[..length])?;
 	return tostrn_unsafe(cstr, length);
 };