commit 28f905de101d6656ec4dd5fcc763834f58468299
parent dbed6d0e75f18ae8128af4482df919f9e2255ca6
Author: Sebastian LaVine <mail@smlavine.com>
Date: Thu, 28 Dec 2023 08:17:09 -0500
encoding::utf8: Rename valid() to validate() and change return type
As shown in the changes made to the stdlib to complement this change,
this allows for a common idiom to be expressed more concisely with the
`?` operator. This also aligns with the interface of the prev() and
next() iterators.
Signed-off-by: Sebastian LaVine <mail@smlavine.com>
Diffstat:
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -70,7 +70,7 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81,
0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00,
];
- assert(valid(input));
+ assert(validate(input) is void);
const expected = ['こ', 'ん', 'に', 'ち', 'は', '\0'];
let decoder = decode(input);
for (let i = 0z; i < len(expected); i += 1) {
@@ -98,39 +98,39 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
assert(next(&decoder) is invalid);
decoder.offs = 2;
assert(prev(&decoder) is more);
- assert(!valid(inv));
+ assert(validate(inv) is invalid);
const incomplete: [_]u8 = [0xE3, 0x81];
decoder = decode(incomplete);
assert(next(&decoder) is more);
decoder.offs = 2;
assert(prev(&decoder) is invalid);
- assert(!valid(incomplete));
+ assert(validate(incomplete) is invalid);
const surrogate: [_]u8 = [0xED, 0xA0, 0x80];
decoder = decode(surrogate);
assert(next(&decoder) is invalid);
decoder.offs = 3;
assert(prev(&decoder) is invalid);
- assert(!valid(surrogate));
+ assert(validate(surrogate) is invalid);
const overlong: [_]u8 = [0xF0, 0x82, 0x82, 0xAC];
decoder = decode(overlong);
assert(next(&decoder) is invalid);
decoder.offs = 4;
assert(prev(&decoder) is invalid);
- assert(!valid(overlong));
+ assert(validate(overlong) is invalid);
const badcont: [_]u8 = [0xC2, 0xFF];
decoder = decode(badcont);
assert(next(&decoder) is invalid);
- assert(!valid(badcont));
+ assert(validate(badcont) is invalid);
const extracont: [_]u8 = [0xC2, 0xA3, 0x95];
decoder = decode(extracont);
decoder.offs = 3;
assert(prev(&decoder) is invalid);
- assert(!valid(extracont));
+ assert(validate(extracont) is invalid);
const maxinrange: [_]u8 = [0xF4, 0x8F, 0xBF, 0xBF];
decoder = decode(maxinrange);
match (next(&decoder)) {
@@ -152,11 +152,12 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
assert(prev(&decoder) is invalid);
};
-// Returns true if a given byte slice contains only valid UTF-8 sequences.
-export fn valid(src: []u8) bool = {
+// Returns void if the given byte slice consists solely of complete, valid
+// UTF-8 sequences; otherwise returns [[invalid]].
+export fn validate(src: []u8) (void | invalid) = {
let state = 0;
for (let i = 0z; i < len(src) && state >= 0; i += 1) {
state = table[state][src[i]];
};
- return state == 0;
+ return if (state == 0) void else invalid;
};
diff --git a/strings/utf8.ha b/strings/utf8.ha
@@ -21,9 +21,7 @@ export fn fromutf8_unsafe(in: []u8) str = {
// [[encoding::utf8::invalid]] is returned instead.
export fn fromutf8(in: []u8) (str | utf8::invalid) = {
let s = fromutf8_unsafe(in);
- if (!utf8::valid(in)) {
- return utf8::invalid;
- };
+ utf8::validate(in)?;
return s;
};
diff --git a/types/c/strings.ha b/types/c/strings.ha
@@ -44,9 +44,7 @@ export fn tostr(cstr: *const char) (const str | utf8::invalid) = {
// Converts a C string with a given length to a Hare string. If the string is
// not valid UTF-8, return [[encoding::utf8::invalid]].
export fn tostrn(cstr: *const char, length: size) (const str | utf8::invalid) = {
- if (!utf8::valid((cstr: *[*]u8)[..length])) {
- return utf8::invalid;
- };
+ utf8::validate((cstr: *[*]u8)[..length])?;
return tostrn_unsafe(cstr, length);
};