commit 39ef0f28cb371cdae62f7c6af0e8afea515f5251
parent e21e8aa8a24fb0341ca6e2916d7909713a13f773
Author: Drew DeVault <sir@cmpwn.com>
Date: Tue, 2 Feb 2021 13:57:18 -0500
strings: validate UTF-8 string invariants
Diffstat:
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/strings/cstrings.ha b/strings/cstrings.ha
@@ -1,3 +1,4 @@
+use encoding::utf8;
use types;
// Computes the length of a NUL-terminated C string, in octets, in O(n). The
@@ -9,9 +10,9 @@ export fn c_strlen(cstr: *const char) size = {
return ln;
};
-// Converts a C string to a Hare string in O(n).
-export fn from_c(cstr: *const char) const str = {
- // TODO: Validate UTF-8?
+// Converts a C string to a Hare string in O(n) without checking that it is
+// valid UTF-8.
+export fn from_c_unsafe(cstr: *const char) const str = {
const l = c_strlen(cstr);
const s = types::string {
data = cstr: *[*]u8,
@@ -20,3 +21,11 @@ export fn from_c(cstr: *const char) const str = {
};
return *(&s: *const str);
};
+
+// Converts a NUL-terminated C string to a Hare string in O(n). Aborts if the
+// string is not valid UTF-8.
+export fn from_c(cstr: *const char) const str = {
+ const converted = from_c_unsafe(cstr);
+ assert(utf8::valid(converted));
+ return converted;
+};
diff --git a/strings/utf8.ha b/strings/utf8.ha
@@ -1,3 +1,4 @@
+use encoding::utf8;
use types;
// Converts a byte slice into a string WITHOUT checking that the byte slice is a
@@ -15,8 +16,9 @@ export fn from_utf8_unsafe(in: []u8) str = {
// UTF-8. To handle such an error without aborting, see
// [encoding::utf8::decode] instead.
export fn from_utf8(in: []u8) str = {
- // TODO: Validate string
- return from_utf8_unsafe(in);
+ let s = from_utf8_unsafe(in); // unchecked conversion; validated just below
+ assert(utf8::valid(s)); // abort here if the input is not valid UTF-8
+ return s;
};
// Converts a string to a UTF-8 slice.