hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 39ef0f28cb371cdae62f7c6af0e8afea515f5251
parent e21e8aa8a24fb0341ca6e2916d7909713a13f773
Author: Drew DeVault <sir@cmpwn.com>
Date:   Tue,  2 Feb 2021 13:57:18 -0500

strings: validate UTF-8 string invariants

Diffstat:
M strings/cstrings.ha | 15 ++++++++++++---
M strings/utf8.ha | 6 ++++--
2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/strings/cstrings.ha b/strings/cstrings.ha
@@ -1,3 +1,4 @@
+use encoding::utf8;
 use types;
 
 // Computes the length of a NUL-terminated C string, in octets, in O(n). The
@@ -9,9 +10,9 @@ export fn c_strlen(cstr: *const char) size = {
 	return ln;
 };
 
-// Converts a C string to a Hare string in O(n).
-export fn from_c(cstr: *const char) const str = {
-	// TODO: Validate UTF-8?
+// Converts a C string to a Hare string in O(n), and does not check if it's
+// valid UTF-8.
+export fn from_c_unsafe(cstr: *const char) const str = {
 	const l = c_strlen(cstr);
 	const s = types::string {
 		data = cstr: *[*]u8,
@@ -20,3 +21,11 @@ export fn from_c(cstr: *const char) const str = {
 	};
 	return *(&s: *const str);
 };
+
+// Converts a C string to a Hare string in O(n). If the string is not valid
+// UTF-8, abort.
+export fn from_c(cstr: *const char) const str = {
+	let s = from_c_unsafe(cstr);
+	assert(utf8::valid(s));
+	return s;
+};
diff --git a/strings/utf8.ha b/strings/utf8.ha
@@ -1,3 +1,4 @@
+use encoding::utf8;
 use types;
 
 // Converts a byte slice into a string WITHOUT checking that the byte slice is a
@@ -15,8 +16,9 @@ export fn from_utf8_unsafe(in: []u8) str = {
 // UTF-8. To handle such an error without aborting, see
 // [encoding::utf8::decode] instead.
 export fn from_utf8(in: []u8) str = {
-	// TODO: Validate string
-	return from_utf8_unsafe(in);
+	let s = from_utf8_unsafe(in);
+	assert(utf8::valid(s));
+	return s;
 };
 
 // Converts a string to a UTF-8 slice.