utf8.ha (1136B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;
use types;

// Reinterprets a byte slice as a string without checking that the bytes are
// valid UTF-8. This skips the validation performed by [[fromutf8]], but
// handing it bytes which are not valid UTF-8 may cause undefined behavior.
// The returned string is borrowed from the input slice.
export fn fromutf8_unsafe(in: []u8) str = {
	// The slice length is used for both the length and the capacity.
	const n = len(in);
	const repr = types::string {
		data = in: *[*]u8,
		length = n,
		capacity = n,
	};
	// View the hand-built string representation as a str and copy it out.
	return *(&repr: *const str);
};

// Validates a byte slice as UTF-8 and returns it as a string. The returned
// string is borrowed from the input slice. When the slice holds an invalid
// UTF-8 sequence, [[encoding::utf8::invalid]] is returned instead.
export fn fromutf8(in: []u8) (str | utf8::invalid) = {
	// Propagate the validation error, if any, before touching the bytes.
	utf8::validate(in)?;
	return fromutf8_unsafe(in);
};

// Returns the UTF-8 bytes of a string as a byte slice. The returned slice is
// borrowed from the input string.
export fn toutf8(in: str) []u8 = {
	// Reinterpret the string as a byte slice through a pointer cast.
	const view = &in: *[]u8;
	return *view;
};

@test fn utf8() void = {
	// The ASCII bytes of "hello world".
	const hello: []u8 = [
		0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64,
	];
	assert(fromutf8(hello)! == "hello world");
	assert(fromutf8([])! == "");
};