commit b8b0327ef2249b846f9f0cde2ac4d0f17af2f9d2
parent 79b55532d74c4b1b0d6b1a1a1846073a4363f60a
Author: Drew DeVault <sir@cmpwn.com>
Date: Fri, 5 Feb 2021 12:26:56 -0500
encoding: add tests
Diffstat:
2 files changed, 42 insertions(+), 0 deletions(-)
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -60,6 +60,33 @@ export fn next(d: *decoder) (rune | void | more | invalid) = {
return r: rune;
};
+// Exercises next() on well-formed, malformed and truncated UTF-8 input.
+@test fn decode() void = {
+	const input = [
+		0xE3u8, 0x81u8, 0x93u8, 0xE3u8, 0x82u8, 0x93u8, 0xE3u8, 0x81u8,
+		0xABu8, 0xE3u8, 0x81u8, 0xA1u8, 0xE3u8, 0x81u8, 0xAFu8, 0x00u8,
+	];
+	const expected = ['こ', 'ん', 'に', 'ち', 'は', '\0'];
+	let decoder = decode(input);
+	for (let i = 0z; i < len(expected); i += 1z) {
+		match (next(&decoder)) {
+			r: rune => assert(r == expected[i]),
+			invalid => abort(),
+			more => abort(),
+			void => abort(),
+		};
+	};
+	assert(next(&decoder) is void); // input fully consumed
+	// TODO: Test more invalid sequences
+	// Not named "invalid", to avoid clashing with the type tested below.
+	const malformed = [0xA0u8, 0xA1u8];
+	decoder = decode(malformed);
+	assert(next(&decoder) is invalid);
+	// 0xE3 opens a three-byte sequence, so two bytes alone are incomplete.
+	const incomplete = [0xE3u8, 0x81u8];
+	decoder = decode(incomplete);
+	assert(next(&decoder) is more);
+};
+
// Returns true if a given string or byte slice contains only valid UTF-8
// sequences. Note that Hare strings (str) are always valid UTF-8 - if this
// returns false for a str type, something funny is going on.
diff --git a/encoding/utf8/encode.ha b/encoding/utf8/encode.ha
@@ -24,3 +24,18 @@ export fn encode_rune(r: rune) []u8 = {
buf[0] = ch: u8 | first;
return buf[..n];
};
+
+// Checks that encode_rune yields exactly the expected bytes for each rune.
+@test fn encode() void = {
+	const expected: [_][]u8 = [[0u8], [0x25u8], [0xE3u8, 0x81u8, 0x93u8]];
+	const inputs = ['\0', '%', 'こ'];
+	for (let i = 0z; i < len(inputs); i += 1z) {
+		const out = encode_rune(inputs[i]);
+		// Compare lengths first: the byte loop below only walks expected[i],
+		// so surplus output bytes would otherwise go unnoticed.
+		assert(len(out) == len(expected[i]));
+		for (let j = 0z; j < len(expected[i]); j += 1z) {
+			assert(out[j] == expected[i][j]);
+		};
+	};
+};