commit c3974b628f6b403480e4743d498774a2194e58b6
parent 0e42bcc4c1e787c5350dc4045adc8b2a36f8a934
Author: Sebastian <sebastian@sebsite.pw>
Date: Thu, 14 Sep 2023 00:04:32 -0400
all: don't use invalid UTF-8 in runes
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/ascii/valid.ha b/ascii/valid.ha
@@ -19,7 +19,7 @@ export fn validstr(s: str) bool = {
@test fn valid() void = {
assert(valid('a') && valid('\0') && valid('\x7F'));
- assert(!valid('\x80') && !valid('こ'));
+ assert(!valid('\u0080') && !valid('こ'));
assert(validstr("abc\0"));
assert(!validstr("š"));
assert(!validstr("こんにちは"));
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -217,7 +217,7 @@ fn loc(line: uint, col: uint) location = location {
@test fn runes() void = {
const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
- "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'";
+ "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U0010abcd'";
const expected: [_]token = [
(ltok::LIT_RCONST, 'a', loc(1, 1)),
(ltok::LIT_RCONST, 'b', loc(1, 5)),
@@ -233,7 +233,7 @@ fn loc(line: uint, col: uint) location = location {
(ltok::LIT_RCONST, '\'', loc(1, 54)),
(ltok::LIT_RCONST, '\x0A', loc(1, 59)),
(ltok::LIT_RCONST, '\u1234', loc(1, 66)),
- (ltok::LIT_RCONST, '\U12345678', loc(1, 75)),
+ (ltok::LIT_RCONST, '\U0010abcd', loc(1, 75)),
];
lextest(in, expected);
};
diff --git a/types/limits.ha b/types/limits.ha
@@ -51,8 +51,8 @@ export def U64_MIN: u64 = 0;
// Maximum value which can be stored in a u64 type.
export def U64_MAX: u64 = 18446744073709551615;
-// Minimum value which can be stored in a rune.
-export def RUNE_MIN: rune = U32_MIN: rune;
+// Minimum Unicode codepoint which can be stored in a rune.
+export def RUNE_MIN: rune = '\0';
-// Maximum value which can be stored in a rune.
-export def RUNE_MAX: rune = U32_MAX: rune;
+// Maximum Unicode codepoint which can be stored in a rune.
+export def RUNE_MAX: rune = '\U0010ffff';