commit e5fc2405c2b4db3247cdf8b278aee30d8c50548e
parent 2a06bfce35c1314107e446e6c1c190cc3f0c00b5
Author: Armin Weigl <tb46305@gmail.com>
Date: Sun, 21 Feb 2021 11:03:50 +0100
lex: implement \x, \u and \U
Diffstat:
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -212,7 +212,7 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
@test fn runes() void = {
const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
- "'\\\\' '\\\''";
+ "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'";
const expected: [_](uint, uint, token) = [
(1, 1, literal { storage = literal_type::RUNE, _rune = 'a' }),
(1, 5, literal { storage = literal_type::RUNE, _rune = 'b' }),
@@ -226,8 +226,10 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
(1, 44, literal { storage = literal_type::RUNE, _rune = '\0' }),
(1, 49, literal { storage = literal_type::RUNE, _rune = '\\' }),
(1, 54, literal { storage = literal_type::RUNE, _rune = '\'' }),
+ (1, 59, literal { storage = literal_type::RUNE, _rune = '\x0A' }),
+ (1, 66, literal { storage = literal_type::RUNE, _rune = '\u1234' }),
+ (1, 75, literal { storage = literal_type::RUNE, _rune = '\U12345678' }),
];
- // TODO: test \x and \u and \U
lextest(in, expected);
};
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -3,6 +3,7 @@ use ascii;
use encoding::utf8;
use io;
use sort;
+use strconv;
use strings;
// State associated with a lexer.
@@ -99,6 +100,29 @@ fn ncmp(a: const *void, b: const *void) int = {
};
};
+fn lex_unicode(lex: *lexer, loc: location, n: size) (rune | error) = { // Consumes n runes via next(), requires each to be a hex digit, and returns the parsed value as a rune; loc is only used for error reporting.
+ assert(n < 9); // keeps buf[..n] inside the 9-byte buffer; lex_rune passes 2, 4 or 8
+ let buf: [9]u8 = [0...];
+ for (let i = 0z; i < n; i += 1z) {
+ let r = match (next(lex)) {
+ io::EOF => return syntaxerr(loc,
+ "unexpected EOF scanning for escape"),
+ err: io::error => return err,
+ r: rune => r,
+ };
+ if (!ascii::isxdigit(r)) { // any non-hex rune ends the escape with a syntax error
+ return syntaxerr(loc,
+ "unexpected rune scanning for escape");
+ };
+ buf[i] = r: u32: u8; // presumably lossless because r passed ascii::isxdigit - confirm it rejects non-ASCII runes
+ };
+ let s = strings::from_utf8_unsafe(buf[..n]); // only the n collected digits are parsed
+ return match (strconv::stou32b(s, strconv::base::HEX)) {
+ (strconv::overflow | strconv::invalid) => abort(), // Invariant: s is at most 8 checked hex digits; NOTE(review): n == 0 yields an empty s and would presumably land here - confirm
+ u: u32 => u: rune, // NOTE(review): no range check before the cast; the test's \U12345678 is above U+10FFFF - confirm such values are meant to be accepted
+ };
+};
+
fn lex_rune(lex: *lexer, loc: location) (rune | error) = {
let r = match (next(lex)) {
io::EOF => return syntaxerr(loc,
@@ -115,7 +139,7 @@ fn lex_rune(lex: *lexer, loc: location) (rune | error) = {
err: io::error => return err,
r: rune => r,
};
- let r = switch (r) {
+ return switch (r) {
'\\' => '\\',
'\'' => '\'',
'0' => '\0',
@@ -127,17 +151,16 @@ fn lex_rune(lex: *lexer, loc: location) (rune | error) = {
't' => '\t',
'v' => '\v',
'"' => '\"',
- 'x' => abort(), // TODO
- 'u' => abort(), // TODO
+ 'x' => lex_unicode(lex, loc, 2),
+ 'u' => lex_unicode(lex, loc, 4),
+ 'U' => lex_unicode(lex, loc, 8),
};
- return r;
};
fn lex_string(
lex: *lexer,
loc: location,
) ((token, location) | io::EOF | error) = {
- // TODO: test me
let chars: []u8 = [];
for (true) match (next(lex)) {
err: io::error => return err,