hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e5fc2405c2b4db3247cdf8b278aee30d8c50548e
parent 2a06bfce35c1314107e446e6c1c190cc3f0c00b5
Author: Armin Weigl <tb46305@gmail.com>
Date:   Sun, 21 Feb 2021 11:03:50 +0100

lex: implement \x, \u and \U

Diffstat:
M hare/lex/+test.ha | 6 ++++--
M hare/lex/lex.ha | 33 ++++++++++++++++++++++++++++-----
2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -212,7 +212,7 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = { @test fn runes() void = { const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' " - "'\\\\' '\\\''"; + "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'"; const expected: [_](uint, uint, token) = [ (1, 1, literal { storage = literal_type::RUNE, _rune = 'a' }), (1, 5, literal { storage = literal_type::RUNE, _rune = 'b' }), @@ -226,8 +226,10 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = { (1, 44, literal { storage = literal_type::RUNE, _rune = '\0' }), (1, 49, literal { storage = literal_type::RUNE, _rune = '\\' }), (1, 54, literal { storage = literal_type::RUNE, _rune = '\'' }), + (1, 59, literal { storage = literal_type::RUNE, _rune = '\x0A' }), + (1, 66, literal { storage = literal_type::RUNE, _rune = '\u1234' }), + (1, 75, literal { storage = literal_type::RUNE, _rune = '\U12345678' }), ]; - // TODO: test \x and \u and \U lextest(in, expected); }; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -3,6 +3,7 @@ use ascii; use encoding::utf8; use io; use sort; +use strconv; use strings; // State associated with a lexer. 
@@ -99,6 +100,29 @@ fn ncmp(a: const *void, b: const *void) int = { }; }; +fn lex_unicode(lex: *lexer, loc: location, n: size) (rune | error) = { + assert(n < 9); + let buf: [9]u8 = [0...]; + for (let i = 0z; i < n; i += 1z) { + let r = match (next(lex)) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for escape"), + err: io::error => return err, + r: rune => r, + }; + if (!ascii::isxdigit(r)) { + return syntaxerr(loc, + "unexpected rune scanning for escape"); + }; + buf[i] = r: u32: u8; + }; + let s = strings::from_utf8_unsafe(buf[..n]); + return match (strconv::stou32b(s, strconv::base::HEX)) { + (strconv::overflow | strconv::invalid) => abort(), // Invariant + u: u32 => u: rune, + }; +}; + fn lex_rune(lex: *lexer, loc: location) (rune | error) = { let r = match (next(lex)) { io::EOF => return syntaxerr(loc, @@ -115,7 +139,7 @@ fn lex_rune(lex: *lexer, loc: location) (rune | error) = { err: io::error => return err, r: rune => r, }; - let r = switch (r) { + return switch (r) { '\\' => '\\', '\'' => '\'', '0' => '\0', @@ -127,17 +151,16 @@ fn lex_rune(lex: *lexer, loc: location) (rune | error) = { 't' => '\t', 'v' => '\v', '"' => '\"', - 'x' => abort(), // TODO - 'u' => abort(), // TODO + 'x' => lex_unicode(lex, loc, 2), + 'u' => lex_unicode(lex, loc, 4), + 'U' => lex_unicode(lex, loc, 8), }; - return r; }; fn lex_string( lex: *lexer, loc: location, ) ((token, location) | io::EOF | error) = { - // TODO: test me let chars: []u8 = []; for (true) match (next(lex)) { err: io::error => return err,