hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 025646da5faa323edd3ccd29abf786cc2664ac75
parent 88cb6c68e9cb248aac3bd54bf543b9f8a08ac607
Author: Drew DeVault <sir@cmpwn.com>
Date:   Fri, 19 Feb 2021 16:08:53 -0500

hare::lex: lex rune literals

Diffstat:
M hare/lex/+test.ha | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M hare/lex/lex.ha | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M hare/lex/token.ha | 9 ++++++++-
3 files changed, 159 insertions(+), 10 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -29,6 +29,30 @@ use strings; assert(t.1.line == 1234 && t.1.col == 1234); }; +fn liteq(expected: literal, actual: literal) bool = { + if (expected.storage != actual.storage) { + return false; + }; + return switch (expected.storage) { + literal_type::U8, + literal_type::U16, + literal_type::U32, + literal_type::U64, + literal_type::UINT, + literal_type::UINTPTR => expected._uint == actual._uint, + literal_type::I8, + literal_type::I16, + literal_type::I32, + literal_type::I64, + literal_type::INT, + literal_type::ICONST => expected._int == actual._int, + literal_type::F32, + literal_type::F64, + literal_type::FCONST => expected.float == actual.float, + literal_type::RUNE => expected._rune == actual._rune, + }; +}; + fn lextest(in: str, expected: [](uint, uint, token)) void = { let lexer = lexer_init(bufio::fixed(strings::to_utf8(in)), "<test>"); for (let i = 0z; i < len(expected); i += 1) { @@ -36,7 +60,14 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = { etok = expected[i].2; let tl = match (lex(&lexer)) { tl: (token, location) => tl, - * => abort(), + io::EOF => { + fmt::errorln("unexpected EOF at {}", i); + abort(); + }, + err: error => { + fmt::errorln("{}: {}", i, errstr(err)); + abort(); + }, }; let tok = tl.0, loc = tl.1; match (tok) { @@ -50,6 +81,18 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = { i, tokstr(tok), tokstr(etok)); abort(); }, + l: literal => if (!(etok is literal)) { + fmt::errorln("bad token at {}: got {}, wanted {}", + i, tokstr(tok), tokstr(etok)); + abort(); + } else { + let e = etok as literal; + if (!liteq(l, e)) { + fmt::errorln("bad token at {}: got '{}', wanted '{}'", + i, tokstr(tok), tokstr(etok)); + abort(); + }; + }, * => abort("TODO"), }; assert(loc.path == "<test>"); @@ -150,3 +193,24 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = { ]; lextest(in, expected); }; + +@test fn runes() void = { + const in = "'a' 'b' '\\a' '\\b' '\\f' 
'\\n' '\\r' '\\t' '\\v' '\\0' " + "'\\\\' '\\\''"; + const expected: [_](uint, uint, token) = [ + (1, 1, literal { storage = literal_type::RUNE, _rune = 'a' }), + (1, 5, literal { storage = literal_type::RUNE, _rune = 'b' }), + (1, 9, literal { storage = literal_type::RUNE, _rune = '\a' }), + (1, 14, literal { storage = literal_type::RUNE, _rune = '\b' }), + (1, 19, literal { storage = literal_type::RUNE, _rune = '\f' }), + (1, 24, literal { storage = literal_type::RUNE, _rune = '\n' }), + (1, 29, literal { storage = literal_type::RUNE, _rune = '\r' }), + (1, 34, literal { storage = literal_type::RUNE, _rune = '\t' }), + (1, 39, literal { storage = literal_type::RUNE, _rune = '\v' }), + (1, 44, literal { storage = literal_type::RUNE, _rune = '\0' }), + (1, 49, literal { storage = literal_type::RUNE, _rune = '\\' }), + (1, 54, literal { storage = literal_type::RUNE, _rune = '\'' }), + ]; + // TODO: test \x and \u and \U + lextest(in, expected); +}; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -15,7 +15,7 @@ export type lexer = struct { }; // A syntax error -export type syntax = location; +export type syntax = (location, str); // All possible lexer errors export type error = (io::error | syntax); @@ -23,7 +23,7 @@ export type error = (io::error | syntax); export fn errstr(err: error) const str = { return match (err) { err: io::error => io::errstr(err), - syntax => "Syntax error", // TODO: add line info + s: syntax => s.1, // TODO: format me }; }; @@ -66,8 +66,11 @@ export fn lex(lex: *lexer) ((token, location) | io::EOF | error) = { }; let tok: token = switch (r) { - * => return syntaxerr(loc), - '"', '\'' => abort(), // TODO: Strings/runes + * => return syntaxerr(loc, "invalid character"), + '"', '\'' => { + unget(lex, r); + return lex_string(lex, loc); + }, '.', '<', '>' => return lex3(lex, loc, r), '^', '*', '%', '/', '+', '-', ':', '!', '&', '|', '=' => { return lex2(lex, loc, r); @@ -96,7 +99,82 @@ fn ncmp(a: const *void, b: const *void) int = { }; }; -fn 
lex_name(lex: *lexer, loc: location) ((token, location) | io::EOF | error) = { +fn lex_rune(lex: *lexer, loc: location) (rune | error) = { + let r = match (next(lex)) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for rune"), + err: io::error => return err, + r: rune => r, + }; + if (r != '\\') { + match (next(lex)) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning rune, expected \"\'\""), + err: io::error => return err, + r: rune => if (r != '\'') + return syntaxerr(loc, "expected \"\'\" after rune"), + }; + return r; + }; + r = match (next(lex)) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for escape"), + err: io::error => return err, + r: rune => r, + }; + let r = switch (r) { + '\\' => '\\', + '\'' => '\'', + '0' => '\0', + 'a' => '\a', + 'b' => '\b', + 'f' => '\f', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\v', + '"' => '\"', + 'x' => abort(), // TODO + 'u' => abort(), // TODO + }; + match (next(lex)) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning escape sequence, expected \"\'\""), + err: io::error => return err, + r: rune => if (r != '\'') return syntaxerr(loc, + "expected \"\'\" after escape sequence"), + }; + return r; +}; + +fn lex_string( + lex: *lexer, + loc: location, +) ((token, location) | io::EOF | error) = { + let r = match (next(lex)) { + r: rune => r, + (io::EOF | io::error) => abort(), + }; + return switch (r) { + '\'' => match (lex_rune(lex, loc)) { + err: error => err, + r: rune => (literal { + storage = literal_type::RUNE, + _rune = r, + }: token, loc), + }, + '\"' => { + let chars: []u8 = []; + abort(); // TODO + }, + * => abort(), // Invariant + }; +}; + +fn lex_name( + lex: *lexer, + loc: location, +) ((token, location) | io::EOF | error) = { let chars: []u8 = []; match (next(lex)) { r: rune => { @@ -226,7 +304,7 @@ fn lex2( }, io::EOF => btoken::EQUAL, }, - * => return syntaxerr(loc), + * => return syntaxerr(loc, "unknown token sequence"), }; unget(lexr, n); 
return (tok, loc); @@ -250,7 +328,7 @@ fn lex3( '.' => lex3dot(lex, loc, n), '<' => lex3lt(lex, loc, n), '>' => lex3gt(lex, loc, n), - * => syntaxerr(loc), + * => syntaxerr(loc, "unknown token sequence"), }; }; @@ -419,4 +497,4 @@ fn mkloc(lex: *lexer) location = location { col = lex.loc.1, }; -fn syntaxerr(loc: location) error = loc: syntax: error; +fn syntaxerr(loc: location, why: str) error = (loc, why): syntax: error; diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -1,3 +1,6 @@ +use encoding::utf8; +use strings; + // A token with no additional context, such as '+' export type btoken = enum { // Keep ordered with bmap @@ -248,7 +251,7 @@ export type literal_type = enum { F32, F64, FCONST, - VOID, + RUNE, }; // A token for a literal value, such as '1337u32' @@ -278,6 +281,10 @@ export fn tokstr(tok: token) const str = { return match (tok) { b: btoken => bmap[b: int], n: name => n: str, + l: literal => switch (l.storage) { + literal_type::RUNE => "rune", + * => abort(), // TODO + }, * => abort(), // TODO }; };