hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 241031a34c6b8d6b343c2abf40ba90790fab6eec
parent 7a14688a94c324ef5267c83413bdc7aacf38eed7
Author: Eyal Sawady <ecs@d2evs.net>
Date:   Tue, 13 Apr 2021 08:15:44 -0400

hare::lex::token: further improvements

Diffstat:
M hare/lex/+test.ha | 102 +++++++++++++++++++++++++++++++------------------------------------------------
M hare/lex/lex.ha | 10 ++--------
M hare/lex/token.ha | 93 +++++++++++++++++++++++++++++++++----------------------------------------------
M hare/parse/decl.ha | 12 ++----------
M hare/parse/expr.ha | 4 +++-
5 files changed, 86 insertions(+), 135 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -32,29 +32,13 @@ use strings; assert(t.2.line == 1234 && t.2.col == 1234); }; -fn vassert(expected: value, actual: value) bool = { - if (expected.storage != actual.storage) { - return false; - }; - return switch (expected.storage) { - storage::U8, - storage::U16, - storage::U32, - storage::U64, - storage::UINT, - storage::UINTPTR => expected._uint == actual._uint, - storage::I8, - storage::I16, - storage::I32, - storage::I64, - storage::INT, - storage::ICONST => expected._int == actual._int, - storage::F32, - storage::F64, - storage::FCONST => expected.float == actual.float, - storage::RUNE => expected._rune == actual._rune, - storage::STR => expected.string == actual.string, - }; +fn vassert(expected: value, actual: value) void = match (expected) { + expected: str => assert(actual as str == expected), + expected: rune => assert(actual as rune == expected), + expected: i64 => assert(actual as i64 == expected), + expected: u64 => assert(actual as u64 == expected), + expected: f64 => assert(actual as f64 == expected), + void => assert(actual is void), }; fn lextest(in: str, expected: []token) void = { @@ -71,13 +55,7 @@ fn lextest(in: str, expected: []token) void = { }, }; assert(tl.0 == etok.0); - if (etok.1 is void) { - assert(tl.1 is void); - } else if (etok.1 is str) { - assert(tl.1 as str == etok.1 as str); - } else { - vassert(tl.1 as value, etok.1 as value); - }; + vassert(tl.1, etok.1); assert(tl.2.line == etok.2.line && tl.2.col == etok.2.col && tl.2.path == etok.2.path); }; @@ -195,21 +173,21 @@ fn loc(line: uint, col: uint) location = location { const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' " "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'"; const expected: [_]token = [ - (ltok::LITERAL, value { storage = storage::RUNE, _rune = 'a' }, loc(1, 1)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = 'b' }, loc(1, 5)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = 
'\a' }, loc(1, 9)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\b' }, loc(1, 14)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\f' }, loc(1, 19)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\n' }, loc(1, 24)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\r' }, loc(1, 29)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\t' }, loc(1, 34)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\v' }, loc(1, 39)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\0' }, loc(1, 44)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\\' }, loc(1, 49)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\'' }, loc(1, 54)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\x0A' }, loc(1, 59)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\u1234' }, loc(1, 66)), - (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\U12345678' }, loc(1, 75)), + (ltok::LIT_RUNE, 'a', loc(1, 1)), + (ltok::LIT_RUNE, 'b', loc(1, 5)), + (ltok::LIT_RUNE, '\a', loc(1, 9)), + (ltok::LIT_RUNE, '\b', loc(1, 14)), + (ltok::LIT_RUNE, '\f', loc(1, 19)), + (ltok::LIT_RUNE, '\n', loc(1, 24)), + (ltok::LIT_RUNE, '\r', loc(1, 29)), + (ltok::LIT_RUNE, '\t', loc(1, 34)), + (ltok::LIT_RUNE, '\v', loc(1, 39)), + (ltok::LIT_RUNE, '\0', loc(1, 44)), + (ltok::LIT_RUNE, '\\', loc(1, 49)), + (ltok::LIT_RUNE, '\'', loc(1, 54)), + (ltok::LIT_RUNE, '\x0A', loc(1, 59)), + (ltok::LIT_RUNE, '\u1234', loc(1, 66)), + (ltok::LIT_RUNE, '\U12345678', loc(1, 75)), ]; lextest(in, expected); }; @@ -218,32 +196,32 @@ fn loc(line: uint, col: uint) location = location { const in = "\"a\" \"b\" \"\\a\" \"\\b\" \"\\f\" \"\\n\" \"\\r\" " "\"\\t\" \"\\v\" \"\\0\" \"\\\\\" \"\\\'\""; const expected: [_]token = [ - (ltok::LITERAL, value { storage = storage::STR, string = "a" }, loc(1, 1)), - (ltok::LITERAL, value { storage = storage::STR, string = "b" }, loc(1, 5)), - (ltok::LITERAL, value { 
storage = storage::STR, string = "\a" }, loc(1, 9)), - (ltok::LITERAL, value { storage = storage::STR, string = "\b" }, loc(1, 14)), - (ltok::LITERAL, value { storage = storage::STR, string = "\f" }, loc(1, 19)), - (ltok::LITERAL, value { storage = storage::STR, string = "\n" }, loc(1, 24)), - (ltok::LITERAL, value { storage = storage::STR, string = "\r" }, loc(1, 29)), - (ltok::LITERAL, value { storage = storage::STR, string = "\t" }, loc(1, 34)), - (ltok::LITERAL, value { storage = storage::STR, string = "\v" }, loc(1, 39)), - (ltok::LITERAL, value { storage = storage::STR, string = "\0" }, loc(1, 44)), - (ltok::LITERAL, value { storage = storage::STR, string = "\\" }, loc(1, 49)), - (ltok::LITERAL, value { storage = storage::STR, string = "\'" }, loc(1, 54)), + (ltok::LIT_STR, "a", loc(1, 1)), + (ltok::LIT_STR, "b", loc(1, 5)), + (ltok::LIT_STR, "\a", loc(1, 9)), + (ltok::LIT_STR, "\b", loc(1, 14)), + (ltok::LIT_STR, "\f", loc(1, 19)), + (ltok::LIT_STR, "\n", loc(1, 24)), + (ltok::LIT_STR, "\r", loc(1, 29)), + (ltok::LIT_STR, "\t", loc(1, 34)), + (ltok::LIT_STR, "\v", loc(1, 39)), + (ltok::LIT_STR, "\0", loc(1, 44)), + (ltok::LIT_STR, "\\", loc(1, 49)), + (ltok::LIT_STR, "\'", loc(1, 54)), ]; // TODO: test \x and \u and \U lextest(in, expected); const in = "\"ab\\a\\b\\f\\n\\r\\t\\v\\0\\\\\\'\""; const expected: [_]token = [ - (ltok::LITERAL, value { storage = storage::STR, string = "ab\a\b\f\n\r\t\v\0\\\'" }, loc(1, 1)), + (ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)), ]; lextest(in, expected); const in = "\"hello world\" \"こんにちは\" \"return\" \"foo\""; const expected: [_]token = [ - (ltok::LITERAL, value { storage = storage::STR, string = "hell }o world" }, loc(1, 1)), - (ltok::LITERAL, value { storage = storage::STR, string = "こんにちは" }, loc(1, 15)), - (ltok::LITERAL, value { storage = storage::STR, string = "return" }, loc(1, 23)), - (ltok::LITERAL, value { storage = storage::STR, string = "foo" }, loc(1, 32)), + (ltok::LIT_STR, "hello world", loc(1, 
1)), + (ltok::LIT_STR, "こんにちは", loc(1, 15)), + (ltok::LIT_STR, "return", loc(1, 23)), + (ltok::LIT_STR, "foo", loc(1, 32)), ]; lextest(in, expected); }; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -168,10 +168,7 @@ fn lex_string(lex: *lexer, loc: location) (token | error) = { append(chars, utf8::encoderune(r)...); }, }; - return (ltok::LITERAL, value { - storage = storage::STR, - string = strings::fromutf8(chars), - }, loc); + return (ltok::LIT_STR, strings::fromutf8(chars), loc); }; fn lex_rn_str(lex: *lexer, loc: location) (token | error) = { @@ -186,10 +183,7 @@ fn lex_rn_str(lex: *lexer, loc: location) (token | error) = { }; // Rune literal - let ret: token = (ltok::LITERAL, value { - storage = storage::RUNE, - _rune = lex_rune(lex, loc)?, - }, loc); + let ret: token = (ltok::LIT_RUNE, lex_rune(lex, loc)?, loc); match (next(lex)?) { io::EOF => return syntaxerr(loc, "unexpected EOF"), diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -119,8 +119,26 @@ export type ltok = enum { TIMESEQ, LAST_BTOK = TIMESEQ, + LIT_U8, + LIT_U16, + LIT_U32, + LIT_U64, + LIT_UINT, + LIT_UINTPTR, + LIT_I8, + LIT_I16, + LIT_I32, + LIT_I64, + LIT_INT, + LIT_ICONST, + LIT_F32, + LIT_F64, + LIT_FCONST, + LIT_RUNE, + LIT_STR, + LAST_LITERAL = STR, + NAME, - LITERAL, LABEL, EOF, }; @@ -238,38 +256,8 @@ const bmap: [_]str = [ "*=", ]; -// The storage of a [value], such as U32 for '1337u32' -export type storage = enum { - U8, - U16, - U32, - U64, - UINT, - UINTPTR, - I8, - I16, - I32, - I64, - INT, - ICONST, - F32, - F64, - FCONST, - RUNE, - STR, -}; - // A token value, used for tokens such as '1337u32' -export type value = struct { - storage: storage, - union { - string: str, - _rune: rune, - _int: i64, - _uint: u64, - float: f64, - }, -}; +export type value = (str | rune | i64 | u64 | f64 | void); // A location within a source file. // The path is borrowed from the file name given to the lexer. @@ -280,36 +268,33 @@ export type location = struct { }; // A single lexical token. 
-export type token = (ltok, (value | str | void), location); +export type token = (ltok, value, location); // Converts a token to its string representation export fn tokstr(tok: token) const str = { if (tok.0 <= ltok::LAST_BTOK) { return bmap[tok.0: int]; }; - let val = tok.1 as value; return switch (tok.0) { - ltok::NAME => val.string, + ltok::LIT_U8 => "u8", + ltok::LIT_U16 => "u16", + ltok::LIT_U32 => "u32", + ltok::LIT_U64 => "u64", + ltok::LIT_UINT => "uint", + ltok::LIT_UINTPTR => "uintptr", + ltok::LIT_I8 => "i8", + ltok::LIT_I16 => "i16", + ltok::LIT_I32 => "i32", + ltok::LIT_I64 => "i64", + ltok::LIT_INT => "int", + ltok::LIT_ICONST => "iconst", + ltok::LIT_F32 => "f32", + ltok::LIT_F64 => "f64", + ltok::LIT_FCONST => "fconst", + ltok::LIT_RUNE => "rune", + ltok::LIT_STR => "str", + ltok::NAME => tok.1 as str, ltok::LABEL => abort(), // TODO - ltok::LITERAL => switch (val.storage) { - storage::U8 => "u8", - storage::U16 => "u16", - storage::U32 => "u32", - storage::U64 => "u64", - storage::UINT => "uint", - storage::UINTPTR => "uintptr", - storage::I8 => "i8", - storage::I16 => "i16", - storage::I32 => "i32", - storage::I64 => "i64", - storage::INT => "int", - storage::ICONST => "iconst", - storage::F32 => "f32", - storage::F64 => "f64", - storage::FCONST => "fconst", - storage::RUNE => "rune", - storage::STR => "str", - }, ltok::EOF => "EOF", * => abort(), }; diff --git a/hare/parse/decl.ha b/hare/parse/decl.ha @@ -7,16 +7,8 @@ use strings; fn attr_symbol(lexer: *lex::lexer) (str | error) = { want(lexer, ltok::LPAREN)?; - let t = want(lexer, ltok::LITERAL)?; - let s = match (t.1) { - v: lex::value => if (v.storage == lex::storage::STR) v.string - else return syntaxerr(t.2, - "Unexpected {}, was expecting string", - lex::tokstr(t)), - * => return syntaxerr(t.2, - "Unexpected {}, was expecting string", - lex::tokstr(t)), - }; + let t = want(lexer, ltok::LIT_STR)?; + let s = t.1 as str; let d = strings::iter(s); match (strings::next(&d)) { void => void, diff 
--git a/hare/parse/expr.ha b/hare/parse/expr.ha @@ -79,6 +79,9 @@ fn objsel(lexer: *lex::lexer) (ast::expr | error) = { fn plain_expression(lexer: *lex::lexer) (ast::expr | error) = { let tok = peek(lexer)? as lex::token; + if (tok.0 > ltok::LAST_BTOK && tok.0 >= ltok::LAST_LITERAL) { + return constant(lexer); + }; return switch (tok.0) { ltok::TRUE, ltok::FALSE, @@ -95,7 +98,6 @@ fn plain_expression(lexer: *lex::lexer) (ast::expr | error) = { * => abort(), }; }, - ltok::LITERAL => return constant(lexer), ltok::NAME => { let id = ident(lexer)?; return match (try(lexer, ltok::LBRACE)?) {