hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit f1da658ed2e996c5b9d7c5521268518f524e3ec7
parent bf6029d3dc85a27873063a105c19265b599908ce
Author: Eyal Sawady <ecs@d2evs.net>
Date:   Sun, 11 Apr 2021 12:18:36 -0400

Revert "Refactor hare::lex::literal to be a tagged union"

This reverts commit 2ecee055f6865c8a1e41e3cc050ae7d9626d5ae0.

The tagged union approach didn't work when the size of
int/uint/uintptr/size differed between the host and the target systems.

Diffstat:
M hare/lex/+test.ha | 114 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
M hare/lex/lex.ha | 10 ++++++++--
M hare/lex/token.ha | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M hare/parse/decl.ha | 5 ++++-
4 files changed, 126 insertions(+), 81 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -31,25 +31,29 @@ use strings; assert(t.2.line == 1234 && t.2.col == 1234); }; -fn vassert(expected: value, actual: value) void = match (expected) { - e: u8 => assert(actual as u8 == e), - e: u16 => assert(actual as u16 == e), - e: u32 => assert(actual as u32 == e), - e: u64 => assert(actual as u64 == e), - e: uint => assert(actual as uint == e), - e: uintptr => assert(actual as uintptr == e), - e: i8 => assert(actual as i8 == e), - e: i16 => assert(actual as i16 == e), - e: i32 => assert(actual as i32 == e), - e: i64 => assert(actual as i64 == e), - e: int => assert(actual as int == e), - e: iconst => assert(actual as iconst == e), - e: f32 => assert(actual as f32 == e), - e: f64 => assert(actual as f64 == e), - e: fconst => assert(actual as fconst == e), - e: rune => assert(actual as rune == e), - e: str => assert(actual as str == e), - e: void => assert(actual is void), +fn vassert(expected: value, actual: value) bool = { + if (expected.storage != actual.storage) { + return false; + }; + return switch (expected.storage) { + storage::U8, + storage::U16, + storage::U32, + storage::U64, + storage::UINT, + storage::UINTPTR => expected._uint == actual._uint, + storage::I8, + storage::I16, + storage::I32, + storage::I64, + storage::INT, + storage::ICONST => expected._int == actual._int, + storage::F32, + storage::F64, + storage::FCONST => expected.float == actual.float, + storage::RUNE => expected._rune == actual._rune, + storage::STR => expected.string == actual.string, + }; }; fn lextest(in: str, expected: []token) void = { @@ -65,7 +69,13 @@ fn lextest(in: str, expected: []token) void = { }, }; assert(tl.0 == etok.0); - vassert(tl.1, etok.1); + if (etok.1 is void) { + assert(tl.1 is void); + } else if (etok.1 is str) { + assert(tl.1 as str == etok.1 as str); + } else { + vassert(tl.1 as value, etok.1 as value); + }; assert(tl.2.line == etok.2.line && tl.2.col == etok.2.col && tl.2.path == etok.2.path); }; @@ 
-183,21 +193,21 @@ fn loc(line: uint, col: uint) location = location { const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' " "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'"; const expected: [_]token = [ - (ltok::LITERAL, 'a', loc(1, 1)), - (ltok::LITERAL, 'b', loc(1, 5)), - (ltok::LITERAL, '\a', loc(1, 9)), - (ltok::LITERAL, '\b', loc(1, 14)), - (ltok::LITERAL, '\f', loc(1, 19)), - (ltok::LITERAL, '\n', loc(1, 24)), - (ltok::LITERAL, '\r', loc(1, 29)), - (ltok::LITERAL, '\t', loc(1, 34)), - (ltok::LITERAL, '\v', loc(1, 39)), - (ltok::LITERAL, '\0', loc(1, 44)), - (ltok::LITERAL, '\\', loc(1, 49)), - (ltok::LITERAL, '\'', loc(1, 54)), - (ltok::LITERAL, '\x0A', loc(1, 59)), - (ltok::LITERAL, '\u1234', loc(1, 66)), - (ltok::LITERAL, '\U12345678', loc(1, 75)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = 'a' }, loc(1, 1)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = 'b' }, loc(1, 5)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\a' }, loc(1, 9)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\b' }, loc(1, 14)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\f' }, loc(1, 19)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\n' }, loc(1, 24)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\r' }, loc(1, 29)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\t' }, loc(1, 34)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\v' }, loc(1, 39)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\0' }, loc(1, 44)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\\' }, loc(1, 49)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\'' }, loc(1, 54)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\x0A' }, loc(1, 59)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\u1234' }, loc(1, 66)), + (ltok::LITERAL, value { storage = storage::RUNE, _rune = '\U12345678' }, loc(1, 75)), ]; 
lextest(in, expected); }; @@ -206,32 +216,32 @@ fn loc(line: uint, col: uint) location = location { const in = "\"a\" \"b\" \"\\a\" \"\\b\" \"\\f\" \"\\n\" \"\\r\" " "\"\\t\" \"\\v\" \"\\0\" \"\\\\\" \"\\\'\""; const expected: [_]token = [ - (ltok::LITERAL, "a", loc(1, 1)), - (ltok::LITERAL, "b", loc(1, 5)), - (ltok::LITERAL, "\a", loc(1, 9)), - (ltok::LITERAL, "\b", loc(1, 14)), - (ltok::LITERAL, "\f", loc(1, 19)), - (ltok::LITERAL, "\n", loc(1, 24)), - (ltok::LITERAL, "\r", loc(1, 29)), - (ltok::LITERAL, "\t", loc(1, 34)), - (ltok::LITERAL, "\v", loc(1, 39)), - (ltok::LITERAL, "\0", loc(1, 44)), - (ltok::LITERAL, "\\", loc(1, 49)), - (ltok::LITERAL, "\'", loc(1, 54)), + (ltok::LITERAL, value { storage = storage::STR, string = "a" }, loc(1, 1)), + (ltok::LITERAL, value { storage = storage::STR, string = "b" }, loc(1, 5)), + (ltok::LITERAL, value { storage = storage::STR, string = "\a" }, loc(1, 9)), + (ltok::LITERAL, value { storage = storage::STR, string = "\b" }, loc(1, 14)), + (ltok::LITERAL, value { storage = storage::STR, string = "\f" }, loc(1, 19)), + (ltok::LITERAL, value { storage = storage::STR, string = "\n" }, loc(1, 24)), + (ltok::LITERAL, value { storage = storage::STR, string = "\r" }, loc(1, 29)), + (ltok::LITERAL, value { storage = storage::STR, string = "\t" }, loc(1, 34)), + (ltok::LITERAL, value { storage = storage::STR, string = "\v" }, loc(1, 39)), + (ltok::LITERAL, value { storage = storage::STR, string = "\0" }, loc(1, 44)), + (ltok::LITERAL, value { storage = storage::STR, string = "\\" }, loc(1, 49)), + (ltok::LITERAL, value { storage = storage::STR, string = "\'" }, loc(1, 54)), ]; // TODO: test \x and \u and \U lextest(in, expected); const in = "\"ab\\a\\b\\f\\n\\r\\t\\v\\0\\\\\\'\""; const expected: [_]token = [ - (ltok::LITERAL, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)), + (ltok::LITERAL, value { storage = storage::STR, string = "ab\a\b\f\n\r\t\v\0\\\'" }, loc(1, 1)), ]; lextest(in, expected); const in = "\"hello world\" \"こんにちは\" 
\"return\" \"foo\""; const expected: [_]token = [ - (ltok::LITERAL, "hello world", loc(1, 1)), - (ltok::LITERAL, "こんにちは", loc(1, 15)), - (ltok::LITERAL, "return", loc(1, 23)), - (ltok::LITERAL, "foo", loc(1, 32)), + (ltok::LITERAL, value { storage = storage::STR, string = "hell }o world" }, loc(1, 1)), + (ltok::LITERAL, value { storage = storage::STR, string = "こんにちは" }, loc(1, 15)), + (ltok::LITERAL, value { storage = storage::STR, string = "return" }, loc(1, 23)), + (ltok::LITERAL, value { storage = storage::STR, string = "foo" }, loc(1, 32)), ]; lextest(in, expected); }; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -168,7 +168,10 @@ fn lex_string(lex: *lexer, loc: location) (token | error) = { append(chars, utf8::encoderune(r)...); }, }; - return (ltok::LITERAL, strings::fromutf8(chars), loc); + return (ltok::LITERAL, value { + storage = storage::STR, + string = strings::fromutf8(chars), + }, loc); }; fn lex_rn_str(lex: *lexer, loc: location) (token | error) = { @@ -183,7 +186,10 @@ fn lex_rn_str(lex: *lexer, loc: location) (token | error) = { }; // Rune literal - let ret: token = (ltok::LITERAL, lex_rune(lex, loc)?, loc); + let ret: token = (ltok::LITERAL, value { + storage = storage::RUNE, + _rune = lex_rune(lex, loc)?, + }, loc); match (next(lex)?) 
{ io::EOF => return syntaxerr(loc, "unexpected EOF"), diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -238,13 +238,38 @@ const bmap: [_]str = [ "*=", ]; -export type iconst = i64; -export type fconst = f64; +// The storage of a [value], such as U32 for '1337u32' +export type storage = enum { + U8, + U16, + U32, + U64, + UINT, + UINTPTR, + I8, + I16, + I32, + I64, + INT, + ICONST, + F32, + F64, + FCONST, + RUNE, + STR, +}; -// A token for a literal value, such as '1337u32' -// TODO: Refactor this into a union { i64, u64, f64, str } + storage enum -export type value = (u8 | u16 | u32 | u64 | uint | uintptr | i8 | i16 | i32 | - i64 | int | iconst | f32 | f64 | fconst | rune | str | void); +// A token value, used for tokens such as '1337u32' +export type value = struct { + storage: storage, + union { + string: str, + _rune: rune, + _int: i64, + _uint: u64, + float: f64, + }, +}; // A location within a source file. // The path is borrowed from the file name given to the lexer. @@ -255,34 +280,35 @@ export type location = struct { }; // A single lexical token. 
-export type token = (ltok, value, location); +export type token = (ltok, (value | str | void), location); // Converts a token to its string representation export fn tokstr(tok: token) const str = { if (tok.0 <= ltok::LAST_BTOK) { return bmap[tok.0: int]; }; + let val = tok.1 as value; return switch (tok.0) { - ltok::NAME => tok.1 as str, + ltok::NAME => val.string, ltok::LABEL => abort(), // TODO - ltok::LITERAL => match (tok.1) { - u8 => "u8", - u16 => "u16", - u32 => "u32", - u64 => "u64", - uint => "uint", - uintptr => "uintptr", - i8 => "i8", - i16 => "i16", - i32 => "i32", - i64 => "i64", - int => "int", - iconst => "iconst", - f32 => "f32", - f64 => "f64", - fconst => "fconst", - rune => "rune", - str => "str", + ltok::LITERAL => switch (val.storage) { + storage::U8 => "u8", + storage::U16 => "u16", + storage::U32 => "u32", + storage::U64 => "u64", + storage::UINT => "uint", + storage::UINTPTR => "uintptr", + storage::I8 => "i8", + storage::I16 => "i16", + storage::I32 => "i32", + storage::I64 => "i64", + storage::INT => "int", + storage::ICONST => "iconst", + storage::F32 => "f32", + storage::F64 => "f64", + storage::FCONST => "fconst", + storage::RUNE => "rune", + storage::STR => "str", }, ltok::EOF => "EOF", * => abort(), diff --git a/hare/parse/decl.ha b/hare/parse/decl.ha @@ -9,7 +9,10 @@ fn attr_symbol(lexer: *lex::lexer) (str | error) = { want_tok(lexer, ltok::LPAREN)?; let t = want_tok(lexer, ltok::LITERAL)?; let s = match (t.1) { - s: str => s, + v: lex::value => if (v.storage == lex::storage::STR) v.string + else return syntaxerr(t.2, + "Unexpected {}, was expecting string", + lex::tokstr(t)), * => return syntaxerr(t.2, "Unexpected {}, was expecting string", lex::tokstr(t)),