hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 9920adea37b346405e22dc43e306be2860871082
parent 3e328305587675321d88dceda1aa6f5bf3af4969
Author: Sebastian <sebastian@sebsite.pw>
Date:   Wed, 18 May 2022 23:37:44 -0400

Handle negation in parse instead of lex

During the lexing stage, all number literals are taken to be unsigned.
The minus token is lexed on its own, independently of the number.
The sign of numbers is now determined during the parsing stage. This
fixes numerous edge cases, such as "1-1" incorrectly lexing as the two
tokens 1 and -1 instead of the three tokens 1, -, and 1.
This also simplifies the implementation of other language features, such
as parsing the index of a tuple access expression.

hare::parse doesn't check that the integer fits into the specified type.
That is, 1000u8 will successfully parse. This could be handled in
hare::parse with significant refactoring, but it's easier to handle it
during check in hare::unit.

hare::unit doesn't yet check for this, but hare::ast::number_constant
has been updated to aid with this when the time comes to implement it. A
sign field has been added, which will always match the sign of the
value, except when i64 overflows. This is necessary because
-9223372036854775808i64 is valid, but 9223372036854775808i64 isn't. In
the latter case, the value will overflow to a negative, but the sign
will remain positive, so hare::unit can detect the mismatch and report
an error.

Fixes: https://todo.sr.ht/~sircmpwn/hare/376
Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M hare/ast/expr.ha | 6 +++---
M hare/lex/+test.ha | 20 +++++++++++---------
M hare/lex/lex.ha | 49 ++++++-------------------------------------------
M hare/lex/token.ha | 2 +-
M hare/parse/expr.ha | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M hare/unit/expr.ha | 2 +-
M hare/unit/process.ha | 8 +-------
M hare/unparse/expr.ha | 2 --
M scripts/gen-stdlib | 7 ++++---
M stdlib.mk | 8 ++++----
10 files changed, 92 insertions(+), 87 deletions(-)

diff --git a/hare/ast/expr.ha b/hare/ast/expr.ha @@ -222,12 +222,13 @@ export type tuple_constant = []*expr; export type _null = void; // A scalar value. -export type value = (bool | _null | ...lex::value); +export type value = (bool | _null | str | rune | void); // An integer or float constant. export type number_constant = struct { suff: lex::ltok, value: (i64 | u64 | f64), + sign: bool, // true if negative, false otherwise }; // A constant expression. @@ -496,7 +497,6 @@ case let e: expr => free(c.label); case let c: constant_expr => match (c) { - case (void | _null | ...lex::value) => void; case let a: array_constant => for (let i = 0z; i < len(a.values); i += 1) { expr_finish(a.values[i]); @@ -509,7 +509,7 @@ case let e: expr => expr_finish(t[i]); }; free(t); - case number_constant => void; + case (value | number_constant) => void; }; case let c: continue_expr => free(c); diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -50,8 +50,6 @@ fn vassert(expected: value, actual: value) void = { assert(actual as str == expected); case let expected: rune => assert(actual as rune == expected); - case let expected: i64 => - assert(actual as i64 == expected); case let expected: u64 => assert(actual as u64 == expected); case let expected: f64 => @@ -277,18 +275,22 @@ fn loc(line: uint, col: uint) location = location { "0b00000010000001100000011100001111000000100000011000000111i64\n" "13.37 13.37f32 13.37f64 6.022e23 1.616255e-35f64 1e-1"; const expected: [_]token = [ - (ltok::LIT_ICONST, 1e5i64, loc(1, 1)), - (ltok::LIT_I32, -1i64, loc(1, 5)), - (ltok::LIT_U64, 9223372036854775809u64, loc(1, 11)), + (ltok::LIT_ICONST, 1e5u64, loc(1, 1)), + (ltok::MINUS, void, loc(1, 5)), + (ltok::LIT_I32, 1u64, loc(1, 6)), + (ltok::LIT_ICONST, 9223372036854775809u64, loc(1, 11)), (ltok::LIT_SIZE, 1e2u64, loc(1, 31)), (ltok::LIT_U8, 255u64, loc(1, 36)), (ltok::LIT_U16, 0o42u64, loc(1, 42)), (ltok::LIT_U32, 0b1000101u64, loc(2, 1)), (ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)), - 
(ltok::LIT_I8, -0b10i64, loc(2, 28)), - (ltok::LIT_I16, -5e0i64, loc(2, 36)), - (ltok::LIT_I32, -0o16i64, loc(2, 44)), - (ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111i64, loc(3, 1)), + (ltok::MINUS, void, loc(2, 28)), + (ltok::LIT_I8, 0b10u64, loc(2, 29)), + (ltok::MINUS, void, loc(2, 36)), + (ltok::LIT_I16, 5e0u64, loc(2, 37)), + (ltok::MINUS, void, loc(2, 44)), + (ltok::LIT_I32, 0o16u64, loc(2, 45)), + (ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111u64, loc(3, 1)), (ltok::LIT_FCONST, 13.37, loc(4, 1)), (ltok::LIT_F32, 13.37, loc(4, 7)), (ltok::LIT_F64, 13.37, loc(4, 16)), diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -14,7 +14,6 @@ use sort; use strconv; use strings; use strio; -use types; export type lexer = struct { in: io::handle, @@ -402,22 +401,13 @@ fn lex_literal(lex: *lexer) (token | error) = { case let r: (rune, location) => yield r; }; - if (r.0 == '-') { - append(chars, utf8::encoderune(r.0)...); - r = match (next(lex)?) { - case io::EOF => - return (ltok::EOF, void, loc); - case let r: (rune, location) => - yield r; - }; - }; let base = 10u; if (r.0 == '0') { append(chars, utf8::encoderune(r.0)...); r = match (next(lex)?) 
{ case io::EOF => - return (ltok::LIT_ICONST, 0i64, loc); + return (ltok::LIT_ICONST, 0u64, loc); case let r: (rune, location) => yield r; }; @@ -579,27 +569,11 @@ fn lex_literal(lex: *lexer) (token | error) = { let val = strings::fromutf8(chars[..end]); let val = switch (suff) { - case ltok::LIT_U8, ltok::LIT_U16, ltok::LIT_U32, ltok::LIT_U64, - ltok::LIT_UINT, ltok::LIT_SIZE => - yield strconv::stou64b(val, base); - case ltok::LIT_ICONST => - yield match (strconv::stoi64b(val, base)) { - case let i: i64 => - yield i; - case strconv::invalid => - abort(); - case strconv::overflow => - yield if (chars[0] != '-') { - suff = ltok::LIT_U64; - yield strconv::stou64b(val, base); - } else strconv::overflow; - }; - case ltok::LIT_I8, ltok::LIT_I16, ltok::LIT_I32, ltok::LIT_I64, - ltok::LIT_INT => - yield strconv::stoi64b(val, base); case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST => val = strings::fromutf8(chars[..floatend]); yield strconv::stof64(val); + case => + yield strconv::stou64b(val, base); }; let val = match (val) { case let val: u64 => @@ -607,11 +581,6 @@ fn lex_literal(lex: *lexer) (token | error) = { val *= 10; }; yield val; - case let val: i64 => - for (let i = 0z; i < exp; i += 1) { - val *= 10; - }; - yield val; case let val: f64 => yield val; case strconv::invalid => @@ -658,15 +627,9 @@ fn lex2(lexr: *lexer) (token | error) = { line_comment(lexr)?; return (ltok::MINUSEQ, void, first.1); case => - if (ascii::isdigit(r.0)) { - unget(lexr, r); - unget(lexr, first); - return lex_literal(lexr); - } else { - unget(lexr, r); - line_comment(lexr)?; - return (ltok::MINUS, void, first.1); - }; + unget(lexr, r); + line_comment(lexr)?; + return (ltok::MINUS, void, first.1); }; case io::EOF => return (ltok::MINUS, void, first.1); diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -282,7 +282,7 @@ const bmap: [_]str = [ export type _null = void; // A token value, used for tokens such as '1337' (an integer). 
-export type value = (str | rune | i64 | u64 | f64 | void); +export type value = (str | rune | u64 | f64 | void); // A location within a source file. // The path is borrowed from the file name given to the lexer. diff --git a/hare/parse/expr.ha b/hare/parse/expr.ha @@ -7,6 +7,8 @@ use hare::ast; use hare::lex::{ltok}; use hare::lex; +use math; +use types; use strings; // Parses an expression. @@ -511,19 +513,34 @@ fn constant(lexer: *lex::lexer) (ast::expr | error) = { const tok = want(lexer)?; const expr: ast::constant_expr = switch (tok.0) { case ltok::LIT_RUNE, ltok::LIT_STR => - yield tok.1; + yield tok.1 as (rune | str); case ltok::LIT_U8, ltok::LIT_U16, ltok::LIT_U32, ltok::LIT_U64, - ltok::LIT_UINT, ltok::LIT_SIZE, ltok::LIT_I8, ltok::LIT_I16, - ltok::LIT_I32, ltok::LIT_I64, ltok::LIT_INT, ltok::LIT_ICONST, - ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST => - const value = match (tok.1) { - case let v: (i64 | u64 | f64) => - yield v; - case => abort(); + ltok::LIT_UINT, ltok::LIT_SIZE => + yield ast::number_constant { + suff = tok.0, + value = tok.1 as u64, + sign = false, }; + case ltok::LIT_I8, ltok::LIT_I16, ltok::LIT_I32, ltok::LIT_I64, + ltok::LIT_INT => + const n = tok.1 as u64; yield ast::number_constant { suff = tok.0, - value = value, + value = n: i64, + sign = false, + }; + case ltok::LIT_ICONST => + const n = tok.1 as u64; + yield ast::number_constant { + suff = tok.0, + value = if (n <= types::I64_MAX: u64) n: i64 else n, + sign = false, + }; + case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST => + yield ast::number_constant { + suff = tok.0, + value = tok.1 as f64, + sign = false, }; case ltok::VOID => yield void; @@ -1209,14 +1226,44 @@ fn unarithm(lexer: *lex::lexer) (ast::expr | error) = { case => abort(); }; - let operand = unarithm(lexer)?; + const operand = unarithm(lexer)?; + const expr = :blk { + if (op == ast::unarithm_op::MINUS) match (operand.expr) { + case let c: ast::constant_expr => + match (c) { + case let n: 
ast::number_constant => + // TODO: tuple unpacking + let sign = false; + const val = match (n.value) { + case let i: i64 => + sign = i < 0; + yield -i; + case let u: u64 => void; + case let f: f64 => + sign = math::signf64(f) < 0; + yield -f; + }; + + if (val is void) yield; + yield :blk, ast::number_constant { + suff = n.suff, + value = val as (i64 | f64), + sign = sign, + }: ast::constant_expr; + case => void; + }; + case => void; + }; + + yield ast::unarithm_expr { + op = op, + operand = alloc(operand), + }; + }; return ast::expr { start = tok.2, end = lex::prevloc(lexer), - expr = ast::unarithm_expr { - op = op, - operand = alloc(operand), - }, + expr = expr, }; }; diff --git a/hare/unit/expr.ha b/hare/unit/expr.ha @@ -52,7 +52,7 @@ export type bindings = []binding; export type compound = []*expr; // The value of a constant expression. -export type constant = ast::value; // TODO: composite types +export type constant = (...ast::value | i64 | u64 | f64); // TODO: composite types // A return expression, i.e. 
return <value> export type _return = nullable *expr; diff --git a/hare/unit/process.ha b/hare/unit/process.ha @@ -283,12 +283,6 @@ fn process_constant(ctx: *context, aexpr: *ast::expr) (*expr | error) = { yield ast::builtin_type::STR; case let r: rune => yield ast::builtin_type::RUNE; - case let i: i64 => - yield ast::builtin_type::INT; - case let u: u64 => - yield ast::builtin_type::UINT; - case let f: f64 => - yield ast::builtin_type::F64; case void => yield ast::builtin_type::VOID; }), @@ -372,7 +366,7 @@ fn process_constant(ctx: *context, aexpr: *ast::expr) (*expr | error) = { assert(expr.result.repr as types::builtin == types::builtin::NULL); assert(expr.expr is constant); - const cases: [_](str, types::builtin, ast::value) = [ + const cases: [_](str, types::builtin, constant) = [ ("1234", types::builtin::INT, 1234), ("1234u", types::builtin::UINT, 1234u), ("\"hello world\"", types::builtin::STR, "hello world"), diff --git a/hare/unparse/expr.ha b/hare/unparse/expr.ha @@ -483,8 +483,6 @@ fn constant( case void => abort(); case ast::_null => yield "null"; - case let v: (i64 | u64 | f64) => - yield v; case let b: bool => return fmt::fprint(out, b); case let s: str => diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -729,7 +729,7 @@ hare_lex() { gensrcs_hare_lex \ +test.ha fi - gen_ssa hare::lex io bufio strings types fmt sort strio + gen_ssa hare::lex io bufio strings fmt sort strio } hare_module() { @@ -761,7 +761,8 @@ hare_parse() { if [ $testing -eq 0 ] then gensrcs_hare_parse - gen_ssa hare::parse ascii hare::ast hare::lex fmt strings + gen_ssa hare::parse ascii hare::ast hare::lex fmt types \ + strings math else gensrcs_hare_parse \ +test/expr.ha \ @@ -771,7 +772,7 @@ hare_parse() { +test/types.ha \ +test/unit.ha gen_ssa hare::parse ascii bufio hare::ast hare::lex \ - hare::unparse io strio fmt strings + hare::unparse io strio fmt strings math fi } diff --git a/stdlib.mk b/stdlib.mk @@ -1223,7 +1223,7 @@ stdlib_hare_lex_any_srcs = \ 
$(STDLIB)/hare/lex/token.ha \ $(STDLIB)/hare/lex/lex.ha -$(HARECACHE)/hare/lex/hare_lex-any.ssa: $(stdlib_hare_lex_any_srcs) $(stdlib_rt) $(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_sort_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) +$(HARECACHE)/hare/lex/hare_lex-any.ssa: $(stdlib_hare_lex_any_srcs) $(stdlib_rt) $(stdlib_io_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_sort_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/hare/lex @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhare::lex \ @@ -1253,7 +1253,7 @@ stdlib_hare_parse_any_srcs = \ $(STDLIB)/hare/parse/type.ha \ $(STDLIB)/hare/parse/unit.ha -$(HARECACHE)/hare/parse/hare_parse-any.ssa: $(stdlib_hare_parse_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM)) $(stdlib_hare_lex_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) +$(HARECACHE)/hare/parse/hare_parse-any.ssa: $(stdlib_hare_parse_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM)) $(stdlib_hare_lex_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_math_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/hare/parse @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhare::parse \ @@ -3352,7 +3352,7 @@ testlib_hare_lex_any_srcs = \ $(STDLIB)/hare/lex/lex.ha \ $(STDLIB)/hare/lex/+test.ha -$(TESTCACHE)/hare/lex/hare_lex-any.ssa: $(testlib_hare_lex_any_srcs) $(testlib_rt) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_types_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_sort_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) +$(TESTCACHE)/hare/lex/hare_lex-any.ssa: $(testlib_hare_lex_any_srcs) $(testlib_rt) $(testlib_io_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) 
$(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_sort_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/hare/lex @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhare::lex \ @@ -3388,7 +3388,7 @@ testlib_hare_parse_any_srcs = \ $(STDLIB)/hare/parse/+test/types.ha \ $(STDLIB)/hare/parse/+test/unit.ha -$(TESTCACHE)/hare/parse/hare_parse-any.ssa: $(testlib_hare_parse_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM)) $(testlib_hare_lex_$(PLATFORM)) $(testlib_hare_unparse_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) +$(TESTCACHE)/hare/parse/hare_parse-any.ssa: $(testlib_hare_parse_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM)) $(testlib_hare_lex_$(PLATFORM)) $(testlib_hare_unparse_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_math_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/hare/parse @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhare::parse \