harec

[hare] Hare compiler, written in C11 for POSIX OSs
Log | Files | Refs | README | LICENSE

commit f9eff2c2abe9d87037320f7310e16f1d902a472e
parent 641f60a61b7fd59d640fa2c35fbf3b4dbd1b8e12
Author: Sebastian <sebastian@sebsite.pw>
Date:   Sun, 12 Jun 2022 19:30:55 -0400

Parse labels in parse instead of lex

As per the change to the spec which allows whitespace characters between
the colon and name of a label, labels are now handled during parse
instead of lex. The lexer only sees a colon and a name, and the label is
constructed by the parser from this.

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M include/lex.h |  2 --
M src/lex.c     | 26 --------------------------
M src/main.c    |  7 -------
M src/parse.c   | 15 +++++++++------
4 files changed, 9 insertions(+), 41 deletions(-)

diff --git a/include/lex.h b/include/lex.h
@@ -129,7 +129,6 @@ enum lexical_token {
 	T_LAST_OPERATOR = T_BXOREQ,
 
 	// Tokens with additional information
-	T_LABEL,
 	T_LITERAL,
 	T_NAME,
 
@@ -167,7 +166,6 @@ struct lexer {
 	uint32_t c[2];
 	struct token un;
 	struct location loc;
-	bool disable_labels;
 	bool require_int;
 };
 
diff --git a/src/lex.c b/src/lex.c
@@ -787,22 +787,6 @@ lex3(struct lexer *lexer, struct token *out, uint32_t c)
 	return out->token;
 }
 
-static enum lexical_token
-lex_label(struct lexer *lexer, struct token *out)
-{
-	uint32_t c;
-	while ((c = next(lexer, NULL, true)) != UTF8_INVALID) {
-		if (c > 0x7F || (!isalnum(c) && c != '_')) {
-			push(lexer, c, true);
-			break;
-		}
-	}
-	out->token = T_LABEL;
-	out->name = strdup(lexer->buf);
-	consume(lexer, -1);
-	return out->token;
-}
-
 static enum lexical_token _lex(struct lexer *lexer, struct token *out);
 
 static enum lexical_token
@@ -876,10 +860,6 @@ lex2(struct lexer *lexer, struct token *out, uint32_t c)
 			break;
 		default:
 			push(lexer, c, false);
-			if (!lexer->disable_labels && c <= 0x7F
-					&& (isalpha(c) || c == '_')) {
-				return lex_label(lexer, out);
-			}
 			out->token = T_COLON;
 			break;
 		}
@@ -1018,7 +998,6 @@ token_finish(struct token *tok)
 {
 	switch (tok->token) {
 	case T_NAME:
-	case T_LABEL:
 		free(tok->name);
 		break;
 	case T_LITERAL:
@@ -1046,8 +1025,6 @@ lexical_token_str(enum lexical_token tok)
 	switch (tok) {
 	case T_NAME:
 		return "name";
-	case T_LABEL:
-		return "label";
 	case T_LITERAL:
 		return "literal";
 	case T_EOF:
@@ -1142,9 +1119,6 @@ token_str(const struct token *tok)
 	case T_NAME:
 		snprintf(buf, sizeof(buf), "name %s", tok->name);
 		return buf;
-	case T_LABEL:
-		snprintf(buf, sizeof(buf), ":%s", tok->name);
-		return buf;
 	case T_LITERAL:
 		switch (tok->storage) {
 		case STORAGE_U8:
diff --git a/src/main.c b/src/main.c
@@ -64,13 +64,6 @@ parse_define(const char *argv_0, const char *in)
 	sources = &d;
 	lex_init(&lexer, f, 0);
 
-	// The syntax for this parameter is:
-	//
-	// -D ident:type=value
-	//
-	// :type is lexed as a label unless we disable it here.
-	lexer.disable_labels = true;
-
 	parse_identifier(&lexer, &def->ident, false);
 	if (lex(&lexer, &tok) != T_COLON) {
 		lex_finish(&lexer);
diff --git a/src/parse.c b/src/parse.c
@@ -2164,7 +2164,8 @@ parse_control_expression(struct lexer *lexer)
 		exp->type = tok.token == T_BREAK ? EXPR_BREAK : EXPR_CONTINUE;
 		exp->control.label = NULL;
 		switch (lex(lexer, &tok)) {
-		case T_LABEL:
+		case T_COLON:
+			want(lexer, T_NAME, &tok);
 			exp->control.label = tok.name;
 			break;
 		default:
@@ -2193,7 +2194,8 @@ parse_control_expression(struct lexer *lexer)
 		case T_SEMICOLON:
 			unlex(lexer, &tok);
 			break;
-		case T_LABEL:
+		case T_COLON:
+			want(lexer, T_NAME, &tok);
 			exp->control.label = tok.name;
 			switch (lex(lexer, &tok)) {
 			case T_COMMA:
@@ -2228,14 +2230,15 @@ parse_compound_expression(struct lexer *lexer)
 
 	struct token tok = {0};
 	switch (lex(lexer, &tok)) {
-	case T_LABEL:
+	case T_COLON:
+		want(lexer, T_NAME, &tok);
 		exp->compound.label = tok.name;
 		want(lexer, T_LBRACE, &tok);
 		break;
 	case T_LBRACE:
 		break; // no-op
 	default:
-		synerr(&tok, T_LBRACE, T_LABEL, T_EOF);
+		synerr(&tok, T_LBRACE, T_COLON, T_EOF);
 		break;
 	};
 
@@ -2291,11 +2294,11 @@ parse_expression(struct lexer *lexer)
 	case T_YIELD:
 	case T_DEFER:
 	case T_FOR:
-	case T_LABEL:
 	case T_IF:
 	case T_LBRACE:
 	case T_MATCH:
 	case T_SWITCH:
+	case T_COLON:
 		switch (tok.token) {
 		case T_BREAK:
 		case T_CONTINUE:
@@ -2315,7 +2318,7 @@ parse_expression(struct lexer *lexer)
 			value = parse_if_expression(lexer);
 			break;
 		case T_LBRACE:
-		case T_LABEL:
+		case T_COLON:
 			unlex(lexer, &tok);
 			value = parse_compound_expression(lexer);
 			value = parse_cast_expression(lexer, value);