commit f9eff2c2abe9d87037320f7310e16f1d902a472e
parent 641f60a61b7fd59d640fa2c35fbf3b4dbd1b8e12
Author: Sebastian <sebastian@sebsite.pw>
Date: Sun, 12 Jun 2022 19:30:55 -0400
Parse labels in parse instead of lex
Following the spec change that allows whitespace between the colon and
the name of a label, labels are now recognized by the parser rather than
the lexer. The lexer emits only a T_COLON token followed by a T_NAME
token, and the parser constructs the label from that pair.
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
4 files changed, 9 insertions(+), 41 deletions(-)
diff --git a/include/lex.h b/include/lex.h
@@ -129,7 +129,6 @@ enum lexical_token {
T_LAST_OPERATOR = T_BXOREQ,
// Tokens with additional information
- T_LABEL,
T_LITERAL,
T_NAME,
@@ -167,7 +166,6 @@ struct lexer {
uint32_t c[2];
struct token un;
struct location loc;
- bool disable_labels;
bool require_int;
};
diff --git a/src/lex.c b/src/lex.c
@@ -787,22 +787,6 @@ lex3(struct lexer *lexer, struct token *out, uint32_t c)
return out->token;
}
-static enum lexical_token
-lex_label(struct lexer *lexer, struct token *out)
-{
- uint32_t c;
- while ((c = next(lexer, NULL, true)) != UTF8_INVALID) {
- if (c > 0x7F || (!isalnum(c) && c != '_')) {
- push(lexer, c, true);
- break;
- }
- }
- out->token = T_LABEL;
- out->name = strdup(lexer->buf);
- consume(lexer, -1);
- return out->token;
-}
-
static enum lexical_token _lex(struct lexer *lexer, struct token *out);
static enum lexical_token
@@ -876,10 +860,6 @@ lex2(struct lexer *lexer, struct token *out, uint32_t c)
break;
default:
push(lexer, c, false);
- if (!lexer->disable_labels && c <= 0x7F
- && (isalpha(c) || c == '_')) {
- return lex_label(lexer, out);
- }
out->token = T_COLON;
break;
}
@@ -1018,7 +998,6 @@ token_finish(struct token *tok)
{
switch (tok->token) {
case T_NAME:
- case T_LABEL:
free(tok->name);
break;
case T_LITERAL:
@@ -1046,8 +1025,6 @@ lexical_token_str(enum lexical_token tok)
switch (tok) {
case T_NAME:
return "name";
- case T_LABEL:
- return "label";
case T_LITERAL:
return "literal";
case T_EOF:
@@ -1142,9 +1119,6 @@ token_str(const struct token *tok)
case T_NAME:
snprintf(buf, sizeof(buf), "name %s", tok->name);
return buf;
- case T_LABEL:
- snprintf(buf, sizeof(buf), ":%s", tok->name);
- return buf;
case T_LITERAL:
switch (tok->storage) {
case STORAGE_U8:
diff --git a/src/main.c b/src/main.c
@@ -64,13 +64,6 @@ parse_define(const char *argv_0, const char *in)
sources = &d;
lex_init(&lexer, f, 0);
- // The syntax for this parameter is:
- //
- // -D ident:type=value
- //
- // :type is lexed as a label unless we disable it here.
- lexer.disable_labels = true;
-
parse_identifier(&lexer, &def->ident, false);
if (lex(&lexer, &tok) != T_COLON) {
lex_finish(&lexer);
diff --git a/src/parse.c b/src/parse.c
@@ -2164,7 +2164,8 @@ parse_control_expression(struct lexer *lexer)
exp->type = tok.token == T_BREAK ? EXPR_BREAK : EXPR_CONTINUE;
exp->control.label = NULL;
switch (lex(lexer, &tok)) {
- case T_LABEL:
+ case T_COLON:
+ want(lexer, T_NAME, &tok);
exp->control.label = tok.name;
break;
default:
@@ -2193,7 +2194,8 @@ parse_control_expression(struct lexer *lexer)
case T_SEMICOLON:
unlex(lexer, &tok);
break;
- case T_LABEL:
+ case T_COLON:
+ want(lexer, T_NAME, &tok);
exp->control.label = tok.name;
switch (lex(lexer, &tok)) {
case T_COMMA:
@@ -2228,14 +2230,15 @@ parse_compound_expression(struct lexer *lexer)
struct token tok = {0};
switch (lex(lexer, &tok)) {
- case T_LABEL:
+ case T_COLON:
+ want(lexer, T_NAME, &tok);
exp->compound.label = tok.name;
want(lexer, T_LBRACE, &tok);
break;
case T_LBRACE:
break; // no-op
default:
- synerr(&tok, T_LBRACE, T_LABEL, T_EOF);
+ synerr(&tok, T_LBRACE, T_COLON, T_EOF);
break;
};
@@ -2291,11 +2294,11 @@ parse_expression(struct lexer *lexer)
case T_YIELD:
case T_DEFER:
case T_FOR:
- case T_LABEL:
case T_IF:
case T_LBRACE:
case T_MATCH:
case T_SWITCH:
+ case T_COLON:
switch (tok.token) {
case T_BREAK:
case T_CONTINUE:
@@ -2315,7 +2318,7 @@ parse_expression(struct lexer *lexer)
value = parse_if_expression(lexer);
break;
case T_LBRACE:
- case T_LABEL:
+ case T_COLON:
unlex(lexer, &tok);
value = parse_compound_expression(lexer);
value = parse_cast_expression(lexer, value);