commit 5c58caef2eed8e41897e06ce6e264ebffb95ca13
parent 090dd37eb360d5168131bf47fb078debd9fc94e2
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date: Sat, 20 Nov 2021 02:48:42 +0100
error out on codepoints not known to the Hare grammar
Unknown codepoints were lexed as T_ERROR. T_ERROR is special-cased in
unlex(), which caused such errors to be ignored.
Signed-off-by: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Diffstat:
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/lex.c b/src/lex.c
@@ -897,6 +897,7 @@ _lex(struct lexer *lexer, struct token *out)
return lex_literal(lexer, out);
}
+ char p[5];
switch (c) {
case '"':
case '\'':
@@ -949,8 +950,10 @@ _lex(struct lexer *lexer, struct token *out)
out->token = T_QUESTION;
break;
default:
- out->token = T_ERROR;
- break;
+ p[utf8_encode(p, c)] = '\0';
+ fprintf(stderr, "Error: unexpected code point '%s' at %s:%d:%d\n",
+ p, lexer->loc.path, lexer->loc.lineno, lexer->loc.colno);
+ exit(EXIT_FAILURE);
}
return out->token;