harec

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 18c6531ec88a9ef589df0293231926453ee4c364
parent d51fce8616d362c73c93fb2d29d1b3a9d9e22603
Author: Drew DeVault <sir@cmpwn.com>
Date:   Thu,  8 Oct 2020 16:20:06 -0400

lex: handle numeric literals

Diffstat:
M include/lex.h | 1 +
M src/lex.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M src/main.c | 5 ++++-
3 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/include/lex.h b/include/lex.h
@@ -113,6 +113,7 @@ struct token {
 	enum lexical_token token;
 	union {
 		char *name;
+		char *literal;
 	};
 };

diff --git a/src/lex.c b/src/lex.c
@@ -182,10 +182,7 @@ static int
 lex_name(struct lexer *lexer, struct token *out)
 {
 	int c = next(lexer, true);
-	if (c == EOF || (!isalpha(c) && c != '_')) {
-		out->token = T_ERROR;
-		return EOF;
-	}
+	assert(c != EOF && (isalpha(c) || c == '_'));
 	while ((c = next(lexer, true)) != EOF) {
 		if (!isalnum(c) && c != '_') {
 			push(lexer, c, true);
@@ -211,6 +208,92 @@ lookup:;
 static int
 lex_literal(struct lexer *lexer, struct token *out)
 {
+	int c = next(lexer, true);
+	assert(c != EOF && isdigit(c));
+
+	const char *base = "0123456789";
+	switch ((c = next(lexer, true))) {
+	case 'b':
+		base = "01";
+		break;
+	case 'o':
+		base = "01234567";
+		break;
+	case 'x':
+		base = "0123456789ABCDEFabcdef";
+		break;
+	default:
+		push(lexer, c, true);
+		break;
+	}
+
+	char *suff = NULL;
+	bool isfloat = false, isexp = false, issuff = false;
+	while ((c = next(lexer, true)) != EOF) {
+		if (!strchr(base, c)) {
+			switch (c) {
+			case '.':
+				if (isfloat || issuff) {
+					push(lexer, c, true);
+					goto finalize;
+				}
+				isfloat = true;
+				break;
+			case 'e':
+				if (isexp || issuff) {
+					push(lexer, c, true);
+					goto finalize;
+				}
+				isexp = true;
+				isfloat = false;
+				break;
+			case 'i':
+			case 'u':
+			case 'f':
+				if (issuff) {
+					push(lexer, c, true);
+					goto finalize;
+				}
+				suff = lexer->buf;
+				issuff = true;
+				break;
+			default:
+				push(lexer, c, true);
+				goto finalize;
+			}
+		}
+	}
+
+finalize:
+	if (suff) {
+		const char *valid[] = {
+			"u8", "u16", "u32", "u64",
+			"i8", "i16", "i32", "i64",
+			"f32", "f64", "u", "i", "z",
+		};
+		bool isvalid = false;
+		for (size_t i = 0; i < sizeof(valid) / sizeof(valid[0]); ++i) {
+			if (strcmp(suff, valid[i]) == 0) {
+				isvalid = true;
+				break;
+			}
+		}
+		if (!isvalid) {
+			out->token = T_ERROR;
+			consume(lexer, -1);
+			return c;
+		}
+	}
+
+	out->token = T_LITERAL;
+	out->name = strdup(lexer->buf);
+	consume(lexer, -1);
+	return c;
+}
+
+static int
+lex_string(struct lexer *lexer, struct token *out)
+{
 	assert(0); // TODO
 }

@@ -445,7 +528,7 @@ lex(struct lexer *lexer, struct token *out)
 		return c;
 	}

-	if (isalpha(c)) {
+	if (isalpha(c) || c == '_') {
 		push(lexer, c, false);
 		return lex_name(lexer, out);
 	}
@@ -459,7 +542,7 @@ lex(struct lexer *lexer, struct token *out)
 	case '"':
 	case '\'':
 		push(lexer, c, false);
-		return lex_literal(lexer, out);
+		return lex_string(lexer, out);
 	case '.': // . .. ...
 	case '<': // < << <= <<=
 	case '>': // > >> >= >>=
diff --git a/src/main.c b/src/main.c
@@ -9,11 +9,15 @@ main(int argc, char *argv[])
 	struct token tok;

 	while (tok.token != T_EOF) {
+		token_finish(&tok);
 		lex(&lexer, &tok);
 		switch (tok.token) {
 		case T_NAME:
 			fprintf(stderr, "'%s'\n", tok.name);
 			break;
+		case T_LITERAL:
+			fprintf(stderr, "(%s)\n", tok.literal);
+			break;
 		case T_ERROR:
 			fprintf(stderr, "ERROR\n");
 			break;
@@ -24,7 +28,6 @@ main(int argc, char *argv[])
 			fprintf(stderr, "%s\n", token_str(&tok));
 			break;
 		}
-		token_finish(&tok);
 	};

 	lex_finish(&lexer);