harec

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 84825112a9ad95f67ea2a2076cce86574fccf90d
parent 67de7c3e8553f1dc23a7b9d47ca595844e5e7a34
Author: Eyal Sawady <ecs@d2evs.net>
Date:   Tue, 17 Nov 2020 10:58:23 -0500

lex: parse integer literals

Diffstat:
M configure | 1 +
M include/lex.h | 9 +++++++--
A include/types.h | 23 +++++++++++++++++++++++
M src/lex.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M src/parse.c | 24 +++++++++++++++++++++++-
A src/types.c | 39 +++++++++++++++++++++++++++++++++++++++
6 files changed, 167 insertions(+), 18 deletions(-)

diff --git a/configure b/configure @@ -7,6 +7,7 @@ harec() { src/lex.c \ src/main.c \ src/parse.c \ + src/types.c \ src/utf8.c } diff --git a/include/lex.h b/include/lex.h @@ -1,6 +1,7 @@ #ifndef HAREC_LEX_H #define HAREC_LEX_H #include <stdint.h> +#include "types.h" // Keep sorted enum lexical_token { @@ -103,8 +104,8 @@ enum lexical_token { T_LAST_OPERATOR = T_XOREQ, // Tokens with additional information - T_NAME, T_LITERAL, + T_NAME, T_RUNE, T_STRING, @@ -116,8 +117,12 @@ enum lexical_token { struct token { enum lexical_token token; union { + struct { + uintmax_t u; + intmax_t s; + enum type_storage storage; + } literal; char *name; - char *literal; uint32_t rune; struct { size_t len; diff --git a/include/types.h b/include/types.h @@ -0,0 +1,23 @@ +#ifndef HARE_TYPES_H +#define HARE_TYPES_H + +enum type_storage { + /* Scalar types */ + TYPE_STORAGE_U8, + TYPE_STORAGE_U16, + TYPE_STORAGE_U32, + TYPE_STORAGE_U64, + TYPE_STORAGE_I8, + TYPE_STORAGE_I16, + TYPE_STORAGE_I32, + TYPE_STORAGE_I64, + TYPE_STORAGE_INT, + TYPE_STORAGE_UINT, + TYPE_STORAGE_UINTPTR, + TYPE_STORAGE_SIZE, + TYPE_STORAGE_F32, + TYPE_STORAGE_F64, +}; + +const char *type_storage_unparse(enum type_storage storage); +#endif diff --git a/src/lex.c b/src/lex.c @@ -1,5 +1,7 @@ #include <assert.h> #include <ctype.h> +#include <errno.h> +#include <inttypes.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -236,34 +238,34 @@ lex_literal(struct lexer *lexer, struct token *out) } char *suff = NULL; - bool isfloat = false, isexp = false, issuff = false; + char *exp = NULL; + bool isfloat = false; while ((c = next(lexer, true)) != UTF8_INVALID) { if (!strchr(base, c)) { switch (c) { case '.': - if (isfloat || issuff) { + if (isfloat || suff) { push(lexer, c, true); goto finalize; } isfloat = true; break; case 'e': - if (isexp || issuff) { + if (exp || suff) { push(lexer, c, true); goto finalize; } - isexp = true; - isfloat = false; + exp = &lexer->buf[lexer->buflen]; break; case 'i': case 'u': 
case 'f': - if (issuff) { + case 'z': + if (suff) { push(lexer, c, true); goto finalize; } suff = &lexer->buf[lexer->buflen - 1]; - issuff = true; break; default: push(lexer, c, true); @@ -273,16 +275,32 @@ lex_literal(struct lexer *lexer, struct token *out) } finalize: + out->token = T_LITERAL; + out->literal.storage = TYPE_STORAGE_INT; if (suff) { - const char *valid[] = { - "u8", "u16", "u32", "u64", - "i8", "i16", "i32", "i64", - "f32", "f64", "u", "i", "z", + const char *suffs[] = { + [TYPE_STORAGE_U8] = "u8", + [TYPE_STORAGE_U16] = "u16", + [TYPE_STORAGE_U32] = "u32", + [TYPE_STORAGE_U64] = "u64", + [TYPE_STORAGE_I8] = "i8", + [TYPE_STORAGE_I16] = "i16", + [TYPE_STORAGE_I32] = "i32", + [TYPE_STORAGE_I64] = "i64", + + [TYPE_STORAGE_UINT] = "u", + [TYPE_STORAGE_INT] = "i", + [TYPE_STORAGE_SIZE] = "z", + [TYPE_STORAGE_F32] = "f32", + [TYPE_STORAGE_F64] = "f64", }; bool isvalid = false; - for (size_t i = 0; i < sizeof(valid) / sizeof(valid[0]); ++i) { - if (strcmp(suff, valid[i]) == 0) { + for (enum type_storage i = 0; + i < sizeof(suffs) / sizeof(suffs[0]); ++i) { + if (suffs[i] && strcmp(suff, suffs[i]) == 0) { isvalid = true; + out->literal.storage = i; + isfloat = true; break; } } @@ -293,8 +311,49 @@ finalize: } } - out->token = T_LITERAL; - out->name = strdup(lexer->buf); + uintmax_t exponent = 0; + if (exp) { + char *endptr = NULL; + exponent = strtoumax(exp, &endptr, 10); + if (endptr == exp) { + out->token = T_ERROR; + consume(lexer, -1); + return c; + } + } + + errno = 0; + switch (out->literal.storage) { + case TYPE_STORAGE_U8: + case TYPE_STORAGE_U16: + case TYPE_STORAGE_U32: + case TYPE_STORAGE_UINT: + case TYPE_STORAGE_U64: + case TYPE_STORAGE_SIZE: + out->literal.u = strtoumax(lexer->buf, NULL, strlen(base)); + for (uintmax_t i = 0; i < exponent; i++) { + out->literal.u *= 10; + } + break; + case TYPE_STORAGE_I8: + case TYPE_STORAGE_I16: + case TYPE_STORAGE_I32: + case TYPE_STORAGE_INT: + case TYPE_STORAGE_I64: + out->literal.s = 
strtoimax(lexer->buf, NULL, strlen(base)); + for (uintmax_t i = 0; i < exponent; i++) { + out->literal.s *= 10; + } + break; + case TYPE_STORAGE_F32: + case TYPE_STORAGE_F64: + assert(0); // TODO + default: + assert(0); + } + if (errno == ERANGE) { + out->token = T_ERROR; + } consume(lexer, -1); return c; } diff --git a/src/parse.c b/src/parse.c @@ -1,9 +1,11 @@ +#include <assert.h> #include <stdio.h> #include "ast.h" #include "identifier.h" #include "lex.h" #include "parse.h" #include "utf8.h" +#include "types.h" void parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit) @@ -18,7 +20,27 @@ parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit) fprintf(stderr, "'%s'\n", tok.name); break; case T_LITERAL: - fprintf(stderr, "(%s)\n", tok.literal); + switch (tok.literal.storage) { + case TYPE_STORAGE_U8: + case TYPE_STORAGE_U16: + case TYPE_STORAGE_U32: + case TYPE_STORAGE_UINT: + case TYPE_STORAGE_U64: + case TYPE_STORAGE_SIZE: + fprintf(stderr, "(%ju: %s)\n", tok.literal.u, + type_storage_unparse(tok.literal.storage)); + break; + case TYPE_STORAGE_I8: + case TYPE_STORAGE_I16: + case TYPE_STORAGE_I32: + case TYPE_STORAGE_INT: + case TYPE_STORAGE_I64: + fprintf(stderr, "(%jd: %s)\n", tok.literal.s, + type_storage_unparse(tok.literal.storage)); + break; + default: + assert(0); + } break; case T_RUNE: putc('\'', stderr); diff --git a/src/types.c b/src/types.c @@ -0,0 +1,39 @@ +#include <assert.h> +#include "types.h" + +const char * +type_storage_unparse(enum type_storage storage) +{ + switch (storage) { + case TYPE_STORAGE_U8: + return "u8"; + case TYPE_STORAGE_U16: + return "u16"; + case TYPE_STORAGE_U32: + return "u32"; + case TYPE_STORAGE_U64: + return "u64"; + case TYPE_STORAGE_I8: + return "i8"; + case TYPE_STORAGE_I16: + return "i16"; + case TYPE_STORAGE_I32: + return "i32"; + case TYPE_STORAGE_I64: + return "i64"; + case TYPE_STORAGE_INT: + return "int"; + case TYPE_STORAGE_UINT: + return "uint"; + case TYPE_STORAGE_UINTPTR: + 
return "uintptr"; + case TYPE_STORAGE_SIZE: + return "size"; + case TYPE_STORAGE_F32: + return "f32"; + case TYPE_STORAGE_F64: + return "f64"; + default: + assert(0); + } +}