commit 84825112a9ad95f67ea2a2076cce86574fccf90d
parent 67de7c3e8553f1dc23a7b9d47ca595844e5e7a34
Author: Eyal Sawady <ecs@d2evs.net>
Date: Tue, 17 Nov 2020 10:58:23 -0500
lex: parse integer literals
Diffstat:
6 files changed, 167 insertions(+), 18 deletions(-)
diff --git a/configure b/configure
@@ -7,6 +7,7 @@ harec() {
src/lex.c \
src/main.c \
src/parse.c \
+ src/types.c \
src/utf8.c
}
diff --git a/include/lex.h b/include/lex.h
@@ -1,6 +1,7 @@
#ifndef HAREC_LEX_H
#define HAREC_LEX_H
#include <stdint.h>
+#include "types.h"
// Keep sorted
enum lexical_token {
@@ -103,8 +104,8 @@ enum lexical_token {
T_LAST_OPERATOR = T_XOREQ,
// Tokens with additional information
- T_NAME,
T_LITERAL,
+ T_NAME,
T_RUNE,
T_STRING,
@@ -116,8 +117,12 @@ enum lexical_token {
struct token {
enum lexical_token token;
union {
+ struct {
+ uintmax_t u;
+ intmax_t s;
+ enum type_storage storage;
+ } literal;
char *name;
- char *literal;
uint32_t rune;
struct {
size_t len;
diff --git a/include/types.h b/include/types.h
@@ -0,0 +1,23 @@
+#ifndef HARE_TYPES_H
+#define HARE_TYPES_H
+
+/*
+ * Storage class tag for a type. In this change it is stored in the
+ * `literal.storage` field of struct token and passed to
+ * type_storage_unparse().
+ *
+ * No explicit values are given, so the enumerators are numbered 0, 1, 2, ...
+ * in declaration order. src/lex.c uses them as designated-initializer
+ * indices for its literal-suffix table and walks that table starting at 0,
+ * so reordering or inserting entries here changes that table's layout.
+ */
+enum type_storage {
+ /* Scalar types */
+ /* Fixed-width unsigned integers (literal suffixes "u8" .. "u64") */
+ TYPE_STORAGE_U8,
+ TYPE_STORAGE_U16,
+ TYPE_STORAGE_U32,
+ TYPE_STORAGE_U64,
+ /* Fixed-width signed integers (literal suffixes "i8" .. "i64") */
+ TYPE_STORAGE_I8,
+ TYPE_STORAGE_I16,
+ TYPE_STORAGE_I32,
+ TYPE_STORAGE_I64,
+ /* Literal suffixes "i" and "u"; INT is the lexer's default storage */
+ TYPE_STORAGE_INT,
+ TYPE_STORAGE_UINT,
+ /* No literal suffix maps to UINTPTR in the lexer's suffix table */
+ TYPE_STORAGE_UINTPTR,
+ /* Literal suffix "z" */
+ TYPE_STORAGE_SIZE,
+ /* Literal suffixes "f32" and "f64"; value conversion is still a TODO in src/lex.c */
+ TYPE_STORAGE_F32,
+ TYPE_STORAGE_F64,
+};
+
+/*
+ * Returns the name of `storage` as a string literal, e.g. "u8", "int",
+ * "uintptr" or "size". The result is not heap-allocated, so the caller must
+ * not free or modify it. A value that is not one of the enumerators above
+ * trips assert(0) in the implementation.
+ */
+const char *type_storage_unparse(enum type_storage storage);
+#endif
diff --git a/src/lex.c b/src/lex.c
@@ -1,5 +1,7 @@
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -236,34 +238,34 @@ lex_literal(struct lexer *lexer, struct token *out)
}
char *suff = NULL;
- bool isfloat = false, isexp = false, issuff = false;
+ char *exp = NULL;
+ bool isfloat = false;
while ((c = next(lexer, true)) != UTF8_INVALID) {
if (!strchr(base, c)) {
switch (c) {
case '.':
- if (isfloat || issuff) {
+ if (isfloat || suff) {
push(lexer, c, true);
goto finalize;
}
isfloat = true;
break;
case 'e':
- if (isexp || issuff) {
+ if (exp || suff) {
push(lexer, c, true);
goto finalize;
}
- isexp = true;
- isfloat = false;
+ exp = &lexer->buf[lexer->buflen];
break;
case 'i':
case 'u':
case 'f':
- if (issuff) {
+ case 'z':
+ if (suff) {
push(lexer, c, true);
goto finalize;
}
suff = &lexer->buf[lexer->buflen - 1];
- issuff = true;
break;
default:
push(lexer, c, true);
@@ -273,16 +275,32 @@ lex_literal(struct lexer *lexer, struct token *out)
}
finalize:
+ out->token = T_LITERAL;
+ out->literal.storage = TYPE_STORAGE_INT;
if (suff) {
- const char *valid[] = {
- "u8", "u16", "u32", "u64",
- "i8", "i16", "i32", "i64",
- "f32", "f64", "u", "i", "z",
+ const char *suffs[] = {
+ [TYPE_STORAGE_U8] = "u8",
+ [TYPE_STORAGE_U16] = "u16",
+ [TYPE_STORAGE_U32] = "u32",
+ [TYPE_STORAGE_U64] = "u64",
+ [TYPE_STORAGE_I8] = "i8",
+ [TYPE_STORAGE_I16] = "i16",
+ [TYPE_STORAGE_I32] = "i32",
+ [TYPE_STORAGE_I64] = "i64",
+
+ [TYPE_STORAGE_UINT] = "u",
+ [TYPE_STORAGE_INT] = "i",
+ [TYPE_STORAGE_SIZE] = "z",
+ [TYPE_STORAGE_F32] = "f32",
+ [TYPE_STORAGE_F64] = "f64",
};
bool isvalid = false;
- for (size_t i = 0; i < sizeof(valid) / sizeof(valid[0]); ++i) {
- if (strcmp(suff, valid[i]) == 0) {
+ for (enum type_storage i = 0;
+ i < sizeof(suffs) / sizeof(suffs[0]); ++i) {
+ if (suffs[i] && strcmp(suff, suffs[i]) == 0) {
isvalid = true;
+ out->literal.storage = i;
+ isfloat = true;
break;
}
}
@@ -293,8 +311,49 @@ finalize:
}
}
- out->token = T_LITERAL;
- out->name = strdup(lexer->buf);
+ uintmax_t exponent = 0;
+ if (exp) {
+ char *endptr = NULL;
+ exponent = strtoumax(exp, &endptr, 10);
+ if (endptr == exp) {
+ out->token = T_ERROR;
+ consume(lexer, -1);
+ return c;
+ }
+ }
+
+ errno = 0;
+ switch (out->literal.storage) {
+ case TYPE_STORAGE_U8:
+ case TYPE_STORAGE_U16:
+ case TYPE_STORAGE_U32:
+ case TYPE_STORAGE_UINT:
+ case TYPE_STORAGE_U64:
+ case TYPE_STORAGE_SIZE:
+ out->literal.u = strtoumax(lexer->buf, NULL, strlen(base));
+ for (uintmax_t i = 0; i < exponent; i++) {
+ out->literal.u *= 10;
+ }
+ break;
+ case TYPE_STORAGE_I8:
+ case TYPE_STORAGE_I16:
+ case TYPE_STORAGE_I32:
+ case TYPE_STORAGE_INT:
+ case TYPE_STORAGE_I64:
+ out->literal.s = strtoimax(lexer->buf, NULL, strlen(base));
+ for (uintmax_t i = 0; i < exponent; i++) {
+ out->literal.s *= 10;
+ }
+ break;
+ case TYPE_STORAGE_F32:
+ case TYPE_STORAGE_F64:
+ assert(0); // TODO
+ default:
+ assert(0);
+ }
+ if (errno == ERANGE) {
+ out->token = T_ERROR;
+ }
consume(lexer, -1);
return c;
}
diff --git a/src/parse.c b/src/parse.c
@@ -1,9 +1,11 @@
+#include <assert.h>
#include <stdio.h>
#include "ast.h"
#include "identifier.h"
#include "lex.h"
#include "parse.h"
#include "utf8.h"
+#include "types.h"
void
parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit)
@@ -18,7 +20,27 @@ parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit)
fprintf(stderr, "'%s'\n", tok.name);
break;
case T_LITERAL:
- fprintf(stderr, "(%s)\n", tok.literal);
+ switch (tok.literal.storage) {
+ case TYPE_STORAGE_U8:
+ case TYPE_STORAGE_U16:
+ case TYPE_STORAGE_U32:
+ case TYPE_STORAGE_UINT:
+ case TYPE_STORAGE_U64:
+ case TYPE_STORAGE_SIZE:
+ fprintf(stderr, "(%ju: %s)\n", tok.literal.u,
+ type_storage_unparse(tok.literal.storage));
+ break;
+ case TYPE_STORAGE_I8:
+ case TYPE_STORAGE_I16:
+ case TYPE_STORAGE_I32:
+ case TYPE_STORAGE_INT:
+ case TYPE_STORAGE_I64:
+ fprintf(stderr, "(%jd: %s)\n", tok.literal.s,
+ type_storage_unparse(tok.literal.storage));
+ break;
+ default:
+ assert(0);
+ }
break;
case T_RUNE:
putc('\'', stderr);
diff --git a/src/types.c b/src/types.c
@@ -0,0 +1,39 @@
+#include <assert.h>
+#include "types.h"
+
+// Maps a storage class to its type name and returns it as a string literal
+// (static storage; the caller must not free or modify it).
+//
+// These are type names, not literal suffixes: TYPE_STORAGE_INT yields "int"
+// and TYPE_STORAGE_SIZE yields "size", whereas the suffix table in src/lex.c
+// spells them "i" and "z".
+//
+// NOTE(review): the default case only asserts. When built with NDEBUG the
+// assert compiles away and control reaches the end of this non-void function
+// without returning a value.
+const char *
+type_storage_unparse(enum type_storage storage)
+{
+ switch (storage) {
+ case TYPE_STORAGE_U8:
+ return "u8";
+ case TYPE_STORAGE_U16:
+ return "u16";
+ case TYPE_STORAGE_U32:
+ return "u32";
+ case TYPE_STORAGE_U64:
+ return "u64";
+ case TYPE_STORAGE_I8:
+ return "i8";
+ case TYPE_STORAGE_I16:
+ return "i16";
+ case TYPE_STORAGE_I32:
+ return "i32";
+ case TYPE_STORAGE_I64:
+ return "i64";
+ case TYPE_STORAGE_INT:
+ return "int";
+ case TYPE_STORAGE_UINT:
+ return "uint";
+ case TYPE_STORAGE_UINTPTR:
+ return "uintptr";
+ case TYPE_STORAGE_SIZE:
+ return "size";
+ case TYPE_STORAGE_F32:
+ return "f32";
+ case TYPE_STORAGE_F64:
+ return "f64";
+ default:
+ // Every enumerator is handled above; anything else is a caller bug.
+ assert(0);
+ }
+}