harec

[hare] Hare compiler, written in C11 for POSIX OSs
Log | Files | Refs | README | LICENSE

commit 12393e2c946a002b7482c34bb37c45adcfdc19f4
parent 9371a410171d2deccc4509e7620c7abbfc05ad76
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date:   Sat, 28 May 2022 03:01:13 +0200

location: pass around indices instead of pathnames

Indices >0 will serve as file IDs in DWARF. Index 0 is used in places
where the expression isn't really sourced from an input file, like
command line defines and typedef files.

This change also substantially reduces the number of allocations that
harec makes.

Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>

Diffstat:
M include/check.h | 1 +
M include/lex.h | 4 ++--
M include/util.h | 2 ++
M src/check.c | 28 ++++++++++++++++++----------
M src/gen.c | 6 ++++--
M src/lex.c | 21 ++++++++++-----------
M src/main.c | 15 ++++++++++++---
M src/mod.c | 5 ++++-
M src/parse.c | 32 +++++++-------------------------
M src/util.c | 2 ++
M tests/30-reduction.c | 15 +++++++++------
11 files changed, 71 insertions(+), 60 deletions(-)

diff --git a/include/check.h b/include/check.h @@ -81,6 +81,7 @@ enum declaration_type { struct declaration { enum declaration_type type; struct identifier ident; + struct location loc; char *symbol; bool exported; union { diff --git a/include/lex.h b/include/lex.h @@ -139,7 +139,7 @@ enum lexical_token { }; struct location { - const char *path; + int file; int lineno, colno; }; @@ -171,7 +171,7 @@ struct lexer { bool require_int; }; -void lex_init(struct lexer *lexer, FILE *f, const char *filename); +void lex_init(struct lexer *lexer, FILE *f, int fileid); void lex_finish(struct lexer *lexer); enum lexical_token lex(struct lexer *lexer, struct token *out); void unlex(struct lexer *lexer, struct token *in); diff --git a/include/util.h b/include/util.h @@ -3,6 +3,8 @@ #include <assert.h> #include <stdint.h> +extern const char **sources; + #define FNV1A_INIT 2166136261u uint32_t fnv1a(uint32_t hash, unsigned char c); diff --git a/src/check.c b/src/check.c @@ -33,7 +33,7 @@ expect(const struct location *loc, bool constraint, char *fmt, ...) 
va_start(ap, fmt); fprintf(stderr, "Error %s:%d:%d: ", - loc->path, loc->lineno, loc->colno); + sources[loc->file], loc->lineno, loc->colno); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(EXIT_FAILURE); @@ -56,7 +56,7 @@ handle_errors(struct errors *errors) { struct errors *error = errors; while (error) { - fprintf(stderr, "Error %s:%d:%d: %s\n", error->loc.path, + fprintf(stderr, "Error %s:%d:%d: %s\n", sources[error->loc.file], error->loc.lineno, error->loc.colno, error->msg); struct errors *next = error->next; free(error); @@ -577,11 +577,12 @@ check_expr_assert(struct context *ctx, assert(expr->assert.message->type == EXPR_CONSTANT); size_t n = snprintf(NULL, 0, "%s:%d:%d: ", - aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); size_t s_len = expr->assert.message->constant.string.len; char *s = xcalloc(1, n + s_len + 1); - snprintf(s, n + 1, "%s:%d:%d: ", - aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + snprintf(s, n + 1, "%s:%d:%d: ", sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); memcpy(s+n, expr->assert.message->constant.string.value, s_len); s[n + s_len] = '\0'; @@ -589,10 +590,12 @@ check_expr_assert(struct context *ctx, expr->assert.message->constant.string.len = n + s_len; } else { int n = snprintf(NULL, 0, "Assertion failed: %s:%d:%d", - aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); char *s = xcalloc(1, n + 1); snprintf(s, n, "Assertion failed: %s:%d:%d", - aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); expr->assert.message->type = EXPR_CONSTANT; expr->assert.message->result = &builtin_type_const_str; @@ -2147,10 +2150,12 @@ check_expr_propagate(struct context *ctx, case_err->value->assert.is_static = false; int n = snprintf(NULL, 0, "Assertion failed: error occured at %s:%d:%d", - 
aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); char *s = xcalloc(1, n + 1); snprintf(s, n, "Assertion failed: error occured at %s:%d:%d", - aexpr->loc.path, aexpr->loc.lineno, aexpr->loc.colno); + sources[aexpr->loc.file], + aexpr->loc.lineno, aexpr->loc.colno); case_err->value->assert.message = xcalloc(1, sizeof(struct expression)); case_err->value->assert.message->type = EXPR_CONSTANT; @@ -3178,6 +3183,7 @@ check_declarations(struct context *ctx, struct declarations *decls = *next = xcalloc(1, sizeof(struct declarations)); decl->exported = adecl->exported; + decl->loc = adecl->loc; decls->decl = decl; next = &decls->next; } @@ -3196,6 +3202,7 @@ check_declarations(struct context *ctx, struct declarations *decls = *next = xcalloc(1, sizeof(struct declarations)); decl->exported = adecl->exported; + decl->loc = adecl->loc; decls->decl = decl; next = &decls->next; } @@ -3219,6 +3226,7 @@ check_declarations(struct context *ctx, struct declarations *decls = *next = xcalloc(1, sizeof(struct declarations)); decl->exported = adecl->exported; + decl->loc = adecl->loc; decls->decl = decl; next = &decls->next; } @@ -3908,7 +3916,7 @@ check_internal(struct type_store *ts, // XXX: This duplicates a lot of code with scan_const for (struct define *def = defines; def; def = def->next) { struct location loc = { - .path = "-D", .lineno = 1, .colno = 1, + .file = 0, .lineno = 1, .colno = 1, }; const struct type *type = type_store_lookup_atype( ctx.store, def->type); diff --git a/src/gen.c b/src/gen.c @@ -201,9 +201,11 @@ static void gen_fixed_abort(struct gen_context *ctx, struct location loc, enum fixed_aborts reason) { - int n = snprintf(NULL, 0, "%s:%d:%d", loc.path, loc.lineno, loc.colno); + int n = snprintf(NULL, 0, "%s:%d:%d", + sources[loc.file], loc.lineno, loc.colno); char *s = xcalloc(1, n + 1); - snprintf(s, n, "%s:%d:%d", loc.path, loc.lineno, loc.colno); + snprintf(s, n, "%s:%d:%d", + 
sources[loc.file], loc.lineno, loc.colno); struct expression eloc = {0}; eloc.type = EXPR_CONSTANT; eloc.result = &builtin_type_const_str; diff --git a/src/lex.c b/src/lex.c @@ -136,7 +136,7 @@ static const char *tokens[] = { }; void -lex_init(struct lexer *lexer, FILE *f, const char *filename) +lex_init(struct lexer *lexer, FILE *f, int fileid) { memset(lexer, 0, sizeof(*lexer)); lexer->in = f; @@ -145,7 +145,7 @@ lex_init(struct lexer *lexer, FILE *f, const char *filename) lexer->un.token = T_ERROR; lexer->loc.lineno = 1; lexer->loc.colno = 0; - lexer->loc.path = filename; + lexer->loc.file = fileid; lexer->c[0] = UINT32_MAX; lexer->c[1] = UINT32_MAX; } @@ -183,9 +183,7 @@ next(struct lexer *lexer, struct location *loc, bool buffer) update_lineno(&lexer->loc, c); } if (loc != NULL) { - loc->path = lexer->loc.path; - loc->lineno = lexer->loc.lineno; - loc->colno = lexer->loc.colno; + *loc = lexer->loc; for (size_t i = 0; i < 2 && lexer->c[i] != UINT32_MAX; i++) { update_lineno(&lexer->loc, lexer->c[i]); } @@ -540,7 +538,7 @@ lex_rune(struct lexer *lexer) if (*endptr != '\0') { fprintf(stderr, "Error: invalid hex literal at %s:%d:%d\n", - lexer->loc.path, lexer->loc.lineno, + sources[lexer->loc.file], lexer->loc.lineno, lexer->loc.colno); exit(EXIT_FAILURE); } @@ -555,7 +553,7 @@ lex_rune(struct lexer *lexer) if (*endptr != '\0') { fprintf(stderr, "Error: invalid hex literal at %s:%d:%d\n", - lexer->loc.path, lexer->loc.lineno, + sources[lexer->loc.file], lexer->loc.lineno, lexer->loc.colno); exit(EXIT_FAILURE); } @@ -574,7 +572,7 @@ lex_rune(struct lexer *lexer) if (*endptr != '\0') { fprintf(stderr, "Error: invalid hex literal at %s:%d:%d\n", - lexer->loc.path, lexer->loc.lineno, + sources[lexer->loc.file], lexer->loc.lineno, lexer->loc.colno); exit(EXIT_FAILURE); } @@ -582,7 +580,7 @@ lex_rune(struct lexer *lexer) default: fprintf(stderr, "Error: invalid escape '\\%c' at %s:%d:%d\n", - c, lexer->loc.path, lexer->loc.lineno, + c, sources[lexer->loc.file], 
lexer->loc.lineno, lexer->loc.colno); exit(EXIT_FAILURE); } @@ -1001,7 +999,8 @@ _lex(struct lexer *lexer, struct token *out) default: p[utf8_encode(p, c)] = '\0'; fprintf(stderr, "Error: unexpected code point '%s' at %s:%d:%d\n", - p, lexer->loc.path, lexer->loc.lineno, lexer->loc.colno); + p, sources[lexer->loc.file], lexer->loc.lineno, + lexer->loc.colno); exit(EXIT_FAILURE); } @@ -1036,7 +1035,7 @@ token_finish(struct token *tok) } tok->token = 0; tok->storage = 0; - tok->loc.path = NULL; + tok->loc.file = 0; tok->loc.colno = 0; tok->loc.lineno = 0; } diff --git a/src/main.c b/src/main.c @@ -60,7 +60,9 @@ parse_define(const char *argv_0, const char *in) struct token tok; struct lexer lexer; FILE *f = fmemopen((char *)in, strlen(in), "r"); - lex_init(&lexer, f, "-D"); + const char *d = "-D"; + sources = &d; + lex_init(&lexer, f, 0); // The syntax for this parameter is: // @@ -117,7 +119,9 @@ main(int argc, char *argv[]) case 'N': unit.ns = xcalloc(1, sizeof(struct identifier)); FILE *in = fmemopen(optarg, strlen(optarg), "r"); - lex_init(&lexer, in, "-N"); + const char *ns = "-N"; + sources = &ns; + lex_init(&lexer, in, 0); parse_identifier(&lexer, unit.ns, false); lex_finish(&lexer); break; @@ -139,6 +143,11 @@ main(int argc, char *argv[]) struct ast_subunit **next = &aunit.subunits.next; enum stage stage = parse_stage(getenv("HA_STAGE")); + sources = xcalloc(ninputs + 2, sizeof(char **)); + memcpy((char **)sources + 1, argv + optind, sizeof(char **) * ninputs); + sources[0] = "-D"; + sources[ninputs + 1] = NULL; + for (size_t i = 0; i < ninputs; ++i) { FILE *in; const char *path = argv[optind + i]; @@ -154,7 +163,7 @@ main(int argc, char *argv[]) return EXIT_FAILURE; } - lex_init(&lexer, in, path); + lex_init(&lexer, in, i + 1); if (stage == STAGE_LEX) { struct token tok; while (lex(&lexer, &tok) != T_EOF); diff --git a/src/mod.c b/src/mod.c @@ -88,9 +88,12 @@ module_resolve(struct modcache *cache[], exit(EXIT_FAILURE); } - lex_init(&lexer, f, path); + const 
char *old = sources[0]; + sources[0] = path; + lex_init(&lexer, f, 0); parse(&lexer, &aunit.subunits); lex_finish(&lexer); + sources[0] = old; // TODO: Free unused bits struct unit u = {0}; diff --git a/src/parse.c b/src/parse.c @@ -19,7 +19,7 @@ synassert_msg(bool cond, const char *msg, struct token *tok) { if (!cond) { fprintf(stderr, "Syntax error: %s at %s:%d:%d (found '%s')\n", msg, - tok->loc.path, tok->loc.lineno, tok->loc.colno, + sources[tok->loc.file], tok->loc.lineno, tok->loc.colno, token_str(tok)); exit(EXIT_FAILURE); } @@ -35,7 +35,7 @@ synassert(bool cond, struct token *tok, ...) enum lexical_token t = va_arg(ap, enum lexical_token); fprintf(stderr, "Syntax error: unexpected '%s' at %s:%d:%d%s", - token_str(tok), tok->loc.path, tok->loc.lineno, + token_str(tok), sources[tok->loc.file], tok->loc.lineno, tok->loc.colno, t == T_EOF ? "\n" : ", expected " ); while (t != T_EOF) { if (t == T_LITERAL || t == T_NAME) { @@ -61,24 +61,12 @@ want(struct lexer *lexer, enum lexical_token ltok, struct token *tok) token_finish(out); } } -static struct location -locdup(const struct location *loc) -{ - struct location new_loc = { - .lineno = loc->lineno, - .colno = loc->colno, - .path = strdup(loc->path), - }; - return new_loc; -} static struct ast_expression * mkexpr(const struct location *loc) { struct ast_expression *exp = xcalloc(1, sizeof(struct ast_expression)); - exp->loc.lineno = loc->lineno; - exp->loc.colno = loc->colno; - exp->loc.path = strdup(loc->path); + exp->loc = *loc; return exp; } @@ -86,9 +74,7 @@ static struct ast_type * mktype(const struct location *loc) { struct ast_type *t = xcalloc(1, sizeof(struct ast_type)); - t->loc.lineno = loc->lineno; - t->loc.colno = loc->colno; - t->loc.path = strdup(loc->path); + t->loc = *loc; return t; } @@ -97,9 +83,7 @@ mkfuncparams(const struct location *loc) { struct ast_function_parameters *p = xcalloc(1, sizeof(struct ast_function_parameters)); - p->loc.lineno = loc->lineno; - p->loc.colno = loc->colno; - 
p->loc.path = strdup(loc->path); + p->loc = *loc; return p; } @@ -151,7 +135,7 @@ parse_name_list(struct lexer *lexer, struct ast_imports *name) struct token tok = {0}; want(lexer, T_NAME, &tok); name->ident.name = strdup(tok.name); - name->loc = locdup(&tok.loc); + name->loc = tok.loc; token_finish(&tok); switch (lex(lexer, &tok)) { @@ -2515,9 +2499,7 @@ static void parse_decl(struct lexer *lexer, struct ast_decl *decl) { struct token tok = {0}; - decl->loc.lineno = lexer->loc.lineno; - decl->loc.colno = lexer->loc.colno; - decl->loc.path = strdup(lexer->loc.path); + decl->loc = lexer->loc; switch (lex(lexer, &tok)) { case T_CONST: case T_LET: diff --git a/src/util.c b/src/util.c @@ -7,6 +7,8 @@ #undef calloc #undef realloc +const char **sources; + uint32_t fnv1a(uint32_t hash, unsigned char c) { diff --git a/tests/30-reduction.c b/tests/30-reduction.c @@ -11,24 +11,27 @@ #include "scope.h" #include "type_store.h" #include "typedef.h" +#include "util.h" -void test(struct context *ctx, char *expected, char *input) { +void test(struct context *ctx, const char *expected, const char *input) { builtin_types_init(); ctx->errors = NULL; ctx->next = &ctx->errors; + sources = (const char *[2]){"<expected>", input}; + const struct type *etype = NULL; if (strlen(expected) != 0) { - FILE *ebuf = fmemopen(expected, strlen(expected), "r"); + FILE *ebuf = fmemopen((char *)expected, strlen(expected), "r"); struct lexer elex; - lex_init(&elex, ebuf, "<expected>"); + lex_init(&elex, ebuf, 0); struct ast_type *eatype = parse_type(&elex); etype = type_store_lookup_atype(ctx->store, eatype); } - FILE *ibuf = fmemopen(input, strlen(input), "r"); + FILE *ibuf = fmemopen((char *)input, strlen(input), "r"); struct lexer ilex; - lex_init(&ilex, ibuf, input); + lex_init(&ilex, ibuf, 1); struct ast_expression *iaexpr = parse_expression(&ilex); struct expression iexpr = {0}; check_expression(ctx, iaexpr, &iexpr, NULL); @@ -40,7 +43,7 @@ void test(struct context *ctx, char *expected, char 
*input) { struct errors *error = ctx->errors; while (error) { - fprintf(stderr, "Error %s:%d:%d: %s\n", error->loc.path, + fprintf(stderr, "Error %s:%d:%d: %s\n", sources[error->loc.file], error->loc.lineno, error->loc.colno, error->msg); struct errors *next = error->next; free(error);