harec

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 56e3c4ea3d2096cce28bbda2ef626f1112f6c34d
parent 78ff6f8f8a7873d2ea073064733bc221b7833024
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sun, 22 Nov 2020 08:44:04 -0500

parse: initial riggings

Diffstat:
M include/lex.h | 1 +
M include/parse.h | 5 ++---
M src/lex.c | 45 ++++++++++++++++++++++++++++++++-------------
M src/main.c | 4 ++--
M src/parse.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
5 files changed, 134 insertions(+), 74 deletions(-)

diff --git a/include/lex.h b/include/lex.h @@ -146,5 +146,6 @@ uint32_t lex(struct lexer *lexer, struct token *out); void token_finish(struct token *tok); const char *token_str(const struct token *tok); +const char *lexical_token_str(enum lexical_token tok); #endif diff --git a/include/parse.h b/include/parse.h @@ -2,10 +2,9 @@ #define HAREC_PARSE_H #include <stdio.h> -struct ast_unit; -struct identifier; +struct ast_subunit; struct lexer; -void parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit); +void parse(struct lexer *lexer, struct ast_subunit *unit); #endif diff --git a/src/lex.c b/src/lex.c @@ -203,7 +203,7 @@ lex_name(struct lexer *lexer, struct token *out) } } out->token = T_EOF; - return c; + return out->token; lookup:; void *token = bsearch(&lexer->buf, tokens, T_LAST_KEYWORD + 1, @@ -215,7 +215,7 @@ lookup:; out->token = (const char **)token - tokens; } consume(lexer, -1); - return c; + return out->token; } static uint32_t @@ -319,7 +319,7 @@ finalize: if (!isvalid) { out->token = T_ERROR; consume(lexer, -1); - return c; + return out->token; } } @@ -330,7 +330,7 @@ finalize: if (endptr == exp) { out->token = T_ERROR; consume(lexer, -1); - return c; + return out->token; } } @@ -368,7 +368,7 @@ finalize: out->token = T_ERROR; } consume(lexer, -1); - return c; + return out->token; } static uint32_t @@ -449,7 +449,7 @@ lex_string(struct lexer *lexer, struct token *out) out->string.len = lexer->buflen; out->string.value = buf; consume(lexer, -1); - return c; + return out->token; default: push(lexer, c, false); push(lexer, lex_rune(lexer), false); @@ -473,7 +473,7 @@ lex_string(struct lexer *lexer, struct token *out) assert(c == '\''); out->token = T_LITERAL; out->storage = TYPE_STORAGE_RUNE; - return c; + return out->token; default: assert(0); // Invariant } @@ -553,7 +553,7 @@ lex3(struct lexer *lexer, struct token *out, uint32_t c) assert(0); // Invariant } - return c; + return out->token; } static uint32_t @@ -705,7 +705,7 @@ 
lex2(struct lexer *lexer, struct token *out, uint32_t c) assert(0); // Invariant } - return c; + return out->token; } uint32_t @@ -714,7 +714,7 @@ lex(struct lexer *lexer, struct token *out) uint32_t c = wgetc(lexer); if (c == UTF8_INVALID) { out->token = T_EOF; - return c; + return out->token; } if (c <= 0x7F && (isalpha(c) || c == '_')) { @@ -780,7 +780,7 @@ lex(struct lexer *lexer, struct token *out) break; } - return c; + return out->token; } void @@ -807,13 +807,32 @@ token_finish(struct token *tok) } const char * +lexical_token_str(enum lexical_token tok) +{ + switch (tok) { + case T_NAME: + return "name"; + case T_LITERAL: + return "literal"; + case T_EOF: + return "end of file"; + case T_ERROR: + return "error"; + default: + assert(tok < sizeof(tokens) / sizeof(tokens[0])); + return tokens[tok]; + } +} + +const char * token_str(const struct token *tok) { switch (tok->token) { case T_NAME: return tok->name; + case T_LITERAL: + assert(0); // TODO default: - assert(tok->token < sizeof(tokens) / sizeof(tokens[0])); - return tokens[tok->token]; + return lexical_token_str(tok->token); } } diff --git a/src/main.c b/src/main.c @@ -9,8 +9,8 @@ main(int argc, char *argv[]) struct lexer lexer; lex_init(&lexer, stdin); - struct ast_unit unit; - parse(&lexer, NULL, &unit); + struct ast_subunit subunit; + parse(&lexer, &subunit); lex_finish(&lexer); return 0; diff --git a/src/parse.c b/src/parse.c @@ -1,73 +1,114 @@ -#include <assert.h> +#include <stdarg.h> +#include <stdbool.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> #include "ast.h" #include "identifier.h" #include "lex.h" #include "parse.h" -#include "utf8.h" #include "types.h" +#include "utf8.h" -void -parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit) +struct parser { + struct lexer *lex; +}; + +static void +trace(struct parser *par, const char *name) +{ + if (getenv("HAREC_TRACE") == NULL) { + return; + } + fprintf(stderr, "%s\n", name); +} + +static void +synassert(bool 
cond, struct token *tok, ...) +{ + if (!cond) { + va_list ap; + va_start(ap, tok); + + // TODO: file name, lineno, colno + enum lexical_token t = va_arg(ap, enum lexical_token); + fprintf(stderr, + "Syntax error: unexpected '%s'%s", + token_str(tok), + t == T_EOF ? "\n" : ", expected " ); + while (t != T_EOF) { + fprintf(stderr, "%s", lexical_token_str(t)); + t = va_arg(ap, enum lexical_token); + fprintf(stderr, "%s", t == T_EOF ? "\n" : ", "); + } + exit(1); + } +} + +static void +parse_identifier(struct parser *par, struct identifier *ident) { struct token tok = {0}; + struct identifier *i = ident; + trace(par, "identifier"); - while (tok.token != T_EOF) { + while (true) { + synassert(lex(par->lex, &tok) == T_NAME, &tok, T_NAME, T_EOF); + i->name = strdup(tok.name); token_finish(&tok); - lex(lexer, &tok); - switch (tok.token) { - case T_NAME: - fprintf(stderr, "'%s'\n", tok.name); - break; - case T_LITERAL: - switch (tok.storage) { - case TYPE_STORAGE_F32: - case TYPE_STORAGE_F64: - fprintf(stderr, "(%lf: %s)\n", tok._float, - type_storage_unparse(tok.storage)); - break; - case TYPE_STORAGE_I8: - case TYPE_STORAGE_I16: - case TYPE_STORAGE_I32: - case TYPE_STORAGE_I64: - case TYPE_STORAGE_INT: - fprintf(stderr, "(%jd: %s)\n", tok._signed, - type_storage_unparse(tok.storage)); - break; - case TYPE_STORAGE_RUNE: - putc('\'', stderr); - utf8_fputch(stderr, tok.rune); - putc('\'', stderr); - putc('\n', stderr); - break; - case TYPE_STORAGE_STRING: - fprintf(stderr, "\"%*s\"\n", (int)tok.string.len, - tok.string.value); - break; - case TYPE_STORAGE_SIZE: - case TYPE_STORAGE_U8: - case TYPE_STORAGE_U16: - case TYPE_STORAGE_U32: - case TYPE_STORAGE_U64: - case TYPE_STORAGE_UINT: - fprintf(stderr, "(%ju: %s)\n", tok._unsigned, - type_storage_unparse(tok.storage)); - break; - default: - assert(0); - } - break; - case T_ERROR: - fprintf(stderr, "ERROR\n"); - break; - case T_EOF: - fprintf(stderr, "EOF\n"); + + struct identifier *ns; + switch (lex(par->lex, &tok)) { + case 
T_DOUBLE_COLON: + ns = calloc(1, sizeof(struct identifier)); + *ns = *i; + i->ns = ns; + i = ns; break; default: - fprintf(stderr, "%s\n", token_str(&tok)); + // TODO: Unlex + return; + } + } +} + +static void +parse_import(struct parser *par, struct ast_imports *imports) +{ + trace(par, "import"); + struct identifier ident = {0}; + parse_identifier(par, &ident); + // TODO: Parse the various forms of imports +} + +static void +parse_imports(struct parser *par, struct ast_subunit *subunit) +{ + trace(par, "imports"); + struct token tok = {0}; + struct ast_imports **next = &subunit->imports; + + while (true) { + struct ast_imports *imports; + switch (lex(par->lex, &tok)) { + case T_USE: + imports = calloc(1, sizeof(struct ast_imports)); + parse_import(par, imports); + *next = imports; + next = &imports->next; break; + default: + // TODO: unlex + return; } - }; + } +} - token_finish(&tok); +void +parse(struct lexer *lex, struct ast_subunit *subunit) +{ + struct parser par = { + .lex = lex, + }; + parse_imports(&par, subunit); }