commit 56e3c4ea3d2096cce28bbda2ef626f1112f6c34d
parent 78ff6f8f8a7873d2ea073064733bc221b7833024
Author: Drew DeVault <sir@cmpwn.com>
Date: Sun, 22 Nov 2020 08:44:04 -0500
parse: initial riggings
Diffstat:
5 files changed, 134 insertions(+), 74 deletions(-)
diff --git a/include/lex.h b/include/lex.h
@@ -146,5 +146,6 @@ uint32_t lex(struct lexer *lexer, struct token *out);
void token_finish(struct token *tok);
const char *token_str(const struct token *tok);
+const char *lexical_token_str(enum lexical_token tok);
#endif
diff --git a/include/parse.h b/include/parse.h
@@ -2,10 +2,9 @@
#define HAREC_PARSE_H
#include <stdio.h>
-struct ast_unit;
-struct identifier;
+struct ast_subunit;
struct lexer;
-void parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit);
+void parse(struct lexer *lexer, struct ast_subunit *unit);
#endif
diff --git a/src/lex.c b/src/lex.c
@@ -203,7 +203,7 @@ lex_name(struct lexer *lexer, struct token *out)
}
}
out->token = T_EOF;
- return c;
+ return out->token;
lookup:;
void *token = bsearch(&lexer->buf, tokens, T_LAST_KEYWORD + 1,
@@ -215,7 +215,7 @@ lookup:;
out->token = (const char **)token - tokens;
}
consume(lexer, -1);
- return c;
+ return out->token;
}
static uint32_t
@@ -319,7 +319,7 @@ finalize:
if (!isvalid) {
out->token = T_ERROR;
consume(lexer, -1);
- return c;
+ return out->token;
}
}
@@ -330,7 +330,7 @@ finalize:
if (endptr == exp) {
out->token = T_ERROR;
consume(lexer, -1);
- return c;
+ return out->token;
}
}
@@ -368,7 +368,7 @@ finalize:
out->token = T_ERROR;
}
consume(lexer, -1);
- return c;
+ return out->token;
}
static uint32_t
@@ -449,7 +449,7 @@ lex_string(struct lexer *lexer, struct token *out)
out->string.len = lexer->buflen;
out->string.value = buf;
consume(lexer, -1);
- return c;
+ return out->token;
default:
push(lexer, c, false);
push(lexer, lex_rune(lexer), false);
@@ -473,7 +473,7 @@ lex_string(struct lexer *lexer, struct token *out)
assert(c == '\'');
out->token = T_LITERAL;
out->storage = TYPE_STORAGE_RUNE;
- return c;
+ return out->token;
default:
assert(0); // Invariant
}
@@ -553,7 +553,7 @@ lex3(struct lexer *lexer, struct token *out, uint32_t c)
assert(0); // Invariant
}
- return c;
+ return out->token;
}
static uint32_t
@@ -705,7 +705,7 @@ lex2(struct lexer *lexer, struct token *out, uint32_t c)
assert(0); // Invariant
}
- return c;
+ return out->token;
}
uint32_t
@@ -714,7 +714,7 @@ lex(struct lexer *lexer, struct token *out)
uint32_t c = wgetc(lexer);
if (c == UTF8_INVALID) {
out->token = T_EOF;
- return c;
+ return out->token;
}
if (c <= 0x7F && (isalpha(c) || c == '_')) {
@@ -780,7 +780,7 @@ lex(struct lexer *lexer, struct token *out)
break;
}
- return c;
+ return out->token;
}
void
@@ -807,13 +807,32 @@ token_finish(struct token *tok)
}
+/*
+ * Returns a constant, human-readable label for a kind of lexical token.
+ *
+ * T_NAME, T_LITERAL, T_EOF and T_ERROR get fixed descriptive labels; every
+ * other value is used as an index into the tokens table, after asserting that
+ * it is within the table's bounds.
+ */
 const char *
+lexical_token_str(enum lexical_token tok)
+{
+	const char *label;
+
+	switch (tok) {
+	case T_NAME:
+		label = "name";
+		break;
+	case T_LITERAL:
+		label = "literal";
+		break;
+	case T_EOF:
+		label = "end of file";
+		break;
+	case T_ERROR:
+		label = "error";
+		break;
+	default:
+		assert(tok < sizeof(tokens) / sizeof(tokens[0]));
+		label = tokens[tok];
+		break;
+	}
+	return label;
+}
+
+/*
+ * Returns the text used to describe tok in diagnostics: the identifier itself
+ * for T_NAME, otherwise the generic label from lexical_token_str().
+ *
+ * The returned pointer is either tok->name or a string owned by
+ * lexical_token_str(); the caller must not free it.
+ */
+const char *
 token_str(const struct token *tok)
 {
 	switch (tok->token) {
 	case T_NAME:
 		return tok->name;
+	case T_LITERAL:
+		// TODO: render the literal's value. Until that exists, use the
+		// generic label rather than assert(0), so that a syntax error on
+		// an unexpected literal is reported instead of aborting.
+		return lexical_token_str(tok->token);
 	default:
-		assert(tok->token < sizeof(tokens) / sizeof(tokens[0]));
-		return tokens[tok->token];
+		return lexical_token_str(tok->token);
 	}
 }
diff --git a/src/main.c b/src/main.c
@@ -9,8 +9,8 @@ main(int argc, char *argv[])
struct lexer lexer;
lex_init(&lexer, stdin);
- struct ast_unit unit;
- parse(&lexer, NULL, &unit);
+ struct ast_subunit subunit;
+ parse(&lexer, &subunit);
lex_finish(&lexer);
return 0;
diff --git a/src/parse.c b/src/parse.c
@@ -1,73 +1,114 @@
-#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include "ast.h"
#include "identifier.h"
#include "lex.h"
#include "parse.h"
-#include "utf8.h"
#include "types.h"
+#include "utf8.h"
-void
-parse(struct lexer *lexer, struct identifier *ns, struct ast_unit *unit)
+struct parser { // State handed to trace() and to each parse_* routine
+	struct lexer *lex; // Lexer the parse_* routines pull tokens from via lex()
+};
+
+/*
+ * Debugging aid: writes name and a newline to stderr, but only when the
+ * HAREC_TRACE environment variable is present. par is currently unused.
+ */
+static void
+trace(struct parser *par, const char *name)
+{
+	const char *enabled = getenv("HAREC_TRACE");
+
+	if (enabled != NULL) {
+		fprintf(stderr, "%s\n", name);
+	}
+}
+
+/*
+ * Aborts the parse with a diagnostic unless cond holds.
+ *
+ * tok is the offending token; it is printed via token_str(). The variadic
+ * arguments are the lexical tokens that would have been accepted here, and
+ * the list MUST be terminated with T_EOF (which is never printed itself).
+ * On failure the message is written to stderr and the process exits with
+ * status 1; on success nothing happens and the variadic list is not read.
+ */
+static void
+synassert(bool cond, struct token *tok, ...)
+{
+	if (cond) {
+		return;
+	}
+
+	va_list ap;
+	va_start(ap, tok);
+
+	// Enumeration constants have type int, so that is the type the
+	// arguments must be fetched as; the value is converted back on
+	// assignment.
+	enum lexical_token t = va_arg(ap, int);
+
+	// TODO: file name, lineno, colno
+	fprintf(stderr,
+		"Syntax error: unexpected '%s'%s",
+		token_str(tok),
+		t == T_EOF ? "\n" : ", expected " );
+	while (t != T_EOF) {
+		fprintf(stderr, "%s", lexical_token_str(t));
+		t = va_arg(ap, int);
+		fprintf(stderr, "%s", t == T_EOF ? "\n" : ", ");
+	}
+
+	va_end(ap);
+	exit(1);
+}
+
+/*
+ * Parses an identifier of the form NAME { "::" NAME } into ident.
+ *
+ * On return ident->name is a heap copy of the LAST component, and ident->ns
+ * chains to heap-allocated nodes for the preceding components, nearest
+ * first; for "a::b::c" the result is c -> ns b -> ns a. ident is expected to
+ * be zeroed by the caller, since its ns field is copied into the first
+ * namespace node.
+ *
+ * Exits through synassert() if a component is not a T_NAME token, and exits
+ * with status 1 if memory cannot be allocated.
+ *
+ * NOTE: the token that ends the identifier has already been consumed from
+ * the lexer and is dropped here (see the TODO below).
+ */
+static void
+parse_identifier(struct parser *par, struct identifier *ident)
 {
 	struct token tok = {0};
+	trace(par, "identifier");
-	while (tok.token != T_EOF) {
+	while (true) {
+		synassert(lex(par->lex, &tok) == T_NAME, &tok, T_NAME, T_EOF);
+		ident->name = strdup(tok.name);
+		if (ident->name == NULL) {
+			fprintf(stderr, "Out of memory\n");
+			exit(1);
+		}
 		token_finish(&tok);
-		lex(lexer, &tok);
-		switch (tok.token) {
-		case T_NAME:
-			fprintf(stderr, "'%s'\n", tok.name);
-			break;
-		case T_LITERAL:
-			switch (tok.storage) {
-			case TYPE_STORAGE_F32:
-			case TYPE_STORAGE_F64:
-				fprintf(stderr, "(%lf: %s)\n", tok._float,
-					type_storage_unparse(tok.storage));
-				break;
-			case TYPE_STORAGE_I8:
-			case TYPE_STORAGE_I16:
-			case TYPE_STORAGE_I32:
-			case TYPE_STORAGE_I64:
-			case TYPE_STORAGE_INT:
-				fprintf(stderr, "(%jd: %s)\n", tok._signed,
-					type_storage_unparse(tok.storage));
-				break;
-			case TYPE_STORAGE_RUNE:
-				putc('\'', stderr);
-				utf8_fputch(stderr, tok.rune);
-				putc('\'', stderr);
-				putc('\n', stderr);
-				break;
-			case TYPE_STORAGE_STRING:
-				fprintf(stderr, "\"%*s\"\n", (int)tok.string.len,
-					tok.string.value);
-				break;
-			case TYPE_STORAGE_SIZE:
-			case TYPE_STORAGE_U8:
-			case TYPE_STORAGE_U16:
-			case TYPE_STORAGE_U32:
-			case TYPE_STORAGE_U64:
-			case TYPE_STORAGE_UINT:
-				fprintf(stderr, "(%ju: %s)\n", tok._unsigned,
-					type_storage_unparse(tok.storage));
-				break;
-			default:
-				assert(0);
-			}
-			break;
-		case T_ERROR:
-			fprintf(stderr, "ERROR\n");
-			break;
-		case T_EOF:
-			fprintf(stderr, "EOF\n");
+
+		struct identifier *ns;
+		switch (lex(par->lex, &tok)) {
+		case T_DOUBLE_COLON:
+			// What has been parsed so far turns out to be a namespace:
+			// move it into its own node and keep filling the head node,
+			// so the head always ends up holding the final component.
+			ns = calloc(1, sizeof(struct identifier));
+			if (ns == NULL) {
+				fprintf(stderr, "Out of memory\n");
+				exit(1);
+			}
+			*ns = *ident;
+			ident->ns = ns;
+			// The name string is now owned by ns.
+			ident->name = NULL;
 			break;
 		default:
-			fprintf(stderr, "%s\n", token_str(&tok));
+			// TODO: Unlex
+			return;
+		}
+	}
+}
+
+/*
+ * Parses a single import. For now this only consumes the imported
+ * identifier; nothing is stored in imports yet.
+ *
+ * NOTE(review): the parsed identifier is discarded when this function
+ * returns, so whatever parse_identifier() allocated for it is not released.
+ */
+static void
+parse_import(struct parser *par, struct ast_imports *imports)
+{
+	struct identifier name = {0};
+
+	trace(par, "import");
+	parse_identifier(par, &name);
+	// TODO: Parse the various forms of imports
+}
+
+/*
+ * Parses the leading run of imports. Every T_USE token introduces one
+ * import, which is parsed by parse_import() and appended, in source order,
+ * to the list rooted at subunit->imports. Parsing stops at the first token
+ * that is not T_USE.
+ *
+ * subunit->imports is always overwritten: it is NULL on return when the
+ * input contains no imports. Exits with status 1 if memory cannot be
+ * allocated.
+ *
+ * NOTE: the token that ends the run has already been consumed from the lexer
+ * and is dropped here (see the TODO below).
+ */
+static void
+parse_imports(struct parser *par, struct ast_subunit *subunit)
+{
+	trace(par, "imports");
+	struct token tok = {0};
+	struct ast_imports **next = &subunit->imports;
+
+	// The caller's subunit is not required to be initialized; start from
+	// an empty list so the field is well-defined even without imports.
+	*next = NULL;
+
+	while (true) {
+		struct ast_imports *imports;
+		switch (lex(par->lex, &tok)) {
+		case T_USE:
+			imports = calloc(1, sizeof(struct ast_imports));
+			if (imports == NULL) {
+				fprintf(stderr, "Out of memory\n");
+				exit(1);
+			}
+			parse_import(par, imports);
+			// Link the new node at the tail; calloc left its next NULL.
+			*next = imports;
+			next = &imports->next;
 			break;
+		default:
+			// TODO: unlex
+			return;
 		}
-	};
+	}
+}
- token_finish(&tok);
+/*
+ * Parser entry point: wraps lex in a parser state and parses the imports of
+ * one subunit into subunit. Nothing beyond imports is parsed yet.
+ */
+void
+parse(struct lexer *lex, struct ast_subunit *subunit)
+{
+	struct parser par;
+
+	par.lex = lex;
+	parse_imports(&par, subunit);
 }