commit 4e83713538860a3cacc809d120577e72e79dd200
parent d4f105b90fb92e4db0e0947200af156d9a24e2b7
Author: Eyal Sawady <ecs@d2evs.net>
Date: Fri, 11 Dec 2020 17:56:54 -0500
parse: implement declarations
Also parse a minimal subset of types and expressions
Diffstat:
5 files changed, 623 insertions(+), 20 deletions(-)
diff --git a/include/ast.h b/include/ast.h
@@ -1,6 +1,9 @@
#ifndef HARE_AST_H
#define HARE_AST_H
+#include <stdbool.h>
+#include <stdint.h>
#include "identifier.h"
+#include "types.h"
enum ast_import_mode {
AST_IMPORT_IDENTIFIER, // use foo::bar;
@@ -18,25 +21,150 @@ struct ast_imports {
struct ast_imports *next;
};
-enum ast_declaration_type {
+// An array or slice type as written in the source.
+struct ast_list_type {
+	struct ast_expression *length; // NULL for slices and unbounded arrays
+	// Element type. This is an AST node (struct ast_type), like every
+	// other child type referenced from the AST, not a resolved struct type.
+	struct ast_type *members;
+};
+
+// One named value of an enum type; fields are chained through next.
+struct ast_enum_field {
+	const char *name;
+	struct ast_expression *value;
+	// Must point at the same node type so the list can be walked.
+	struct ast_enum_field *next;
+};
+
+// An enum type: a storage kind plus the head of its list of values.
+struct ast_enum_type {
+	// presumably the underlying integer storage -- TODO confirm
+	enum type_storage storage;
+	// First enum field; NULL-ness for an empty enum is not established here.
+	struct ast_enum_field *values;
+};
+
+// How (and whether) a function prototype accepts variadic arguments.
+enum variadism {
+	VARIADISM_NONE, // fixed parameter list
+	VARIADISM_C,    // C-style variadic parameter list
+	VARIADISM_HARE, // Hare-style variadic parameter list
+};
+
+// One `name: type` parameter of a function prototype. Parameters are
+// chained in declaration order through next.
+struct ast_function_parameters {
+	char *name;
+	struct ast_type *type;
+	struct ast_function_parameters *next;
+};
+
+// A function prototype: attributes, result type and parameter list.
+struct ast_function_type {
+	bool noreturn;            // set when @noreturn was given
+	enum variadism variadism; // kind of variadic parameter list, if any
+	struct ast_type *result;  // return type
+	// Head of the parameter list; presumably NULL for `()` -- TODO confirm
+	// that callers always hand in zeroed storage.
+	struct ast_function_parameters *parameters;
+};
+
+// A pointer type and the type it points to.
+struct ast_pointer_type {
+	bool nullable;             // set when the `nullable` keyword was given
+	struct ast_type *referent; // pointed-to type
+};
+
+// One member type of a tagged union; presumably further members follow
+// through next (the parser does not build these yet).
+struct ast_tagged_union_type {
+	struct ast_type *type;
+	struct ast_tagged_union_type *next;
+};
+
+// One named field of a struct or union; presumably further fields follow
+// through next (the parser does not build these yet).
+struct ast_struct_union_type {
+	const char *name;
+	struct ast_type *type;
+	struct ast_struct_union_type *next;
+};
+
+// A type as written in the source. `storage` says which kind of type this
+// is and, for aggregate kinds, which member of the union carries the
+// details.
+struct ast_type {
+	enum type_storage storage;
+	bool constant; // set when the type was written with `const`
+	union {
+		struct identifier alias;                   // TYPE_STORAGE_ALIAS
+		struct ast_list_type array;
+		struct ast_enum_type _enum;
+		struct ast_function_type function;         // TYPE_STORAGE_FUNCTION
+		struct ast_pointer_type pointer;           // TYPE_STORAGE_POINTER
+		struct ast_list_type slice;
+		struct ast_struct_union_type _struct;
+		struct ast_tagged_union_type tagged_union;
+		struct ast_struct_union_type _union;
+	};
+};
+
+// Kinds of expression node; only constants exist so far.
+enum expression_type {
+	EXPR_CONSTANT,
+};
+
+// A literal value. `storage` records the literal's type and thereby which
+// union member holds the value.
+struct ast_constant_expression {
+	enum type_storage storage;
+	union {
+		intmax_t _signed;    // signed integer literals
+		uintmax_t _unsigned; // unsigned integer literals
+		struct {
+			size_t len;
+			char *value;
+		} string;            // string literals
+	};
+};
+
+// An expression node; `type` selects the active union member.
+struct ast_expression {
+	enum expression_type type;
+	union {
+		struct ast_constant_expression constant; // EXPR_CONSTANT
+	};
+};
+
+// One `ident: type = init` binding of a global or constant declaration.
+// Bindings from the same comma-separated declaration are chained via next.
+struct ast_global_decl {
+	char *symbol; // name given with @symbol("..."), if any
+	struct identifier ident;
+	struct ast_type type;
+	struct ast_expression init;
+	struct ast_global_decl *next;
+};
+
+// One `ident = type` binding of a type declaration. Bindings from the
+// same comma-separated declaration are chained via next.
+struct ast_type_decl {
+	struct identifier ident;
+	struct ast_type type;
+	struct ast_type_decl *next;
+};
+
+// Bit flags recording which attributes were given on a function
+// declaration; combined with bitwise OR.
+enum function_flags {
+	FN_FINI = 1 << 0, // @fini
+	FN_INIT = 1 << 1, // @init
+	FN_TEST = 1 << 2, // @test
+};
+
+// A function declaration: attributes, name, prototype and body.
+struct ast_function_decl {
+	char *symbol;   // name given with @symbol("..."), if any
+	uint32_t flags; // enum function_flags
+	struct identifier ident;
+	struct ast_function_type prototype;
+	struct ast_expression body;
+};
+
+enum ast_decl_type {
AST_DECL_FUNC,
AST_DECL_TYPE,
- AST_DECL_VAR,
+ AST_DECL_GLOBAL,
AST_DECL_CONST,
};
-struct ast_declaration {
- enum ast_declaration_type type;
+// A top-level declaration. `decl_type` selects the active union member.
+struct ast_decl {
+	enum ast_decl_type decl_type;
+	bool exported; // set when the declaration was prefixed with `export`
+	union {
+		struct ast_global_decl global;     // AST_DECL_GLOBAL
+		struct ast_global_decl constant;   // AST_DECL_CONST
+		struct ast_type_decl type;         // AST_DECL_TYPE
+		struct ast_function_decl function; // AST_DECL_FUNC
+	};
};
-struct ast_declarations {
- struct ast_declaration decl;
- struct ast_declarations *next;
+// Node of the list of top-level declarations in a subunit.
+struct ast_decls {
+	struct ast_decl decl;
+	struct ast_decls *next; // following declaration, if any
};
struct ast_subunit {
struct ast_imports *imports;
- struct ast_declarations decls;
+ struct ast_decls decls;
struct ast_subunit *next;
};
diff --git a/include/identifier.h b/include/identifier.h
@@ -1,5 +1,6 @@
#ifndef HARE_IDENTIFIER_H
#define HARE_IDENTIFIER_H
+#include <stddef.h>
struct identifier {
char *name;
diff --git a/include/types.h b/include/types.h
@@ -1,25 +1,38 @@
#ifndef HARE_TYPES_H
#define HARE_TYPES_H
+#include <stdbool.h>
+#include "identifier.h"
enum type_storage {
// Scalar types
- TYPE_STORAGE_U8,
- TYPE_STORAGE_U16,
- TYPE_STORAGE_U32,
- TYPE_STORAGE_U64,
- TYPE_STORAGE_I8,
+ TYPE_STORAGE_BOOL,
+ TYPE_STORAGE_CHAR,
+ TYPE_STORAGE_F32,
+ TYPE_STORAGE_F64,
TYPE_STORAGE_I16,
TYPE_STORAGE_I32,
TYPE_STORAGE_I64,
+ TYPE_STORAGE_I8,
TYPE_STORAGE_INT,
TYPE_STORAGE_RUNE,
+ TYPE_STORAGE_SIZE,
+ TYPE_STORAGE_U16,
+ TYPE_STORAGE_U32,
+ TYPE_STORAGE_U64,
+ TYPE_STORAGE_U8,
TYPE_STORAGE_UINT,
TYPE_STORAGE_UINTPTR,
- TYPE_STORAGE_SIZE,
- TYPE_STORAGE_F32,
- TYPE_STORAGE_F64,
+ TYPE_STORAGE_VOID,
// Aggregate types
+ TYPE_STORAGE_ALIAS,
+ TYPE_STORAGE_ARRAY,
+ TYPE_STORAGE_FUNCTION,
+ TYPE_STORAGE_POINTER,
+ TYPE_STORAGE_SLICE,
TYPE_STORAGE_STRING,
+ TYPE_STORAGE_STRUCT,
+ TYPE_STORAGE_TAGGED_UNION,
+ TYPE_STORAGE_UNION,
};
const char *type_storage_unparse(enum type_storage storage);
diff --git a/src/lex.c b/src/lex.c
@@ -989,6 +989,18 @@ token_str(const struct token *tok)
break;
case TYPE_STORAGE_STRING:
return string_unparse(tok);
+ case TYPE_STORAGE_ALIAS:
+ case TYPE_STORAGE_ARRAY:
+ case TYPE_STORAGE_BOOL:
+ case TYPE_STORAGE_CHAR:
+ case TYPE_STORAGE_FUNCTION:
+ case TYPE_STORAGE_POINTER:
+ case TYPE_STORAGE_SLICE:
+ case TYPE_STORAGE_STRUCT:
+ case TYPE_STORAGE_TAGGED_UNION:
+ case TYPE_STORAGE_UNION:
+ case TYPE_STORAGE_VOID:
+ assert(0);
}
return buf;
default:;
diff --git a/src/parse.c b/src/parse.c
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
@@ -17,6 +18,17 @@ struct parser {
};
+// Syntax assertion with a free-form message: when cond is false, reports
+// msg together with the location and text of tok on stderr and exits
+// with status 1. Does nothing when cond holds.
static void
+synassert_msg(bool cond, const char *msg, struct token *tok)
+{
+	if (cond) {
+		return;
+	}
+	fprintf(stderr, "Syntax error: %s at %s:%d:%d ('%s')\n", msg,
+		tok->loc.path, tok->loc.lineno, tok->loc.colno,
+		token_str(tok));
+	exit(1);
+}
+
+static void
synassert(bool cond, struct token *tok, ...)
{
if (!cond) {
@@ -24,11 +36,10 @@ synassert(bool cond, struct token *tok, ...)
va_start(ap, tok);
enum lexical_token t = va_arg(ap, enum lexical_token);
- const char *s = token_str(tok);
fprintf(stderr,
- "Syntax error: unexpected '%s' at %s:%d:%d%s", s,
- tok->loc.path, tok->loc.lineno, tok->loc.colno,
- t == T_EOF ? "\n" : ", expected " );
+ "Syntax error: unexpected '%s' at %s:%d:%d%s",
+ token_str(tok), tok->loc.path, tok->loc.lineno,
+ tok->loc.colno, t == T_EOF ? "\n" : ", expected " );
while (t != T_EOF) {
if (t == T_LITERAL || t == T_NAME) {
fprintf(stderr, "%s", lexical_token_str(t));
@@ -142,6 +153,442 @@ parse_imports(struct parser *par, struct ast_subunit *subunit)
trleave(TR_PARSE, NULL);
}
+// Declared ahead of its definition because types and prototypes refer to
+// each other recursively.
+static void parse_type(struct parser *par, struct ast_type *type);
+
+// Parses one or more `name: type` parameters, separated by commas, and
+// appends them to type->parameters. Also records variadism:
+//   `name: type, ...`  -> VARIADISM_C    (C-style)
+//   `name: type...`    -> VARIADISM_HARE (Hare-style)
+// A trailing comma after the ellipsis is accepted. The closing
+// parenthesis is left for the caller to consume.
+static void
+parse_parameter_list(struct parser *par, struct ast_function_type *type)
+{
+	trenter(TR_PARSE, "parameter-list");
+	struct token tok = {0};
+	bool more = true;
+	struct ast_function_parameters **next = &type->parameters;
+	while (more) {
+		*next = calloc(1, sizeof(struct ast_function_parameters));
+		(*next)->type = calloc(1, sizeof(struct ast_type));
+		want(par, T_NAME, &tok);
+		(*next)->name = tok.name;
+		want(par, T_COLON, NULL);
+		parse_type(par, (*next)->type);
+		switch (lex(par->lex, &tok)) {
+		case T_COMMA:
+			switch (lex(par->lex, &tok)) {
+			case T_ELLIPSIS:
+				// The ellipsis stands on its own after a comma:
+				// C-style variadism.
+				type->variadism = VARIADISM_C;
+				if (lex(par->lex, &tok) != T_COMMA) {
+					unlex(par->lex, &tok);
+				}
+				more = false;
+				break;
+			default:
+				// Another parameter (or the end of the list
+				// after a trailing comma) follows.
+				unlex(par->lex, &tok);
+				next = &(*next)->next;
+				break;
+			}
+			break;
+		case T_ELLIPSIS:
+			// The ellipsis is attached to the last parameter's type:
+			// Hare-style variadism.
+			type->variadism = VARIADISM_HARE;
+			if (lex(par->lex, &tok) != T_COMMA) {
+				unlex(par->lex, &tok);
+			}
+			more = false;
+			break;
+		default:
+			more = false;
+			unlex(par->lex, &tok);
+			break;
+		}
+	}
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses `( parameters? ) result-type` into *type. The parameter list is
+// only parsed when the token after `(` is not `)`; the result type is
+// always allocated and parsed.
+static void
+parse_prototype(struct parser *par, struct ast_function_type *type)
+{
+	trenter(TR_PARSE, "prototype");
+	want(par, T_LPAREN, NULL);
+
+	struct token tok = {0};
+	const bool empty = lex(par->lex, &tok) == T_RPAREN;
+	if (!empty) {
+		// Not `()`: give the token back and read the parameters.
+		unlex(par->lex, &tok);
+		parse_parameter_list(par, type);
+		want(par, T_RPAREN, NULL);
+	}
+
+	struct ast_type *result = calloc(1, sizeof(struct ast_type));
+	type->result = result;
+	parse_type(par, result);
+	// TODO: unparse prototype
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses a type into *type: an optional `const`, then either a builtin
+// type keyword, a (nullable) pointer, a function type, or -- for any
+// other token -- an identifier naming a type alias. Enum, struct, union,
+// tagged union, slice and array types are not implemented yet.
+static void
+parse_type(struct parser *par, struct ast_type *type)
+{
+	// Keyword types that are fully described by their storage.
+	static const struct {
+		enum lexical_token token;
+		enum type_storage storage;
+	} builtins[] = {
+		{ T_I8, TYPE_STORAGE_I8 },
+		{ T_I16, TYPE_STORAGE_I16 },
+		{ T_I32, TYPE_STORAGE_I32 },
+		{ T_I64, TYPE_STORAGE_I64 },
+		{ T_U8, TYPE_STORAGE_U8 },
+		{ T_U16, TYPE_STORAGE_U16 },
+		{ T_U32, TYPE_STORAGE_U32 },
+		{ T_U64, TYPE_STORAGE_U64 },
+		{ T_INT, TYPE_STORAGE_INT },
+		{ T_UINT, TYPE_STORAGE_UINT },
+		{ T_SIZE, TYPE_STORAGE_SIZE },
+		{ T_UINTPTR, TYPE_STORAGE_UINTPTR },
+		{ T_CHAR, TYPE_STORAGE_CHAR },
+		{ T_RUNE, TYPE_STORAGE_RUNE },
+		{ T_STR, TYPE_STORAGE_STRING },
+		{ T_F32, TYPE_STORAGE_F32 },
+		{ T_F64, TYPE_STORAGE_F64 },
+		{ T_BOOL, TYPE_STORAGE_BOOL },
+		{ T_VOID, TYPE_STORAGE_VOID },
+	};
+
+	trenter(TR_PARSE, "type");
+	struct token tok = {0};
+	if (lex(par->lex, &tok) == T_CONST) {
+		type->constant = true;
+	} else {
+		unlex(par->lex, &tok);
+	}
+
+	const enum lexical_token first = lex(par->lex, &tok);
+	bool builtin = false;
+	for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
+		if (builtins[i].token == first) {
+			type->storage = builtins[i].storage;
+			builtin = true;
+			break;
+		}
+	}
+
+	if (!builtin) {
+		switch (first) {
+		case T_ENUM:
+			assert(0); // TODO: Enums
+		case T_NULLABLE:
+			type->pointer.nullable = true;
+			want(par, T_TIMES, NULL);
+			/* fallthrough */
+		case T_TIMES:
+			type->storage = TYPE_STORAGE_POINTER;
+			type->pointer.referent =
+				calloc(1, sizeof(struct ast_type));
+			parse_type(par, type->pointer.referent);
+			break;
+		case T_STRUCT:
+		case T_UNION:
+			assert(0); // TODO: Structs/unions
+		case T_LPAREN:
+			assert(0); // TODO: Tagged unions
+		case T_LBRACKET:
+			assert(0); // TODO: Slices/arrays
+		case T_ATTR_NORETURN:
+			type->function.noreturn = true;
+			want(par, T_FN, NULL);
+			/* fallthrough */
+		case T_FN:
+			type->storage = TYPE_STORAGE_FUNCTION;
+			parse_prototype(par, &type->function);
+			break;
+		default:
+			// Anything else must be the name of a type alias.
+			unlex(par->lex, &tok);
+			type->storage = TYPE_STORAGE_ALIAS;
+			parse_identifier(par, &type->alias);
+			break;
+		}
+	}
+	// TODO: unparse type
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses a simple expression into *exp. Only literals are supported so
+// far: the literal's storage is copied and its value is stored in the
+// union member matching that storage (unsigned, signed or string). Any
+// other token or literal storage trips an assertion.
+static void
+parse_simple_expression(struct parser *par, struct ast_expression *exp)
+{
+	struct token tok = {0};
+	lex(par->lex, &tok);
+	assert(tok.token == T_LITERAL); // TODO: other simple expressions
+
+	struct ast_constant_expression *constant = &exp->constant;
+	exp->type = EXPR_CONSTANT;
+	constant->storage = tok.storage;
+	switch (tok.storage) {
+	case TYPE_STORAGE_STRING:
+		constant->string.len = tok.string.len;
+		constant->string.value = tok.string.value;
+		break;
+	case TYPE_STORAGE_I8:
+	case TYPE_STORAGE_I16:
+	case TYPE_STORAGE_I32:
+	case TYPE_STORAGE_I64:
+	case TYPE_STORAGE_INT:
+		constant->_signed = (intmax_t)tok._signed;
+		break;
+	case TYPE_STORAGE_CHAR:
+	case TYPE_STORAGE_U8:
+	case TYPE_STORAGE_U16:
+	case TYPE_STORAGE_U32:
+	case TYPE_STORAGE_U64:
+	case TYPE_STORAGE_UINT:
+	case TYPE_STORAGE_UINTPTR:
+	case TYPE_STORAGE_SIZE:
+		constant->_unsigned = (uintmax_t)tok._unsigned;
+		break;
+	default:
+		assert(0); // TODO
+	}
+}
+
+// Parses a complex expression into *exp. For now this simply delegates
+// to parse_simple_expression.
+static void
+parse_complex_expression(struct parser *par, struct ast_expression *exp)
+{
+	// TODO: other complex expressions
+	parse_simple_expression(par, exp);
+}
+
+// Parses the `("name")` argument of an @symbol attribute and returns the
+// name (the string owned by the literal token). The name must be a
+// non-empty string literal made of ASCII letters, digits, '_', '$' and
+// '.', and must not start with a digit or '$'. Any violation is reported
+// as a syntax error, which terminates the program.
+static char *
+parse_attr_symbol(struct parser *par)
+{
+	struct token tok = {0};
+	want(par, T_LPAREN, NULL);
+	want(par, T_LITERAL, &tok);
+	synassert_msg(tok.storage == TYPE_STORAGE_STRING,
+		"expected string literal", &tok);
+	// The loop below never runs for "", so reject it explicitly: an
+	// empty name is not a usable symbol.
+	synassert_msg(tok.string.len != 0, "invalid symbol", &tok);
+	for (size_t i = 0; i < tok.string.len; i++) {
+		// Go through unsigned char so bytes >= 0x80 are seen as such
+		// regardless of the signedness of char; the range check also
+		// keeps the <ctype.h> calls within their defined domain.
+		uint32_t c = (unsigned char)tok.string.value[i];
+		synassert_msg(c <= 0x7F && (isalnum(c) || c == '_' || c == '$'
+			|| c == '.'), "invalid symbol", &tok);
+		synassert_msg(i != 0 || (!isdigit(c) && c != '$'),
+			"invalid symbol", &tok);
+	}
+	want(par, T_RPAREN, NULL);
+	return tok.string.value;
+}
+
+// Parses the bindings of a `let`, `const` or `def` declaration (the
+// keyword itself was already consumed and is passed as mode):
+//   [@symbol("...")] ident: type = expr [, ...]
+// The first binding is stored in *decl, each further one in a newly
+// allocated node linked through next. @symbol is only accepted for
+// `let` and `const`; for `const` the type is additionally marked
+// constant. A trailing comma after the last binding is consumed.
+static void
+parse_global_decl(struct parser *par, enum lexical_token mode,
+		struct ast_global_decl *decl)
+{
+	trenter(TR_PARSE, "global");
+	struct token tok = {0};
+	struct ast_global_decl *i = decl;
+	assert(mode == T_LET || mode == T_CONST || mode == T_DEF);
+	bool more = true;
+	while (more) {
+		if (mode == T_LET || mode == T_CONST) {
+			switch (lex(par->lex, &tok)) {
+			case T_ATTR_SYMBOL:
+				i->symbol = parse_attr_symbol(par);
+				break;
+			default:
+				unlex(par->lex, &tok);
+				break;
+			}
+		}
+		parse_identifier(par, &i->ident);
+		want(par, T_COLON, NULL);
+		parse_type(par, &i->type);
+		if (mode == T_CONST) {
+			i->type.constant = true;
+		}
+		want(par, T_EQUAL, NULL);
+		parse_simple_expression(par, &i->init);
+		switch (lex(par->lex, &tok)) {
+		case T_COMMA:
+			// Look one token past the comma to tell another
+			// binding from a trailing comma.
+			lex(par->lex, &tok);
+			if (tok.token == T_NAME || tok.token == T_ATTR_SYMBOL) {
+				i->next = calloc(1, sizeof(struct ast_global_decl));
+				i = i->next;
+				unlex(par->lex, &tok);
+				break;
+			}
+			/* fallthrough */
+		default:
+			more = false;
+			unlex(par->lex, &tok);
+			break;
+		}
+	}
+
+	// Trace every binding with its own symbol, not the first one's.
+	for (struct ast_global_decl *d = decl; d; d = d->next) {
+		char ibuf[1024], tbuf[1024], ebuf[1024];
+		identifier_unparse_static(&d->ident, ibuf, sizeof(ibuf));
+		snprintf(tbuf, sizeof(tbuf), "%s", "[type]"); // TODO: unparse type
+		snprintf(ebuf, sizeof(ebuf), "%s", "[expr]"); // TODO: unparse expr
+		if (d->symbol) {
+			trace(TR_PARSE, "%s @symbol(\"%s\") %s: %s = %s",
+				lexical_token_str(mode), d->symbol, ibuf,
+				tbuf, ebuf);
+		} else {
+			trace(TR_PARSE, "%s %s: %s = %s",
+				lexical_token_str(mode), ibuf, tbuf, ebuf);
+		}
+	}
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses the bindings of a `type` declaration (the keyword itself was
+// already consumed):
+//   ident = type [, ...]
+// The first binding is stored in *decl, each further one in a newly
+// allocated node linked through next. A trailing comma after the last
+// binding is consumed.
+static void
+parse_type_decl(struct parser *par, struct ast_type_decl *decl)
+{
+	trenter(TR_PARSE, "typedef");
+	struct token tok = {0};
+	struct ast_type_decl *i = decl;
+	bool more = true;
+	while (more) {
+		parse_identifier(par, &i->ident);
+		want(par, T_EQUAL, NULL);
+		parse_type(par, &i->type);
+		switch (lex(par->lex, &tok)) {
+		case T_COMMA:
+			// Look exactly one token past the comma; lexing twice
+			// here would silently drop a token.
+			if (lex(par->lex, &tok) == T_NAME) {
+				i->next = calloc(1, sizeof(struct ast_type_decl));
+				i = i->next;
+				unlex(par->lex, &tok);
+				break;
+			}
+			/* fallthrough */
+		default:
+			more = false;
+			unlex(par->lex, &tok);
+			break;
+		}
+	}
+
+	for (struct ast_type_decl *d = decl; d; d = d->next) {
+		char ibuf[1024], tbuf[1024];
+		identifier_unparse_static(&d->ident, ibuf, sizeof(ibuf));
+		snprintf(tbuf, sizeof(tbuf), "%s", "[type]"); // TODO: unparse type
+		trace(TR_PARSE, "def %s = %s", ibuf, tbuf);
+	}
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses a function declaration into *decl:
+//   attributes* fn ident prototype = expr
+// Recognised attributes are @fini, @init, @test (recorded in flags),
+// @symbol("...") and @noreturn. Attributes may appear in any order.
+static void
+parse_fn_decl(struct parser *par, struct ast_function_decl *decl)
+{
+	trenter(TR_PARSE, "fn");
+	struct token tok = {0};
+	bool attrs = true;
+	do {
+		switch (lex(par->lex, &tok)) {
+		case T_ATTR_FINI:
+			decl->flags |= FN_FINI;
+			break;
+		case T_ATTR_INIT:
+			decl->flags |= FN_INIT;
+			break;
+		case T_ATTR_TEST:
+			decl->flags |= FN_TEST;
+			break;
+		case T_ATTR_NORETURN:
+			decl->prototype.noreturn = true;
+			break;
+		case T_ATTR_SYMBOL:
+			decl->symbol = parse_attr_symbol(par);
+			break;
+		default:
+			// First token that is not an attribute: hand it back.
+			unlex(par->lex, &tok);
+			attrs = false;
+			break;
+		}
+	} while (attrs);
+
+	want(par, T_FN, NULL);
+	parse_identifier(par, &decl->ident);
+	parse_prototype(par, &decl->prototype);
+	want(par, T_EQUAL, NULL);
+	parse_complex_expression(par, &decl->body);
+
+	// Build the trace line; the symbol text is only formatted (and only
+	// read) when a symbol was given.
+	char name[1024], symbol[1024];
+	const char *symtext = "";
+	if (decl->symbol) {
+		snprintf(symbol, sizeof(symbol), "@symbol(\"%s\") ", decl->symbol);
+		symtext = symbol;
+	}
+	identifier_unparse_static(&decl->ident, name, sizeof(name));
+	trace(TR_PARSE, "%s%s%s%s%sfn %s %s = %s",
+		decl->flags & FN_FINI ? "@fini " : "",
+		decl->flags & FN_INIT ? "@init " : "",
+		decl->prototype.noreturn ? "@noreturn " : "",
+		decl->flags & FN_TEST ? "@test " : "",
+		symtext, name, "[prototype]", "[expr]");
+	trleave(TR_PARSE, NULL);
+}
+
+// Parses one declaration into *decl, dispatching on its first token:
+// `const`/`let` -> global, `def` -> constant, `type` -> type
+// declaration; anything else is handed back to the lexer and parsed as a
+// function declaration.
+static void
+parse_decl(struct parser *par, struct ast_decl *decl)
+{
+	struct token tok = {0};
+	const enum lexical_token kind = lex(par->lex, &tok);
+	if (kind == T_CONST || kind == T_LET) {
+		decl->decl_type = AST_DECL_GLOBAL;
+		parse_global_decl(par, tok.token, &decl->global);
+	} else if (kind == T_DEF) {
+		decl->decl_type = AST_DECL_CONST;
+		parse_global_decl(par, tok.token, &decl->constant);
+	} else if (kind == T_TYPE) {
+		decl->decl_type = AST_DECL_TYPE;
+		parse_type_decl(par, &decl->type);
+	} else {
+		unlex(par->lex, &tok);
+		decl->decl_type = AST_DECL_FUNC;
+		parse_fn_decl(par, &decl->function);
+	}
+}
+
+// Parses `[export] declaration ;` repeatedly until end of input. The
+// first declaration is stored in *decls, every further one in a newly
+// allocated node linked through next; the last node's next is NULL.
+// At least one declaration is required.
+static void
+parse_decls(struct parser *par, struct ast_decls *decls)
+{
+	trenter(TR_PARSE, "decls");
+	struct token tok = {0};
+	struct ast_decls *cur = decls;
+	for (;;) {
+		if (lex(par->lex, &tok) == T_EXPORT) {
+			cur->decl.exported = true;
+			trace(TR_PARSE, "export");
+		} else {
+			unlex(par->lex, &tok);
+		}
+		parse_decl(par, &cur->decl);
+		want(par, T_SEMICOLON, NULL);
+
+		// Only grow the list when something follows the semicolon.
+		if (lex(par->lex, &tok) == T_EOF) {
+			break;
+		}
+		unlex(par->lex, &tok);
+		cur->next = calloc(1, sizeof(struct ast_decls));
+		cur = cur->next;
+	}
+	cur->next = NULL;
+	trleave(TR_PARSE, NULL);
+}
+
void
parse(struct lexer *lex, struct ast_subunit *subunit)
{
@@ -149,4 +596,6 @@ parse(struct lexer *lex, struct ast_subunit *subunit)
.lex = lex,
};
parse_imports(&par, subunit);
+ parse_decls(&par, &subunit->decls);
+ want(&par, T_EOF, NULL);
}