harec

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 8b9ba5c36380ea10047e4384ff03703d939dbc68
parent aba6bcec52f191372537f40d10d4c12a5888bdc8
Author: Drew DeVault <sir@cmpwn.com>
Date:   Fri, 19 Nov 2021 10:01:26 +0100

Overhaul allocation expressions

Prior to this change, allocation expressions changed their behavior
dramatically depending on context. This change significantly reduces the
effect of the type hint on the behavior of alloc, and uses distinct
grammatical constructs to address each case instead.

Allocations are now broken down into four categories:

- Objects		// alloc(42)
- Slice with capacity	// alloc([], 42);
- Slice with length	// alloc([0...], 42);
- Slice copies		// alloc(x...)

All of these previously lived under the same code path and used mostly
the same grammar, and had a lot of hacks to distinguish them. Now they
are distinguished during parse, except cap/length, which is done in
check, and is made easier/less hacky thanks to the previous array
expandability change.

There is likely to be at least one additional change to allocations in
the future: presently the nullable flag in the type hint is used to
determine if the allocation should abort on allocation failure or not.
In the future, we are likely to make OOM handling mandatory, so this
will change.

I also think there's a potential footgun in the alloc(x...) case,
because in some cases `alloc(x)` will compile but do something
unexpected -- allocate a *[]int which refers to the same underlying data
source, instead of creating a []int which allocates a separate copy of
the underlying data. It may be wise to revisit this later.

Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
Minclude/ast.h | 16++++++++++------
Minclude/expr.h | 23+++++++++++++++++------
Mrt/malloc.ha | 2+-
Msrc/check.c | 164+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Msrc/gen.c | 179+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Msrc/parse.c | 13+++++++++++--
Msrc/types.c | 7++++++-
Mtests/17-alloc.ha | 27++++++++++++++++++++++++++-
8 files changed, 326 insertions(+), 105 deletions(-)

diff --git a/include/ast.h b/include/ast.h @@ -97,6 +97,11 @@ struct ast_type { }; }; +struct ast_expression_list { + struct ast_expression *expr; + struct ast_expression_list *next; +}; + struct ast_expression_access { enum access_type type; union { @@ -117,7 +122,11 @@ struct ast_expression_access { }; struct ast_expression_alloc { - struct ast_expression *expr; + enum alloc_kind kind; + union { + struct ast_expression *init; + struct ast_expression_list *items; + }; struct ast_expression *cap; }; @@ -235,11 +244,6 @@ struct ast_expression_insert { bool is_static; }; -struct ast_expression_list { - struct ast_expression *expr; - struct ast_expression_list *next; -}; - struct ast_expression_compound { char *label; struct location label_loc; diff --git a/include/expr.h b/include/expr.h @@ -40,6 +40,11 @@ enum expr_type { EXPR_YIELD, }; +struct expressions { + struct expression *expr; + struct expressions *next; +}; + enum access_type { ACCESS_IDENTIFIER, ACCESS_INDEX, @@ -67,8 +72,19 @@ struct expression_access { }; }; +enum alloc_kind { + ALLOC_OBJECT, // alloc(42) + ALLOC_WITH_CAP, // alloc([], 42) + ALLOC_WITH_LEN, // alloc([0...], 42) + ALLOC_COPY, // alloc(x...); +}; + struct expression_alloc { - struct expression *expr; + enum alloc_kind kind; + union { + struct expression *init; + struct expressions *items; + }; struct expression *cap; }; @@ -154,11 +170,6 @@ struct expression_call { struct call_argument *args; }; -struct expressions { - struct expression *expr; - struct expressions *next; -}; - struct expression_compound { char *label; struct scope *scope; diff --git a/rt/malloc.ha b/rt/malloc.ha @@ -114,8 +114,8 @@ fn list_from_block(s: size, p: uintptr) nullable *void = { // Frees a pointer previously allocated with [malloc]. export @symbol("rt.free") fn free_(_p: nullable *void) void = { - nfree += 1; if (_p != null) { + nfree += 1; let p = _p: *void; let bsize = (p: uintptr - size(size): uintptr): *[1]size; let s = bsize[0]; diff --git a/src/check.c b/src/check.c @@ -274,79 +274,121 @@ check_expr_access(struct context *ctx, } static void -check_expr_alloc(struct context *ctx, +check_expr_alloc_init(struct context *ctx, const struct ast_expression *aexpr, struct expression *expr, const struct type *hint) { - assert(aexpr->type == EXPR_ALLOC); - expr->type = EXPR_ALLOC; - expr->alloc.expr = xcalloc(sizeof(struct expression), 1); - const struct type *inittype = NULL; - if (hint && type_dealias(hint)->storage == STORAGE_POINTER) { - inittype = type_dealias(hint)->pointer.referent; - } else if (hint && type_dealias(hint)->storage == STORAGE_SLICE) { - inittype = hint; + // alloc(initializer) case + int ptrflags = 0; + const struct type *inithint = NULL; + if (hint) { + const struct type *htype = type_dealias(hint); + switch (htype->storage) { + case STORAGE_POINTER: + inithint = htype->pointer.referent; + // TODO: Describe the use of pointer flags in the spec + ptrflags = htype->pointer.flags; + break; + case STORAGE_SLICE: + case STORAGE_ARRAY: + inithint = hint; + break; + default: + // The user's code is wrong here, but we'll let it fail + // later. + break; + } } - check_expression(ctx, aexpr->alloc.expr, expr->alloc.expr, inittype); - inittype = expr->alloc.expr->result; - int flags = 0; - if (hint && type_is_assignable(hint, inittype)) { - if (type_dealias(hint)->storage == STORAGE_SLICE) { - inittype = hint; - } else if (type_dealias(hint)->storage == STORAGE_POINTER) { - inittype = type_dealias(hint)->pointer.referent; - flags = type_dealias(hint)->pointer.flags; - } + check_expression(ctx, aexpr->alloc.init, expr->alloc.init, inithint); + + const struct type *objtype = expr->alloc.init->result; + expr->result = type_store_lookup_pointer(ctx->store, objtype, ptrflags); +} + +static void +check_expr_alloc_slice(struct context *ctx, + const struct ast_expression *aexpr, + struct expression *expr, + const struct type *hint) +{ + // alloc(init, capacity) case + check_expression(ctx, aexpr->alloc.init, expr->alloc.init, hint); + + const struct type *objtype = expr->alloc.init->result; + if (type_dealias(objtype)->storage != STORAGE_ARRAY + && type_dealias(objtype)->storage != STORAGE_SLICE) { + error(ctx, aexpr->alloc.init->loc, expr, + "Slice initializer must be of slice or array type, not %s", + type_storage_unparse(type_dealias(objtype)->storage)); + return; } - switch (type_dealias(inittype)->storage) { - case STORAGE_SLICE: - expr->result = inittype; - break; - case STORAGE_ARRAY: - if (aexpr->alloc.cap) { - expr->result = type_store_lookup_slice(ctx->store, - type_dealias(inittype)->array.members); - } else { - expr->result = type_store_lookup_pointer(ctx->store, - inittype, flags); - } - break; - default: - expr->result = type_store_lookup_pointer(ctx->store, - inittype, flags); - break; + const struct type *caphint = &builtin_type_size; + expr->alloc.cap = xcalloc(1, sizeof(struct expression)); + check_expression(ctx, aexpr->alloc.cap, expr->alloc.cap, caphint); + + const struct type *captype = expr->alloc.cap->result; + if (!type_is_assignable(&builtin_type_size, captype)) { + error(ctx, aexpr->alloc.cap->loc, expr, + "Slice capacity must be assignable to size"); + return; } - enum type_storage storage = type_dealias(expr->result)->storage; - switch (storage) { - case STORAGE_POINTER: - if (aexpr->alloc.cap != NULL) { - error(ctx, aexpr->alloc.cap->loc, expr, - "Allocation with capacity must be of slice type, not %s", - type_storage_unparse(storage)); - return; - } + const struct type *membtype = type_dealias(objtype)->array.members; + expr->result = type_store_lookup_slice(ctx->store, membtype); + + if (objtype->storage == STORAGE_ARRAY + && objtype->array.expandable) { + expr->alloc.kind = ALLOC_WITH_LEN; + } +} + +static void +check_expr_alloc_copy(struct context *ctx, + const struct ast_expression *aexpr, + struct expression *expr, + const struct type *hint) +{ + // alloc(init...) case + check_expression(ctx, aexpr->alloc.init, expr->alloc.init, hint); + + const struct type *result = expr->alloc.init->result; + if (result != hint) { + // TODO: We might be able to be less strict on this. We could + // copy slices of types which differ only based on the flag, or + // copy an array into a new slice. + error(ctx, aexpr->alloc.init->loc, expr, + "Cannot copy a slice to a slice of another type"); + return; + } + + expr->result = result; +} + +static void +check_expr_alloc(struct context *ctx, + const struct ast_expression *aexpr, + struct expression *expr, + const struct type *hint) +{ + assert(aexpr->type == EXPR_ALLOC); + expr->type = EXPR_ALLOC; + expr->alloc.init = xcalloc(1, sizeof(struct expression)); + expr->alloc.kind = aexpr->alloc.kind; + switch (aexpr->alloc.kind) { + case ALLOC_OBJECT: + check_expr_alloc_init(ctx, aexpr, expr, hint); break; - case STORAGE_SLICE: - if (aexpr->alloc.cap != NULL) { - expr->alloc.cap = xcalloc(sizeof(struct expression), 1); - check_expression(ctx, aexpr->alloc.cap, expr->alloc.cap, &builtin_type_size); - if (!type_is_assignable(&builtin_type_size, - expr->alloc.cap->result)) { - error(ctx, aexpr->alloc.cap->loc, expr, - "Allocation capacity must be assignable to size"); - return; - } - } + case ALLOC_WITH_CAP: + check_expr_alloc_slice(ctx, aexpr, expr, hint); break; - default: - error(ctx, aexpr->loc, expr, - "Allocation type must be pointer or slice, not %s", - type_storage_unparse(storage)); - return; + case ALLOC_COPY: + check_expr_alloc_copy(ctx, aexpr, expr, hint); + break; + case ALLOC_WITH_LEN: + abort(); // Not determined by parse } } diff --git a/src/gen.c b/src/gen.c @@ -380,7 +380,8 @@ gen_expr_access(struct gen_context *ctx, const struct expression *expr) static void gen_alloc_slice_at(struct gen_context *ctx, const struct expression *expr, - struct gen_value out) + struct gen_value out, + bool expand) { struct qbe_value qcap; if (expr->alloc.cap) { @@ -391,14 +392,14 @@ gen_alloc_slice_at(struct gen_context *ctx, struct gen_value init; struct qbe_value qinit; struct qbe_value length, initdata; - const struct type *inittype = type_dealias(expr->alloc.expr->result); + const struct type *inittype = type_dealias(expr->alloc.init->result); switch (inittype->storage) { case STORAGE_ARRAY: assert(inittype->array.length != SIZE_UNDEFINED); length = constl(inittype->array.length); break; case STORAGE_SLICE: - init = gen_expr(ctx, expr->alloc.expr); + init = gen_expr(ctx, expr->alloc.init); qinit = mkqval(ctx, &init); enum qbe_instr load = load_for_type(ctx, &builtin_type_size); initdata = mkqtmp(ctx, ctx->arch.ptr, ".%d"); @@ -446,7 +447,13 @@ gen_alloc_slice_at(struct gen_context *ctx, enum qbe_instr store = store_for_type(ctx, &builtin_type_size); pushi(ctx->current, NULL, store, &data, &base, NULL); pushi(ctx->current, &ptr, Q_ADD, &base, &offset, NULL); - pushi(ctx->current, NULL, store, &length, &ptr, NULL); + + if (expand) { + pushi(ctx->current, NULL, store, &qcap, &ptr, NULL); + } else { + pushi(ctx->current, NULL, store, &length, &ptr, NULL); + } + offset = constl(builtin_type_size.size * 2); pushi(ctx->current, &ptr, Q_ADD, &base, &offset, NULL); pushi(ctx->current, NULL, store, &qcap, &ptr, NULL); @@ -457,34 +464,41 @@ gen_alloc_slice_at(struct gen_context *ctx, .type = inittype, .name = data.name, }; - gen_expr_at(ctx, expr->alloc.expr, storage); + gen_expr_at(ctx, expr->alloc.init, storage); } else { struct qbe_value rtmemcpy = mkrtfunc(ctx, "rt.memcpy"); pushi(ctx->current, NULL, Q_CALL, &rtmemcpy, &data, &initdata, &size, NULL); } + + if (!expand) { + return; + } + + struct qbe_value last = mkqtmp(ctx, ctx->arch.ptr, ".%d"); + struct qbe_value next = mkqtmp(ctx, ctx->arch.ptr, ".%d"); + pushi(ctx->current, &next, Q_MUL, &length, &isize, NULL); + pushi(ctx->current, &next, Q_ADD, &next, &data, NULL); + pushi(ctx->current, &last, Q_SUB, &next, &isize, NULL); + struct qbe_value remain = mkqtmp(ctx, ctx->arch.sz, ".%d"); + pushi(ctx->current, &remain, Q_SUB, &qcap, &length, NULL); + pushi(ctx->current, &remain, Q_MUL, &remain, &isize, NULL); + struct qbe_value rtmemcpy = mkrtfunc(ctx, "rt.memcpy"); + pushi(ctx->current, NULL, Q_CALL, &rtmemcpy, &next, &last, &remain, NULL); } static struct gen_value -gen_expr_alloc_with(struct gen_context *ctx, +gen_expr_alloc_init_with(struct gen_context *ctx, const struct expression *expr, struct gen_value *out) { - if (type_dealias(expr->result)->storage == STORAGE_SLICE) { - if (out) { - gen_alloc_slice_at(ctx, expr, *out); - return gv_void; - } - struct gen_value temp = mktemp(ctx, expr->result, "object.%d"); - struct qbe_value base = mkqval(ctx, &temp); - struct qbe_value sz = constl(expr->result->size); - enum qbe_instr alloc = alloc_for_align(expr->result->align); - pushprei(ctx->current, &base, alloc, &sz, NULL); - gen_alloc_slice_at(ctx, expr, temp); - return temp; - } + // alloc(init) case assert(expr->alloc.cap == NULL); - struct qbe_value sz = constl(type_dereference(expr->result)->size); + const struct type *objtype = type_dealias(expr->result); + assert(objtype->storage == STORAGE_POINTER); + objtype = objtype->pointer.referent; + + struct qbe_value sz = constl(objtype->size); struct gen_value result = mktemp(ctx, expr->result, ".%d"); struct qbe_value qresult = mkqval(ctx, &result); struct qbe_value rtfunc = mkrtfunc(ctx, "rt.malloc"); @@ -503,10 +517,10 @@ gen_expr_alloc_with(struct gen_context *ctx, struct gen_value object = { .kind = GV_TEMP, - .type = type_dereference(expr->result), + .type = objtype, .name = result.name, }; - gen_expr_at(ctx, expr->alloc.expr, object); + gen_expr_at(ctx, expr->alloc.init, object); if (out) { gen_store(ctx, *out, result); } @@ -514,6 +528,107 @@ gen_expr_alloc_with(struct gen_context *ctx, } static struct gen_value +gen_expr_alloc_slice_with(struct gen_context *ctx, + const struct expression *expr, struct gen_value *out) +{ + // alloc(init, cap | len) case + bool expand = expr->alloc.kind == ALLOC_WITH_LEN; + if (out) { + gen_alloc_slice_at(ctx, expr, *out, expand); + return gv_void; + } + struct gen_value temp = mktemp(ctx, expr->result, "object.%d"); + struct qbe_value base = mkqval(ctx, &temp); + struct qbe_value sz = constl(expr->result->size); + enum qbe_instr alloc = alloc_for_align(expr->result->align); + pushprei(ctx->current, &base, alloc, &sz, NULL); + gen_alloc_slice_at(ctx, expr, temp, expand); + return temp; +} + +static struct gen_value +gen_expr_alloc_copy_with(struct gen_context *ctx, + const struct expression *expr, struct gen_value *out) +{ + // alloc(init...) case + struct gen_value ret = gv_void; + if (out == NULL) { + ret = mktemp(ctx, expr->result, "object.%d"); + out = &ret; + + struct qbe_value base = mkqval(ctx, out); + struct qbe_value sz = constl(expr->result->size); + enum qbe_instr alloc = alloc_for_align(expr->result->align); + pushprei(ctx->current, &base, alloc, &sz, NULL); + } + + enum qbe_instr load = load_for_type(ctx, &builtin_type_size); + enum qbe_instr store = store_for_type(ctx, &builtin_type_size); + + struct gen_value src = gen_expr(ctx, expr->alloc.init); + struct qbe_value sbase = mkcopy(ctx, &src, ".%d"); + struct qbe_value dbase = mkcopy(ctx, out, ".%d"); + struct qbe_value srcdata = mkqtmp(ctx, ctx->arch.sz, ".%d"); + pushi(ctx->current, &srcdata, load, &sbase, NULL); + struct qbe_value offs = constl(builtin_type_size.size); + pushi(ctx->current, &sbase, Q_ADD, &sbase, &offs, NULL); + + struct qbe_value length = mkqtmp(ctx, ctx->arch.sz, ".%d"); + pushi(ctx->current, &length, load, &sbase, NULL); + + struct qbe_statement linvalid, lvalid, lalloc; + struct qbe_value balloc = mklabel(ctx, &lalloc, ".%d"); + struct qbe_value binvalid = mklabel(ctx, &linvalid, ".%d"); + struct qbe_value bvalid = mklabel(ctx, &lvalid, ".%d"); + + struct qbe_value newdata = mkqtmp(ctx, ctx->arch.ptr, ".%d"); + struct qbe_value zero = constl(0); + pushi(ctx->current, &newdata, Q_COPY, &zero, NULL); + pushi(ctx->current, NULL, Q_JNZ, &length, &balloc, &bvalid, NULL); + push(&ctx->current->body, &lalloc); + + const struct type *membtype = + type_dealias(expr->alloc.init->result)->array.members; + struct qbe_value sz = mkqtmp(ctx, ctx->arch.sz, ".%d"); + struct qbe_value membsz = constl(membtype->size); + pushi(ctx->current, &sz, Q_MUL, &membsz, &length, NULL); + + struct qbe_value rtfunc = mkrtfunc(ctx, "rt.malloc"); + pushi(ctx->current, &newdata, Q_CALL, &rtfunc, &sz, NULL); + + pushi(ctx->current, NULL, Q_JNZ, &newdata, &bvalid, &binvalid, NULL); + push(&ctx->current->body, &linvalid); + gen_fixed_abort(ctx, expr->loc, ABORT_ALLOC_FAILURE); + push(&ctx->current->body, &lvalid); + + pushi(ctx->current, NULL, store, &newdata, &dbase, NULL); + pushi(ctx->current, &dbase, Q_ADD, &dbase, &offs, NULL); + pushi(ctx->current, NULL, store, &length, &dbase, NULL); + pushi(ctx->current, &dbase, Q_ADD, &dbase, &offs, NULL); + pushi(ctx->current, NULL, store, &length, &dbase, NULL); + + struct qbe_value rtmemcpy = mkrtfunc(ctx, "rt.memcpy"); + pushi(ctx->current, NULL, Q_CALL, &rtmemcpy, &newdata, &srcdata, &sz, NULL); + return ret; +} + +static struct gen_value +gen_expr_alloc_with(struct gen_context *ctx, + const struct expression *expr, struct gen_value *out) +{ + switch (expr->alloc.kind) { + case ALLOC_OBJECT: + return gen_expr_alloc_init_with(ctx, expr, out); + case ALLOC_WITH_CAP: + case ALLOC_WITH_LEN: + return gen_expr_alloc_slice_with(ctx, expr, out); + case ALLOC_COPY: + return gen_expr_alloc_copy_with(ctx, expr, out); + } + abort(); // Unreachable +} + +static struct gen_value gen_expr_append(struct gen_context *ctx, const struct expression *expr) { struct gen_value slice = gen_expr(ctx, expr->append.expr); @@ -1019,6 +1134,9 @@ gen_expr_cast_slice_at(struct gen_context *ctx, { const struct type *to = expr->result, *from = type_dealias(expr->cast.value->result); + if (from->storage == STORAGE_POINTER) { + from = type_dealias(from->pointer.referent); + } assert(from->storage == STORAGE_ARRAY); assert(from->array.length != SIZE_UNDEFINED); @@ -1147,9 +1265,16 @@ cast_prefers_at(const struct expression *expr) return true; } // array => slice - if (type_dealias(from)->storage == STORAGE_ARRAY && - type_dealias(to)->storage == STORAGE_SLICE) { - return true; + if (type_dealias(to)->storage == STORAGE_SLICE) { + switch (type_dealias(from)->storage) { + case STORAGE_ARRAY: + return true; + case STORAGE_POINTER: + from = type_dealias(from)->pointer.referent; + return type_dealias(from)->storage == STORAGE_ARRAY; + default: + return false; + } } return false; } @@ -1722,9 +1847,9 @@ gen_expr_for(struct gen_context *ctx, const struct expression *expr) static struct gen_value gen_expr_free(struct gen_context *ctx, const struct expression *expr) { - const struct type *type = type_dealias(expr->alloc.expr->result); + const struct type *type = type_dealias(expr->free.expr->result); struct qbe_value rtfunc = mkrtfunc(ctx, "rt.free"); - struct gen_value val = gen_expr(ctx, expr->alloc.expr); + struct gen_value val = gen_expr(ctx, expr->free.expr); struct qbe_value qval = mkqval(ctx, &val); if (type->storage == STORAGE_SLICE || type->storage == STORAGE_STRING) { struct qbe_value lval = mklval(ctx, &val); diff --git a/src/parse.c b/src/parse.c @@ -1237,17 +1237,26 @@ parse_allocation_expression(struct lexer *lexer) case T_ALLOC: exp = mkexpr(&tok.loc); exp->type = EXPR_ALLOC; + exp->alloc.kind = ALLOC_OBJECT; want(lexer, T_LPAREN, NULL); - exp->alloc.expr = parse_expression(lexer); + exp->alloc.init = parse_expression(lexer); switch (lex(lexer, &tok)) { case T_COMMA: + // alloc(init, cap) exp->alloc.cap = parse_expression(lexer); + exp->alloc.kind = ALLOC_WITH_CAP; + want(lexer, T_RPAREN, NULL); + break; + case T_ELLIPSIS: + // alloc(init...) + exp->alloc.kind = ALLOC_COPY; want(lexer, T_RPAREN, NULL); break; case T_RPAREN: + // alloc(init) break; default: - synassert(false, &tok, T_COMMA, T_RPAREN, T_EOF); + synassert(false, &tok, T_COMMA, T_RPAREN, T_ELLIPSIS, T_EOF); } break; case T_FREE: diff --git a/src/types.c b/src/types.c @@ -634,7 +634,12 @@ type_is_assignable(const struct type *to, const struct type *from) case STORAGE_VOID: return true; case STORAGE_SLICE: - from = type_dealias(from); + if (from->storage == STORAGE_POINTER) { + from = type_dealias(from->pointer.referent); + if (from->storage != STORAGE_ARRAY) { + return false; + } + } if (from->storage != STORAGE_ARRAY && from->storage != STORAGE_SLICE) { return false; diff --git a/tests/17-alloc.ha b/tests/17-alloc.ha @@ -68,13 +68,38 @@ fn slice() void = { }; free(y); - let z: []int = alloc([]); + let z: []int = []; let p = &z: *struct { data: nullable *[*]int, length: size, capacity: size, }; assert(p.data == null && p.length == 0 && p.capacity == 0); + + let x: []int = alloc([1, 2, 3...], 42); + defer free(x); + assert(x[0] == 1); + assert(x[1] == 2); + assert(x[2] == 3); + for (let i = 2z; i < len(x); i += 1) { + assert(x[i] == 3); + }; +}; + +fn slice_copy() void = { + let x: []int = [1, 2, 3]; + + let p: []int = alloc(x...); + defer free(p); + assert(p: *[*]int != x: *[*]int); + assert(len(p) == len(x)); + for (let i = 0z; i < len(p); i += 1) { + assert(p[i] == x[i]); + }; + + let q: *[]int = alloc(x); + defer free(q); + assert((*q): *[*]int == x: *[*]int); }; fn string() void = {