commit ddce9e82212b3ba80e1de93c667d5d2604470488
parent b77b8f13f13852e2a799b8fa4fc6f223dd867e20
Author: Eyal Sawady <ecs@d2evs.net>
Date: Tue, 20 Apr 2021 18:36:28 -0400
gen: improve efficiency of gen_copy
For small (<128 bytes) copies, use repeated load/stores. For large
copies, call rt::memcpy. 128 is used because that's what clang appears
to do.
This nearly doubles (0.22s to 0.12s) the speed of my test case, parsing
hare/parse/*.ha using hare::parse from a bufio::buffered. The bottleneck
is now in the rt::memcpy from slice assignment in bufio::buffered_read.
Co-authored-by: Michael Forney <mforney@mforney.org>
Diffstat:
M | src/gen.c | | | 64 | ++++++++++++++++++++++++++++++++++++++++++++-------------------- |
1 file changed, 44 insertions(+), 20 deletions(-)
diff --git a/src/gen.c b/src/gen.c
@@ -190,31 +190,55 @@ gen_copy(struct gen_context *ctx,
pushc(ctx->current, "begin gen_copy for type %s (is_union? %d)",
dest->type->name, dest->type->is_union);
- struct qbe_value temp = {0}, destp = {0}, srcp = {0}, size = {0};
- gen_temp(ctx, &temp, &qbe_long, "temp.%d");
+ struct qbe_value destp = {0}, srcp = {0};
gen_temp(ctx, &destp, &qbe_long, "dest.%d");
gen_temp(ctx, &srcp, &qbe_long, "src.%d");
pushi(ctx->current, &destp, Q_COPY, dest, NULL);
pushi(ctx->current, &srcp, Q_COPY, src, NULL);
- // TODO: It would be nice to have a more efficient builtin for
- // this, especially given that copying around unions is an
- // important feature of Hare
- //
- // NOTE: I suspect that this code may be subtly wrong for some reason
- // when handling union types. If you have written the full set of test
- // cases for struct and union types, and ended up here: when you figure
- // it out, please examine the version of this function from
- // 1d12f4143e87548b8f876f7ecd336c8eb0255679 and see if you can backport
- // it with your fix applied. That version was much more efficient than
- // this is.
- struct qbe_value rtfunc = {0};
- rtfunc.kind = QV_GLOBAL;
- rtfunc.name = strdup("rt.memcpy");
- rtfunc.type = &qbe_long;
- constl(&size, dest->type->size);
- pushi(ctx->current, NULL, Q_CALL, &rtfunc,
- &destp, &srcp, &size, NULL);
+ if (dest->type->size > 128) {
+ struct qbe_value rtfunc = {0}, size = {0};
+ rtfunc.kind = QV_GLOBAL;
+ rtfunc.name = strdup("rt.memcpy");
+ rtfunc.type = &qbe_long;
+ constl(&size, dest->type->size);
+ pushi(ctx->current, NULL, Q_CALL, &rtfunc,
+ &destp, &srcp, &size, NULL);
+ } else {
+ enum qbe_instr load, store;
+ struct qbe_value temp = {0}, align = {0};
+ assert(dest->type->align
+ && (dest->type->align & (dest->type->align - 1)) == 0);
+ switch (dest->type->align) {
+ case 1:
+ load = Q_LOADUB;
+ store = Q_STOREB;
+ break;
+ case 2:
+ load = Q_LOADUH;
+ store = Q_STOREH;
+ break;
+ case 4:
+ load = Q_LOADUW;
+ store = Q_STOREW; break;
+ default:
+ assert(dest->type->align == 8);
+ load = Q_LOADL;
+ store = Q_STOREL;
+ break;
+ }
+ gen_temp(ctx, &temp, &qbe_long, "temp.%d");
+ constl(&align, dest->type->align);
+ for (size_t offset = 0; offset < dest->type->size;
+ offset += dest->type->align) {
+ pushi(ctx->current, &temp, load, &srcp, NULL);
+ pushi(ctx->current, NULL, store, &temp, &destp, NULL);
+ pushi(ctx->current, &srcp, Q_ADD, &srcp, &align, NULL);
+ pushi(ctx->current, &destp, Q_ADD, &destp, &align, NULL);
+ }
+ }
+
+ pushc(ctx->current, "end gen_copy for type %s", dest->type->name);
}
static void