hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 6a7563fc01cea51f96914a72db7c7a01544bcdeb
parent 5bfc6935a53af99880e9220f990b458b29177c90
Author: Alexey Yerin <yyp@disroot.org>
Date:   Wed,  6 Sep 2023 21:33:01 +0300

sort: Implement powersort for large arrays

Signed-off-by: Alexey Yerin <yyp@disroot.org>

Diffstat:
Mscripts/gen-stdlib | 2+-
Msort/sort.ha | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mstdlib.mk | 4++--
3 files changed, 174 insertions(+), 6 deletions(-)

diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -1353,7 +1353,7 @@ sort() { gensrcs_sort \ +test.ha fi - gen_ssa sort strings types + gen_ssa sort math strings types } strconv() { diff --git a/sort/sort.ha b/sort/sort.ha @@ -2,6 +2,10 @@ // (c) 2021 Drew DeVault <sir@cmpwn.com> // (c) 2021 Steven Guikal <void@fluix.one> +use math; +use types; +use rt; + // Sorts a slice of items in place. This function provides a stable sort - // relative order of equal elements is preserved. // @@ -12,9 +16,7 @@ export fn sort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { insort(items, itemsz, cmp); return; }; - - // TODO: Timsort - insort(items, itemsz, cmp); + powersort(items, itemsz, cmp); }; // Sorts a slice of strings in place. Sorting is done with respect to Unicode @@ -79,3 +81,169 @@ fn insort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { }; }; }; + +// Based on paper "Nearly-Optimal Mergesorts: Fast, Practical Sorting Methods +// That Optimally Adapt to Existing Runs"; J. Ian Munro, Sebastian Wild +// +// https://arxiv.org/pdf/1805.04154.pdf + +def MINRUN: size = 24; // FIXME: needs tuning +def EMPTY: size = -1z; + +// A run of non-decreasing elements on the interval [start; end). +type run = struct { + start: size, // Set to EMPTY when a run is merged + end: size, +}; + +fn powersort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { + // npowers = floor(log2(n)) + 1 + const npowers = math::bit_size_u64(len(items)) + 1; + const runs: []run = alloc([run { start = EMPTY, ... }...], npowers + 1); + defer free(runs); + let top = 0u8; + + const aux: []u8 = alloc([0...], len(items) * itemsz); + defer free(aux); + + let a = run { + start = 0z, + end = extend(items, itemsz, cmp, 0), + }; + const length = a.end - a.start; + if (length < MINRUN) { + a.end = if (a.start + MINRUN < len(items)) + a.start + MINRUN else len(items); + insort(cut(items, itemsz, a.start, a.end), itemsz, cmp); + }; + for (a.end < len(items)) { + let b = run { + start = a.end, + end = extend(items, itemsz, cmp, a.end), + }; + const length = b.end - b.start; + if (length < MINRUN) { + b.end = if (b.start + MINRUN < len(items)) + b.start + MINRUN else len(items); + insort(cut(items, itemsz, b.start, b.end), itemsz, cmp); + }; + const k = node_power(0, len(items), a.start, b.start, b.end); + assert(k != top); + for (let i = top; i > k; i -= 1) { + if (runs[i].start == EMPTY) continue; + merge(items, itemsz, cmp, aux, + runs[i].start, runs[i].end, a.end); + + a.start = runs[i].start; + runs[i].start = EMPTY; + }; + runs[k] = a; + top = k; + + a = b; + }; + assert(a.end == len(items)); + for (let i = top; i > 0; i -= 1) { + if (runs[i].start == EMPTY) continue; + merge(items, itemsz, cmp, aux, + runs[i].start, runs[i].end, len(items)); + }; +}; + +// Returns 'end' such that [start; end) in 'items' is non-decreasing +// +// a[0] ≤ a[1] ≤ ... ≤ a[n - 1] - kept as-is +// a[1] > a[1] > ... > a[n - 1] - reversed +// +// Note: reversing a sequence with equal elements will move their relative +// locations, which is undesirable for a stable sort. +fn extend(items: []opaque, itemsz: size, cmp: *cmpfunc, start: size) size = { + const n = len(items); + const items = (items: *[*]u8)[..len(items) * itemsz]; + + assert(n - start > 0, "Empty extension"); + if (start + 1 == n) { + return n; + }; + + if (cmp(&items[start * itemsz], &items[(start + 1) * itemsz]) <= 0) { + let end = start + 2; + for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) <= 0) { + end += 1; + }; + return end; + } else { + let end = start + 2; + for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) > 0) { + end += 1; + }; + reverse(cut(items, itemsz, start, end), itemsz); + return end; + }; +}; + +fn reverse(items: []opaque, itemsz: size) void = { + const n = len(items); + const items = (items: *[*]u8)[..n * itemsz]; + for (let i = 0z; i < n / 2; i += 1) { + swap(&items[i * itemsz], &items[(n - i - 1) * itemsz], itemsz); + }; +}; + +fn merge( + items: []opaque, + itemsz: size, + cmp: *cmpfunc, + aux: []u8, + l: size, + m: size, + r: size, +) void = { + l *= itemsz; + m *= itemsz; + r *= itemsz; + + const items = items: *[*]u8; + const aux = aux: *[*]u8; + // Placing items at the beginning results in better cache performance + // (probably) + aux[..m - l] = items[l..m]; + + let i = 0z, j = m, out = l; + for (i < m - l && j < r; out += itemsz) { + if (cmp(&aux[i], &items[j]) < 0) { + rt::memmove(&items[out], &aux[i], itemsz); + i += itemsz; + } else { + rt::memmove(&items[out], &items[j], itemsz); + j += itemsz; + }; + }; + if (i < m - l) { + const sz = (m - l) - i; + rt::memmove(&items[out], &aux[i], sz); + out += sz; + }; + if (j < r) { + const sz = r - j; + rt::memmove(&items[out], &items[j], sz); + out += sz; + }; +}; + +fn cut(items: []opaque, itemsz: size, l: size, r: size) []opaque = { + return *(&types::slice { + data = &(items: *[*]u8)[l * itemsz], + length = r - l, + capacity = 0, + }: *[]opaque); +}; + +fn node_power(left: size, right: size, start_a: size, start_b: size, end_b: size) u8 = { + const n: u64 = right - left; + const l: u64 = start_a + start_b - 2 * left; + const r: u64 = start_b + end_b - 2 * left; + const a = ((l << 30) / n): u32; + const b = ((r << 30) / n): u32; + return math::leading_zeros_u32(a ^ b); +}; diff --git a/stdlib.mk b/stdlib.mk @@ -2058,7 +2058,7 @@ stdlib_sort_any_srcs = \ $(STDLIB)/sort/sort.ha \ $(STDLIB)/sort/types.ha -$(HARECACHE)/sort/sort-any.ssa: $(stdlib_sort_any_srcs) $(stdlib_rt) $(stdlib_strings_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) +$(HARECACHE)/sort/sort-any.ssa: $(stdlib_sort_any_srcs) $(stdlib_rt) $(stdlib_math_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/sort @$(stdlib_env) $(HAREC) $(HAREFLAGS) -o $@ -Nsort \ @@ -4566,7 +4566,7 @@ testlib_sort_any_srcs = \ $(STDLIB)/sort/types.ha \ $(STDLIB)/sort/+test.ha -$(TESTCACHE)/sort/sort-any.ssa: $(testlib_sort_any_srcs) $(testlib_rt) $(testlib_strings_$(PLATFORM)) $(testlib_types_$(PLATFORM)) +$(TESTCACHE)/sort/sort-any.ssa: $(testlib_sort_any_srcs) $(testlib_rt) $(testlib_math_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/sort @$(testlib_env) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nsort \