hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

sort.ha (6058B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use math;
      5 use types;
      6 
      7 // Sorts a slice of items in place. This function provides a stable sort -
      8 // relative order of equal elements is preserved.
      9 //
     10 // Note that this function may (temporarily) allocate, and will abort on
     11 // allocation failure.
     12 export fn sort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = {
     13 	if (len(items) < 256) {
     14 		insort(items, itemsz, cmp);
     15 		return;
     16 	};
     17 	powersort(items, itemsz, cmp);
     18 };
     19 
     20 // Checks if all of the items in a slice are sorted.
     21 export fn sorted(items: []opaque, itemsz: size, cmp: *cmpfunc) bool = {
     22 	let ba = items: *[*]u8;
     23 	for (let i = 1z; i < len(items); i += 1) {
     24 		if (cmp(&ba[(i - 1) * itemsz], &ba[i * itemsz]) > 0) {
     25 			return false;
     26 		};
     27 	};
     28 	return true;
     29 };
     30 
     31 fn swap(a: *opaque, b: *opaque, sz: size) void = {
     32 	let a = a: *[*]u8, b = b: *[*]u8;
     33 	for (let i = 0z; i < sz; i += 1) {
     34 		let c = a[i];
     35 		a[i] = b[i];
     36 		b[i] = c;
     37 	};
     38 };
     39 
     40 // Finds the index of the rightmost value that is equal to key or, if such value
     41 // does not exist, less than key.
     42 fn search_rightmost(
     43 	in: []opaque,
     44 	sz: size,
     45 	key: const *opaque,
     46 	cmp: *cmpfunc,
     47 ) size = {
     48 	let l = 0z;
     49 	let r = len(in);
     50 	let ba = in: *[*]u8;
     51 	for (l < r) {
     52 		let m = l + (r - l) / 2;
     53 		if (cmp(key, &ba[m * sz]) < 0) {
     54 			r = m;
     55 		} else {
     56 			l = m + 1;
     57 		};
     58 	};
     59 	return r - 1;
     60 };
     61 
     62 fn insort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = {
     63 	let ba = items: *[*]u8;
     64 	for (let i = 0z; i < len(items); i += 1) {
     65 		let bound = search_rightmost(items[0..i], itemsz,
     66 			&ba[i * itemsz], cmp);
     67 		for (let j = i; j > bound + 1; j -= 1) {
     68 			let a = &ba[(j - 1) * itemsz];
     69 			let b = &ba[j * itemsz];
     70 			swap(a, b, itemsz);
     71 		};
     72 	};
     73 };
     74 
     75 // Based on paper "Nearly-Optimal Mergesorts: Fast, Practical Sorting Methods
     76 // That Optimally Adapt to Existing Runs"; J. Ian Munro, Sebastian Wild
     77 //
     78 // https://arxiv.org/pdf/1805.04154.pdf
     79 
     80 def MINRUN: size = 24; // FIXME: needs tuning
     81 def EMPTY: size = -1z;
     82 
     83 // A run of non-decreasing elements on the interval [start; end).
     84 type run = struct {
     85 	start: size, // Set to EMPTY when a run is merged
     86 	end: size,
     87 };
     88 
     89 fn powersort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = {
     90 	// npowers = floor(log2(n)) + 1
     91 	const npowers = math::bit_size_u64(len(items)) + 1;
     92 	const runs: []run = alloc([run { start = EMPTY, ... }...], npowers + 1);
     93 	defer free(runs);
     94 	let top = 0u8;
     95 
     96 	const aux: []u8 = alloc([0...], len(items) * itemsz);
     97 	defer free(aux);
     98 
     99 	let a = run {
    100 		start = 0z,
    101 		end = extend(items, itemsz, cmp, 0),
    102 	};
    103 	const length = a.end - a.start;
    104 	if (length < MINRUN) {
    105 		a.end = if (a.start + MINRUN < len(items))
    106 			a.start + MINRUN else len(items);
    107 		insort(cut(items, itemsz, a.start, a.end), itemsz, cmp);
    108 	};
    109 	for (a.end < len(items)) {
    110 		let b = run {
    111 			start = a.end,
    112 			end = extend(items, itemsz, cmp, a.end),
    113 		};
    114 		const length = b.end - b.start;
    115 		if (length < MINRUN) {
    116 			b.end = if (b.start + MINRUN < len(items))
    117 				b.start + MINRUN else len(items);
    118 			insort(cut(items, itemsz, b.start, b.end), itemsz, cmp);
    119 		};
    120 		const k = node_power(0, len(items), a.start, b.start, b.end);
    121 		assert(k != top);
    122 		for (let i = top; i > k; i -= 1) {
    123 			if (runs[i].start == EMPTY) continue;
    124 			merge(items, itemsz, cmp, aux,
    125 				runs[i].start, runs[i].end, a.end);
    126 
    127 			a.start = runs[i].start;
    128 			runs[i].start = EMPTY;
    129 		};
    130 		runs[k] = a;
    131 		top = k;
    132 
    133 		a = b;
    134 	};
    135 	assert(a.end == len(items));
    136 	for (let i = top; i > 0; i -= 1) {
    137 		if (runs[i].start == EMPTY) continue;
    138 		merge(items, itemsz, cmp, aux,
    139 			runs[i].start, runs[i].end, len(items));
    140 	};
    141 };
    142 
    143 // Returns 'end' such that [start; end) in 'items' is non-decreasing
    144 //
    145 //     a[0] ≤ a[1] ≤ ... ≤ a[n - 1] - kept as-is
    146 //     a[1] > a[1] > ... > a[n - 1] - reversed
    147 //
    148 // Note: reversing a sequence with equal elements will move their relative
    149 // locations, which is undesirable for a stable sort.
    150 fn extend(items: []opaque, itemsz: size, cmp: *cmpfunc, start: size) size = {
    151 	const n = len(items);
    152 	const items = (items: *[*]u8)[..len(items) * itemsz];
    153 
    154 	assert(n - start > 0, "Empty extension");
    155 	if (start + 1 == n) {
    156 		return n;
    157 	};
    158 
    159 	if (cmp(&items[start * itemsz], &items[(start + 1) * itemsz]) <= 0) {
    160 		let end = start + 2;
    161 		for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) <= 0) {
    162 			end += 1;
    163 		};
    164 		return end;
    165 	} else {
    166 		let end = start + 2;
    167 		for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) > 0) {
    168 			end += 1;
    169 		};
    170 		reverse(cut(items, itemsz, start, end), itemsz);
    171 		return end;
    172 	};
    173 };
    174 
    175 fn reverse(items: []opaque, itemsz: size) void = {
    176 	const n = len(items);
    177 	const items = (items: *[*]u8)[..n * itemsz];
    178 	for (let i = 0z; i < n / 2; i += 1) {
    179 		swap(&items[i * itemsz], &items[(n - i - 1) * itemsz], itemsz);
    180 	};
    181 };
    182 
    183 fn merge(
    184 	items: []opaque,
    185 	itemsz: size,
    186 	cmp: *cmpfunc,
    187 	aux: []u8,
    188 	l: size,
    189 	m: size,
    190 	r: size,
    191 ) void = {
    192 	l *= itemsz;
    193 	m *= itemsz;
    194 	r *= itemsz;
    195 
    196 	const items = items: *[*]u8;
    197 	// Placing items at the beginning results in better cache performance
    198 	// (probably)
    199 	aux[..m - l] = items[l..m];
    200 
    201 	let i = 0z, j = m, out = l;
    202 	for (i < m - l && j < r; out += itemsz) {
    203 		if (cmp(&aux[i], &items[j]) < 0) {
    204 			items[out..out + itemsz] = aux[i..i + itemsz];
    205 			i += itemsz;
    206 		} else {
    207 			items[out..out + itemsz] = items[j..j + itemsz];
    208 			j += itemsz;
    209 		};
    210 	};
    211 	if (i < m - l) {
    212 		const sz = (m - l) - i;
    213 		items[out..out + sz] = aux[i..i + sz];
    214 		out += sz;
    215 	};
    216 	if (j < r) {
    217 		const sz = r - j;
    218 		items[out..out + sz] = items[j..j + sz];
    219 		out += sz;
    220 	};
    221 };
    222 
    223 fn cut(items: []opaque, itemsz: size, l: size, r: size) []opaque = {
    224 	return *(&types::slice {
    225 		data = &(items: *[*]u8)[l * itemsz],
    226 		length = r - l,
    227 		capacity = 0,
    228 	}: *[]opaque);
    229 };
    230 
    231 fn node_power(left: size, right: size, start_a: size, start_b: size, end_b: size) u8 = {
    232 	const n: u64 = right - left;
    233 	const l: u64 = start_a + start_b - 2 * left;
    234 	const r: u64 = start_b + end_b - 2 * left;
    235 	const a = ((l << 30) / n): u32;
    236 	const b = ((r << 30) / n): u32;
    237 	return math::leading_zeros_u32(a ^ b);
    238 };