sort.ha (6058B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use math; 5 use types; 6 7 // Sorts a slice of items in place. This function provides a stable sort - 8 // relative order of equal elements is preserved. 9 // 10 // Note that this function may (temporarily) allocate, and will abort on 11 // allocation failure. 12 export fn sort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { 13 if (len(items) < 256) { 14 insort(items, itemsz, cmp); 15 return; 16 }; 17 powersort(items, itemsz, cmp); 18 }; 19 20 // Checks if all of the items in a slice are sorted. 21 export fn sorted(items: []opaque, itemsz: size, cmp: *cmpfunc) bool = { 22 let ba = items: *[*]u8; 23 for (let i = 1z; i < len(items); i += 1) { 24 if (cmp(&ba[(i - 1) * itemsz], &ba[i * itemsz]) > 0) { 25 return false; 26 }; 27 }; 28 return true; 29 }; 30 31 fn swap(a: *opaque, b: *opaque, sz: size) void = { 32 let a = a: *[*]u8, b = b: *[*]u8; 33 for (let i = 0z; i < sz; i += 1) { 34 let c = a[i]; 35 a[i] = b[i]; 36 b[i] = c; 37 }; 38 }; 39 40 // Finds the index of the rightmost value that is equal to key or, if such value 41 // does not exist, less than key. 42 fn search_rightmost( 43 in: []opaque, 44 sz: size, 45 key: const *opaque, 46 cmp: *cmpfunc, 47 ) size = { 48 let l = 0z; 49 let r = len(in); 50 let ba = in: *[*]u8; 51 for (l < r) { 52 let m = l + (r - l) / 2; 53 if (cmp(key, &ba[m * sz]) < 0) { 54 r = m; 55 } else { 56 l = m + 1; 57 }; 58 }; 59 return r - 1; 60 }; 61 62 fn insort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { 63 let ba = items: *[*]u8; 64 for (let i = 0z; i < len(items); i += 1) { 65 let bound = search_rightmost(items[0..i], itemsz, 66 &ba[i * itemsz], cmp); 67 for (let j = i; j > bound + 1; j -= 1) { 68 let a = &ba[(j - 1) * itemsz]; 69 let b = &ba[j * itemsz]; 70 swap(a, b, itemsz); 71 }; 72 }; 73 }; 74 75 // Based on paper "Nearly-Optimal Mergesorts: Fast, Practical Sorting Methods 76 // That Optimally Adapt to Existing Runs"; J. Ian Munro, Sebastian Wild 77 // 78 // https://arxiv.org/pdf/1805.04154.pdf 79 80 def MINRUN: size = 24; // FIXME: needs tuning 81 def EMPTY: size = -1z; 82 83 // A run of non-decreasing elements on the interval [start; end). 84 type run = struct { 85 start: size, // Set to EMPTY when a run is merged 86 end: size, 87 }; 88 89 fn powersort(items: []opaque, itemsz: size, cmp: *cmpfunc) void = { 90 // npowers = floor(log2(n)) + 1 91 const npowers = math::bit_size_u64(len(items)) + 1; 92 const runs: []run = alloc([run { start = EMPTY, ... }...], npowers + 1); 93 defer free(runs); 94 let top = 0u8; 95 96 const aux: []u8 = alloc([0...], len(items) * itemsz); 97 defer free(aux); 98 99 let a = run { 100 start = 0z, 101 end = extend(items, itemsz, cmp, 0), 102 }; 103 const length = a.end - a.start; 104 if (length < MINRUN) { 105 a.end = if (a.start + MINRUN < len(items)) 106 a.start + MINRUN else len(items); 107 insort(cut(items, itemsz, a.start, a.end), itemsz, cmp); 108 }; 109 for (a.end < len(items)) { 110 let b = run { 111 start = a.end, 112 end = extend(items, itemsz, cmp, a.end), 113 }; 114 const length = b.end - b.start; 115 if (length < MINRUN) { 116 b.end = if (b.start + MINRUN < len(items)) 117 b.start + MINRUN else len(items); 118 insort(cut(items, itemsz, b.start, b.end), itemsz, cmp); 119 }; 120 const k = node_power(0, len(items), a.start, b.start, b.end); 121 assert(k != top); 122 for (let i = top; i > k; i -= 1) { 123 if (runs[i].start == EMPTY) continue; 124 merge(items, itemsz, cmp, aux, 125 runs[i].start, runs[i].end, a.end); 126 127 a.start = runs[i].start; 128 runs[i].start = EMPTY; 129 }; 130 runs[k] = a; 131 top = k; 132 133 a = b; 134 }; 135 assert(a.end == len(items)); 136 for (let i = top; i > 0; i -= 1) { 137 if (runs[i].start == EMPTY) continue; 138 merge(items, itemsz, cmp, aux, 139 runs[i].start, runs[i].end, len(items)); 140 }; 141 }; 142 143 // Returns 'end' such that [start; end) in 'items' is non-decreasing 144 // 145 // a[0] ≤ a[1] ≤ ... ≤ a[n - 1] - kept as-is 146 // a[1] > a[1] > ... > a[n - 1] - reversed 147 // 148 // Note: reversing a sequence with equal elements will move their relative 149 // locations, which is undesirable for a stable sort. 150 fn extend(items: []opaque, itemsz: size, cmp: *cmpfunc, start: size) size = { 151 const n = len(items); 152 const items = (items: *[*]u8)[..len(items) * itemsz]; 153 154 assert(n - start > 0, "Empty extension"); 155 if (start + 1 == n) { 156 return n; 157 }; 158 159 if (cmp(&items[start * itemsz], &items[(start + 1) * itemsz]) <= 0) { 160 let end = start + 2; 161 for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) <= 0) { 162 end += 1; 163 }; 164 return end; 165 } else { 166 let end = start + 2; 167 for (end < n && cmp(&items[(end - 1) * itemsz], &items[end * itemsz]) > 0) { 168 end += 1; 169 }; 170 reverse(cut(items, itemsz, start, end), itemsz); 171 return end; 172 }; 173 }; 174 175 fn reverse(items: []opaque, itemsz: size) void = { 176 const n = len(items); 177 const items = (items: *[*]u8)[..n * itemsz]; 178 for (let i = 0z; i < n / 2; i += 1) { 179 swap(&items[i * itemsz], &items[(n - i - 1) * itemsz], itemsz); 180 }; 181 }; 182 183 fn merge( 184 items: []opaque, 185 itemsz: size, 186 cmp: *cmpfunc, 187 aux: []u8, 188 l: size, 189 m: size, 190 r: size, 191 ) void = { 192 l *= itemsz; 193 m *= itemsz; 194 r *= itemsz; 195 196 const items = items: *[*]u8; 197 // Placing items at the beginning results in better cache performance 198 // (probably) 199 aux[..m - l] = items[l..m]; 200 201 let i = 0z, j = m, out = l; 202 for (i < m - l && j < r; out += itemsz) { 203 if (cmp(&aux[i], &items[j]) < 0) { 204 items[out..out + itemsz] = aux[i..i + itemsz]; 205 i += itemsz; 206 } else { 207 items[out..out + itemsz] = items[j..j + itemsz]; 208 j += itemsz; 209 }; 210 }; 211 if (i < m - l) { 212 const sz = (m - l) - i; 213 items[out..out + sz] = aux[i..i + sz]; 214 out += sz; 215 }; 216 if (j < r) { 217 const sz = r - j; 218 items[out..out + sz] = items[j..j + sz]; 219 out += sz; 220 }; 221 }; 222 223 fn cut(items: []opaque, itemsz: size, l: size, r: size) []opaque = { 224 return *(&types::slice { 225 data = &(items: *[*]u8)[l * itemsz], 226 length = r - l, 227 capacity = 0, 228 }: *[]opaque); 229 }; 230 231 fn node_power(left: size, right: size, start_a: size, start_b: size, end_b: size) u8 = { 232 const n: u64 = right - left; 233 const l: u64 = start_a + start_b - 2 * left; 234 const r: u64 = start_b + end_b - 2 * left; 235 const a = ((l << 30) / n): u32; 236 const b = ((r << 30) / n): u32; 237 return math::leading_zeros_u32(a ^ b); 238 };