malloc.ha
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// This is a simple memory allocator, based on
// Appel, Andrew W., and David A. Naumann. "Verified sequential malloc/free",
// but with logarithmic bin sizing and additional safety checks. Not
// thread-safe.

// A group of blocks that were allocated together.
export type chunk = union {
	padding: size, // TODO: track number of active allocations here
	data: [*]u8,
};

// Metadata for a block.
export type meta = struct {
	union {
		sz: size,
		next: uintptr,
	},
	user: [*]u8,
};

// Size of the header/footer for allocations.
def META: size = size(size);

// Alignment for pointers returned by malloc.
// XXX: arch
def ALIGN: size = 16;

// Allocation granularity for large allocs. Only used to allow verifying large
// heap pointers; it doesn't necessarily need to match the system page size.
def PAGESZ: size = 4096;

// Amount of memory to allocate at a time for chunks (2MiB).
def CHUNKSZ: size = 1 << 21;

// Byte to fill allocations with while they're not in use.
def POISON: u8 = 0x69;

export type memory_heap = struct {
	// Number of allocations currently in flight.
	cur_allocs: size,
	// Freelists for small blocks, up to 2056 bytes (see bin_getsize).
	bins: [9]nullable *meta,
	// The chunk to allocate from if there are no blocks available in the
	// right freelist.
	cur_chunk: (*chunk, size),
};

let static_heap = memory_heap {
	cur_allocs = 0,
	bins = [null...],
	cur_chunk = (null: *chunk, CHUNKSZ),
};

let heap = &static_heap;

// Re-initializes the heap from scratch, abandoning all prior memory
// allocations and returning the previous heap.
//
// This function is designed to be called by debug:: in a scenario where the
// heap has been corrupted. It abandons the corrupt heap and prepares a fresh
// heap which debug:: can use to allocate memory for any operations it needs
// to perform during clean-up.
export fn newheap() memory_heap = {
	const old = *heap;
	*heap = memory_heap {
		cur_allocs = 0,
		bins = [null...],
		cur_chunk = (null: *chunk, CHUNKSZ),
	};
	return old;
};
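// For reference, with META = 8 and ALIGN = 16 as defined above, a small
// block of bin size sz is laid out within its chunk as follows:
//
//	| header: sz | user data: sz bytes | footer: sz |
//	  META bytes   ALIGN-aligned         META bytes
//
// The footer duplicates the size so that out-of-bounds writes can be
// detected; within a chunk it occupies the same storage as the following
// block's header. Large allocations use the same layout inside a dedicated
// mapping, with the header placed ALIGN - META bytes past the start so that
// the user pointer stays ALIGN-aligned.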
// Allocates n bytes of memory and returns a pointer to them, or null if there
// is insufficient memory.
export fn malloc(n: size) nullable *opaque = {
	if (n == 0) return null;
	if (size_islarge(n)) {
		// Round up to PAGESZ and just use mmap directly
		n = realsz(n);
		let m = match (segmalloc(n + ALIGN + META)) {
		case null =>
			return null;
		case let p: *opaque =>
			yield (p: uintptr + ALIGN - META): *meta;
		};

		m.sz = n;
		*(&m.user[n]: *size) = n; // For out-of-bounds write detection
		heap.cur_allocs += 1;
		return &m.user;
	};

	let bin = size_getbin(n), sz = bin_getsize(bin);
	let m = match (heap.bins[bin]) {
	case null =>
		if (heap.cur_chunk.1 + META + sz + META > CHUNKSZ) {
			// No space left in this chunk; allocate a new one
			match (segmalloc(CHUNKSZ)) {
			case null =>
				return null;
			case let p: *opaque =>
				heap.cur_chunk = (p: *chunk, size(size));
			};
		};

		// Allocate a new block from the currently-active chunk
		let m = &heap.cur_chunk.0.data[heap.cur_chunk.1]: *meta;
		heap.cur_chunk.1 += META + sz;
		m.sz = sz;
		*(&m.user[sz]: *size) = sz;
		yield m;
	case let m: *meta =>
		// Pop a block off of the freelist
		heap.bins[bin] = meta_next(m);
		checkpoison(m, sz);
		m.sz = sz;
		yield m;
	};

	heap.cur_allocs += 1;
	return &m.user;
};

// Frees an allocation returned by [[malloc]]. Freeing any other pointer, or
// freeing a pointer that has already been freed, will cause an abort.
export @symbol("rt.free") fn free_(p: nullable *opaque) void = {
	let m = match (p) {
	case null =>
		return;
	case let p: *opaque =>
		yield getmeta(p);
	};
	heap.cur_allocs -= 1;

	if (size_islarge(m.sz)) {
		// Pass through to munmap
		segfree((p: uintptr - ALIGN): *opaque, m.sz + ALIGN + META);
		return;
	};

	// Push onto the freelist, poisoning the user data and tagging the low
	// bit of the next pointer to mark the block as free
	let bin = size_getbin(m.sz);
	m.user[..m.sz] = [POISON...];
	m.next = heap.bins[bin]: uintptr | 0b1;
	heap.bins[bin] = m;
};

// Changes the allocation size of a pointer to n bytes. If n is smaller than
// the prior allocation, it is truncated; otherwise the allocation is expanded
// and the values of the new bytes are undefined. May return a different
// pointer than the one given if there is insufficient space to expand the
// allocation in-place. Returns null if there is insufficient memory to
// support the request.
export fn realloc(p: nullable *opaque, n: size) nullable *opaque = {
	if (n == 0) {
		free(p);
		return null;
	};
	let m = match (p) {
	case null =>
		return malloc(n);
	case let p: *opaque =>
		yield getmeta(p);
	};
	if (realsz(n) == m.sz) return p;

	let new = match (malloc(n)) {
	case null =>
		return null;
	case let new: *opaque =>
		yield new;
	};
	memcpy(new, &m.user, if (n < m.sz) n else m.sz);
	free(p);
	return new;
};

// Gets the metadata for a given allocation. The provided pointer must have
// been returned by [[malloc]] or [[realloc]] and must not have been freed.
export fn getmeta(p: *opaque) *meta = {
	let m = (p: uintptr - META): *meta;
	validatemeta(m, false);
	assert(m.sz & 0b1 == 0,
		"tried to get metadata for already-freed pointer (double free?)");
	return m;
};
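// As a worked example of the bin sizing implemented below, with ALIGN = 16
// and META = 8: bin 0 holds blocks of ALIGN - META = 8 bytes, and bin n
// holds blocks of 2^(n-1) * ALIGN + ALIGN - META bytes:
//
//	bin:   0    1    2    3    4    5    6     7     8
//	size:  8    24   40   72   136  264  520   1032  2056
//
// Every bin size sz satisfies (sz + META) % ALIGN == 0, so blocks pack into
// a chunk with user pointers aligned to ALIGN. Requests larger than 2056
// bytes are considered large and go through segmalloc directly.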
// Find the maximum allocation size for a given bin.
fn bin_getsize(bin: size) size = {
	// Would need to have bin 0 be ALIGN rather than 0 in this case
	static assert(ALIGN != META);

	// Space bins logarithmically
	let sz = if (bin == 0) 0z else 1 << (bin - 1);

	// And make sure that (bin_getsize(n) + META) % ALIGN == 0, while
	// erring on the side of bin sizes slightly larger than powers of two
	return sz * ALIGN + ALIGN - META;
};

// Find the bin for a given allocation size.
fn size_getbin(sz: size) size = {
	// Undo alignment fudging. Equivalent to
	// ceil((sz - ALIGN + META) / ALIGN)
	sz = (sz + META - 1) / ALIGN;

	// Then undo exponentiation
	if (sz == 0) return 0;
	let ret = 0z;
	for (1 << ret < sz; ret += 1) void;
	return ret + 1;
};

// Returns true if a given allocation size should use mmap directly.
fn size_islarge(sz: size) bool = sz > bin_getsize(len(heap.bins) - 1);

// Gets the next block on the freelist.
fn meta_next(m: *meta) nullable *meta = {
	assert(m.next & 0b1 == 0b1,
		"expected metadata on freelist to be marked as free (heap corruption?)");
	return (m.next & ~0b1): nullable *meta;
};

// Round a user-requested allocation size up to the next-smallest size we can
// allocate.
fn realsz(sz: size) size = {
	if (size_islarge(sz)) {
		sz += ALIGN + META;
		if (sz % PAGESZ != 0) sz += PAGESZ - sz % PAGESZ;
		return sz - ALIGN - META;
	};

	return bin_getsize(size_getbin(sz));
};
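// For example, with the constants above: realsz(100) returns
// bin_getsize(size_getbin(100)) = bin_getsize(4) = 136, while a large
// request such as realsz(3000) pads 3000 by ALIGN + META to 3024, rounds
// that up to a full 4096-byte page, and returns 4096 - ALIGN - META = 4072
// usable bytes.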
// Check for memory errors related to a given block of memory.
fn validatemeta(m: *meta, shallow: bool) void = {
	assert(&m.user: uintptr % ALIGN == 0,
		"invalid alignment for metadata pointer (heap corruption?)");
	// If we were recursively called to check a next pointer, the block
	// needs to be marked as free; meta_next() aborts if it's not
	if (m.sz & 0b1 == 0b1 || shallow) {
		// Block is currently free; verify that it points to a valid
		// next block
		match (meta_next(m)) {
		case null => void;
		case let next: *meta =>
			assert(next: uintptr % ALIGN == META,
				"invalid metadata for small allocation on freelist (heap corruption?)");
			if (!shallow) validatemeta(next, true);
		};
		return;
	};

	// Block is currently allocated; verify that its size is valid
	let second = &m.user[m.sz]: *meta;
	if (size_islarge(m.sz)) {
		assert((&m.user: uintptr - ALIGN) % PAGESZ == 0,
			"invalid large allocation address (non-heap pointer?)");
		assert((m.sz + ALIGN + META) % PAGESZ == 0,
			"invalid metadata for large allocation (non-heap pointer?)");
		assert(second.sz == m.sz,
			"invalid secondary metadata for large allocation (out-of-bounds write?)");
		return;
	};

	assert(bin_getsize(size_getbin(m.sz)) == m.sz,
		"invalid metadata for small allocation (non-heap pointer?)");
	if (second.sz & 0b1 == 0b1) {
		// The next block after it in the chunk is free; recursively
		// verify that it's valid
		validatemeta(second, false);
		return;
	};

	// Note that we can't recurse here because the "next block" might
	// actually be the extra metadata at the end of the chunk (which is
	// never marked as being on the freelist)
	assert(!size_islarge(second.sz),
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
	assert(bin_getsize(size_getbin(second.sz)) == second.sz,
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
};

// Verify that a pointer on a freelist hasn't been touched since it was added.
fn checkpoison(m: *meta, sz: size) void = {
	match (meta_next(m)) {
	case null => void;
	case let next: *meta =>
		validatemeta(next, false);
	};
	for (let i = 0z; i < sz; i += 1) {
		assert(m.user[i] == POISON,
			"invalid poison data on freelist (use after free?)");
	};
};

@fini fn checkleaks() void = {
	for (let i = 0z; i < len(heap.bins); i += 1) {
		for (let m = heap.bins[i]; m != null; m = meta_next(m as *meta)) {
			checkpoison(m as *meta, bin_getsize(i));
		};
	};
	// TODO: Need a debugging malloc that tracks backtraces for
	// currently-active allocations in order to help with finding leaks
	// before we enable this by default. Also need to make sure that this
	// is run after the rest of @fini in order to guarantee that we see
	// all frees
	//assert(heap.cur_allocs == 0, "memory leak");
};
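// A minimal smoke test sketching the round trip through the allocator; the
// 100-byte request (bin 4) is an arbitrary choice.
@test fn malloc_smoke() void = {
	let p = match (malloc(100)) {
	case null =>
		abort("out of memory");
	case let p: *opaque =>
		yield p;
	};

	// The block handed back should be rounded up to its bin size and
	// carry valid metadata
	let m = getmeta(p);
	assert(m.sz == realsz(100), "unexpected block size");

	free_(p);
};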