[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git

malloc.ha (9380B)


// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// This is a simple memory allocator, based on
// Appel, Andrew W., and David A. Naumann. "Verified sequential malloc/free",
// but with logarithmic bin sizing and additional safety checks. Not thread-safe.

// A group of blocks that were allocated together.
export type chunk = union {
	padding: size, // TODO: track number of active allocations here
	data: [*]u8,
};

// Metadata for a block.
export type meta = struct {
	union {
		sz: size,
		next: uintptr,
	},
	user: [*]u8,
};

// Size of the header/footer for allocations.
def META: size = size(size);

// Alignment for pointers returned by malloc.
// XXX: arch
def ALIGN: size = 16;

// Allocation granularity for large allocs. Only used to allow verifying large
// heap pointers, doesn't necessarily need to match system page size.
def PAGESZ: size = 4096;

// Amount of memory to allocate at a time for chunks (2MiB).
def CHUNKSZ: size = 1 << 21;

// Byte to fill allocations with while they're not in use.
def POISON: u8 = 0x69;

export type memory_heap = struct {
	// Number of allocations currently in flight.
	cur_allocs: size,
	// Freelists for blocks up to 2048 bytes.
	bins: [9]nullable *meta,
	// The chunk to allocate from if there are no blocks available in the
	// right freelist.
	cur_chunk: (*chunk, size),
};

let static_heap = memory_heap {
	cur_allocs = 0,
	bins = [null...],
	cur_chunk = (null: *chunk, CHUNKSZ),
};

let heap = &static_heap;

// Re-initializes the heap from scratch, abandoning all prior memory
// allocations, and returns the previous heap.
//
// This function is designed to be called by debug:: in a scenario where
// the heap has been corrupted. It abandons the corrupt heap and
// prepares a fresh heap which debug:: can use to allocate memory for
// any operations it needs to perform during clean-up.
export fn newheap() memory_heap = {
	const old = *heap;
	*heap = memory_heap {
		cur_allocs = 0,
		bins = [null...],
		cur_chunk = (null: *chunk, CHUNKSZ),
	};
	return old;
};
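
// A minimal sketch (not called anywhere) of the recovery flow this enables;
// the caller here stands in for debug:: and is hypothetical.
fn newheap_sketch() void = {
	// Abandon the (presumed corrupt) heap; `old` may be kept around for
	// post-mortem inspection, but pointers from it must not be freed.
	const old = newheap();
	// Allocations are now served from the fresh heap.
	match (malloc(64)) {
	case null => void; // out of memory; nothing to clean up
	case let p: *opaque =>
		free(p);
	};
};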

// Allocates n bytes of memory and returns a pointer to them, or null if there
// is insufficient memory.
export fn malloc(n: size) nullable *opaque = {
	if (n == 0) return null;
	if (size_islarge(n)) {
		// Round up to PAGESZ and just use mmap directly
		n = realsz(n);
		let m = match (segmalloc(n + ALIGN + META)) {
		case null =>
			return null;
		case let p: *opaque =>
			yield (p: uintptr + ALIGN - META): *meta;
		};

		m.sz = n;
		*(&m.user[n]: *size) = n; // For out-of-bounds write detection
		heap.cur_allocs += 1;
		return &m.user;
	};

	let bin = size_getbin(n), sz = bin_getsize(bin);
	let m = match (heap.bins[bin]) {
	case null =>
		if (heap.cur_chunk.1 + META + sz + META > CHUNKSZ) {
			// No space left in this chunk, allocate a new one
			match (segmalloc(CHUNKSZ)) {
			case null =>
				return null;
			case let p: *opaque =>
				heap.cur_chunk = (p: *chunk, size(size));
			};
		};

		// Allocate a new block from the currently-active chunk
		let m = &heap.cur_chunk.0.data[heap.cur_chunk.1]: *meta;
		heap.cur_chunk.1 += META + sz;
		m.sz = sz;
		*(&m.user[sz]: *size) = sz;
		yield m;
	case let m: *meta =>
		// Pop a block off the freelist
		heap.bins[bin] = meta_next(m);
		checkpoison(m, sz);
		m.sz = sz;
		yield m;
	};

	heap.cur_allocs += 1;
	return &m.user;
};
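
// A sketch of the contract documented above: zero-size requests yield null,
// and returned pointers are always ALIGN-aligned.
@test fn malloc_contract() void = {
	assert(malloc(0) == null);
	let p = malloc(25) as *opaque;
	assert(p: uintptr % ALIGN == 0);
	free(p);
};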

// Frees an allocation returned by [[malloc]]. Freeing any other pointer, or
// freeing a pointer that's already been freed, will cause an abort.
export @symbol("rt.free") fn free_(p: nullable *opaque) void = {
	let m = match (p) {
	case null =>
		return;
	case let p: *opaque =>
		yield getmeta(p);
	};
	heap.cur_allocs -= 1;

	if (size_islarge(m.sz)) {
		// Pass through to munmap
		segfree((p: uintptr - ALIGN): *opaque, m.sz + ALIGN + META);
		return;
	};

	// Push onto freelist
	let bin = size_getbin(m.sz);
	m.user[..m.sz] = [POISON...];
	m.next = heap.bins[bin]: uintptr | 0b1;
	heap.bins[bin] = m;
};
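
// A sketch of the freelist behavior above: free pushes onto the head of the
// bin and malloc pops from it, so with no intervening allocations, freeing a
// block and re-requesting the same size returns the same block.
@test fn free_reuse() void = {
	let p = malloc(32) as *opaque;
	free(p);
	let q = malloc(32) as *opaque;
	assert(p == q);
	free(q);
};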

// Changes the allocation size of a pointer to n bytes. If n is smaller than
// the prior allocation, it is truncated; otherwise the allocation is expanded
// and the values of the new bytes are undefined. May return a different pointer
// than the one given if there is insufficient space to expand the pointer
// in-place. Returns null if there is insufficient memory to support the
// request.
export fn realloc(p: nullable *opaque, n: size) nullable *opaque = {
	if (n == 0) {
		free(p);
		return null;
	};
	let m = match (p) {
	case null =>
		return malloc(n);
	case let p: *opaque =>
		yield getmeta(p);
	};
	if (realsz(n) == m.sz) return p;

	let new = match (malloc(n)) {
	case null =>
		return null;
	case let new: *opaque =>
		yield new;
	};
	memcpy(new, &m.user, if (n < m.sz) n else m.sz);
	free(p);
	return new;
};
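
// A sketch of the documented realloc semantics: contents are preserved up to
// the smaller of the old and new sizes, and the pointer may move (here it
// must, since the new size takes the large-allocation path).
@test fn realloc_preserves() void = {
	let p = malloc(8) as *opaque;
	*(p: *u64) = 0x1234;
	let q = realloc(p, 4096) as *opaque;
	assert(*(q: *u64) == 0x1234);
	free(q);
};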

// Gets the metadata for a given allocation. The provided pointer must have been
// returned by [[malloc]] or [[realloc]] and must not have been freed.
export fn getmeta(p: *opaque) *meta = {
	let m = (p: uintptr - META): *meta;
	validatemeta(m, false);
	assert(m.sz & 0b1 == 0,
		"tried to get metadata for already-freed pointer (double free?)");
	return m;
};
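
// Example: the reported size is the request rounded up by [[realsz]], not the
// original n. On a 64-bit target (META == 8), a 100-byte request lands in
// bin 4 and is backed by a 136-byte block.
@test fn getmeta_size() void = {
	let p = malloc(100) as *opaque;
	assert(getmeta(p).sz == bin_getsize(size_getbin(100)));
	free(p);
};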


// Find the maximum allocation size for a given bin.
fn bin_getsize(bin: size) size = {
	// Would need to have bin 0 be ALIGN rather than 0 in this case
	static assert(ALIGN != META);

	// Space bins logarithmically
	let sz = if (bin == 0) 0z else 1 << (bin - 1);

	// And make sure that (bin_getsize(n) + META) % ALIGN == 0, while erring on
	// the side of bin sizes slightly larger than powers of two
	return sz * ALIGN + ALIGN - META;
};
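
// Worked example: on a 64-bit target (ALIGN = 16, META = 8) the bins hold
// blocks of 8, 24, 40, 72, 136, 264, 520, 1032, and 2056 bytes. Each size is
// META short of a multiple of ALIGN, which this sketch checks portably.
@test fn bin_sizes() void = {
	for (let bin = 0z; bin < len(heap.bins); bin += 1) {
		assert((bin_getsize(bin) + META) % ALIGN == 0);
		assert(bin == 0 || bin_getsize(bin) > bin_getsize(bin - 1));
	};
};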

// Find the bin for a given allocation size.
fn size_getbin(sz: size) size = {
	// Undo alignment fudging. Equivalent to
	// ceil((sz - ALIGN + META) / ALIGN)
	sz = (sz + META - 1) / ALIGN;

	// Then undo exponentiation
	if (sz == 0) return 0;
	let ret = 0z;
	for (1 << ret < sz; ret += 1) void;
	return ret + 1;
};
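
// A sketch of the round-trip property these two functions maintain:
// size_getbin returns the smallest bin whose blocks can hold the request.
@test fn bin_roundtrip() void = {
	for (let sz = 1z; sz <= bin_getsize(len(heap.bins) - 1); sz += 1) {
		const bin = size_getbin(sz);
		assert(bin_getsize(bin) >= sz);
		assert(bin == 0 || bin_getsize(bin - 1) < sz);
	};
};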

// Returns true if a given allocation size should use mmap directly.
fn size_islarge(sz: size) bool = sz > bin_getsize(len(heap.bins) - 1);

// Gets the next block on the freelist.
fn meta_next(m: *meta) nullable *meta = {
	assert(m.next & 0b1 == 0b1,
		"expected metadata on freelist to be marked as free (heap corruption?)");
	return (m.next & ~0b1): nullable *meta;
};

// Round a user-requested allocation size up to the smallest size we can
// actually allocate.
fn realsz(sz: size) size = {
	if (size_islarge(sz)) {
		sz += ALIGN + META;
		if (sz % PAGESZ != 0) sz += PAGESZ - sz % PAGESZ;
		return sz - ALIGN - META;
	};

	return bin_getsize(size_getbin(sz));
};
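
// Worked example for the large path: a 5000-byte request grows by
// ALIGN + META, rounds up to two pages, then gives the overhead back, so the
// usable size is 2 * PAGESZ - ALIGN - META (8168 on a 64-bit target).
@test fn realsz_large() void = {
	assert(realsz(5000) == 2 * PAGESZ - ALIGN - META);
	// Small requests round to their bin size instead.
	assert(realsz(25) == bin_getsize(size_getbin(25)));
};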


// Check for memory errors related to a given block of memory.
fn validatemeta(m: *meta, shallow: bool) void = {
	assert(&m.user: uintptr % ALIGN == 0,
		"invalid alignment for metadata pointer (heap corruption?)");
	// If we were recursively called to check a next pointer, the block
	// needs to be marked as free; meta_next() aborts if it's not
	if (m.sz & 0b1 == 0b1 || shallow == true) {
		// Block is currently free, verify that it points to a valid
		// next block
		match (meta_next(m)) {
		case null => void;
		case let next: *meta =>
			assert(next: uintptr % ALIGN == META,
				"invalid metadata for small allocation on freelist (heap corruption?)");
			if (!shallow) validatemeta(next, true);
		};
		return;
	};

	// Block is currently allocated, verify that its size is valid
	let second = &m.user[m.sz]: *meta;
	if (size_islarge(m.sz)) {
		assert((&m.user: uintptr - ALIGN) % PAGESZ == 0,
			"invalid large allocation address (non-heap pointer?)");
		assert((m.sz + ALIGN + META) % PAGESZ == 0,
			"invalid metadata for large allocation (non-heap pointer?)");
		assert(second.sz == m.sz,
			"invalid secondary metadata for large allocation (out-of-bounds write?)");
		return;
	};

	assert(bin_getsize(size_getbin(m.sz)) == m.sz,
		"invalid metadata for small allocation (non-heap pointer?)");
	if (second.sz & 0b1 == 0b1) {
		// Next block after it in the chunk is free, recursively verify
		// that it's valid
		validatemeta(second, false);
		return;
	};

	// Note that we can't recurse here because the "next block" might
	// actually be the extra metadata at the end of the chunk (which is
	// never marked as being on the freelist).
	assert(!size_islarge(second.sz),
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
	assert(bin_getsize(size_getbin(second.sz)) == second.sz,
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
};
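
// A sketch of the invariant the out-of-bounds checks rely on: malloc writes
// the block size both before and after the user data, and the two must agree.
@test fn footer_mirror() void = {
	let p = malloc(32) as *opaque;
	const m = getmeta(p);
	assert(*(&m.user[m.sz]: *size) == m.sz);
	free(p);
};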

// Verify that a pointer on a free list hasn't been touched since it was added.
fn checkpoison(m: *meta, sz: size) void = {
	match (meta_next(m)) {
	case null => void;
	case let next: *meta =>
		validatemeta(next, false);
	};
	for (let i = 0z; i < sz; i += 1) {
		assert(m.user[i] == POISON, "invalid poison data on freelist (use after free?)");
	};
};

@fini fn checkleaks() void = {
	for (let i = 0z; i < len(heap.bins); i += 1) {
		for (let m = heap.bins[i]; m != null; m = meta_next(m as *meta)) {
			checkpoison(m as *meta, bin_getsize(i));
		};
	};
	// TODO: Need a debugging malloc that tracks backtraces for
	// currently-active allocations in order to help with finding leaks
	// before we enable this by default. Also need to make sure that this is
	// run after the rest of @fini in order to guarantee that we see all
	// frees
	//assert(heap.cur_allocs == 0, "memory leak");
};