[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git

malloc.ha (8109B)


// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// This is a simple memory allocator, based on
// Appel, Andrew W., and David A. Naumann. "Verified sequential malloc/free",
// but with logarithmic bin sizing and additional safety checks. Not
// thread-safe.

// Size of the header/footer for allocations.
def META: size = size(size);

// Alignment for pointers returned by malloc.
// XXX: arch
def ALIGN: size = 16;

// Allocation granularity for large allocs. Only used to allow verifying large
// heap pointers, doesn't necessarily need to match system page size.
def PAGESZ: size = 4096;

// Amount of memory to allocate at a time for chunks (2MiB).
def CHUNKSZ: size = 1 << 21;

// Byte to fill allocations with while they're not in use.
def POISON: u8 = 0x69;

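// Added commentary (not upstream): the block layout the code below relies on,
// assuming a 64-bit target where META = 8 and ALIGN = 16.
//
//	small block:  [ sz/next (META) | user data (bin size) | footer (META) ]
//	              carved out of a 2MiB chunk; the footer occupies the same
//	              word the next block in the chunk uses as its header
//	large block:  [ pad (ALIGN - META) | sz (META) | user data | footer ]
//	              mmap'd directly, starting on a page boundary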

// Allocates n bytes of memory and returns a pointer to them, or null if there
// is insufficient memory.
export fn malloc(n: size) nullable *opaque = {
	if (n == 0) return null;
	if (size_islarge(n)) {
		// Round up to PAGESZ and just use mmap directly
		n = realsz(n);
		let m = match (segmalloc(n + ALIGN + META)) {
		case null =>
			return null;
		case let p: *opaque =>
			yield (p: uintptr + ALIGN - META): *meta;
		};

		m.sz = n;
		*(&m.user[n]: *size) = n; // For out-of-bounds write detection
		heap.cur_allocs += 1;
		return &m.user;
	};

	let bin = size_getbin(n), sz = bin_getsize(bin);
	let m = match (heap.bins[bin]) {
	case null =>
		if (heap.cur_chunk.1 + META + sz + META > CHUNKSZ) {
			// No space left in this chunk, allocate a new one
			match (segmalloc(CHUNKSZ)) {
			case null =>
				return null;
			case let p: *opaque =>
				heap.cur_chunk = (p: *chunk, size(size));
			};
		};

		// Allocate a new block from the currently-active chunk
		let m = &heap.cur_chunk.0.data[heap.cur_chunk.1]: *meta;
		heap.cur_chunk.1 += META + sz;
		m.sz = sz;
		*(&m.user[sz]: *size) = sz;
		yield m;
	case let m: *meta =>
		// Pop a block off the freelist
		heap.bins[bin] = meta_next(m);
		checkpoison(m, sz);
		m.sz = sz;
		yield m;
	};

	heap.cur_allocs += 1;
	return &m.user;
};
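
// An illustrative smoke test (added sketch, not part of upstream malloc.ha):
// allocate a small block, check its metadata, and return it to its bin.
@test fn malloc_smoke() void = {
	let p = match (malloc(32)) {
	case null =>
		abort("out of memory");
	case let p: *opaque =>
		yield p;
	};
	// 32 bytes lands in a small bin, so the recorded size is the bin
	// size, which is at least as large as the request
	assert(getmeta(p).sz >= 32);
	free_(p);
};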

// Frees an allocation returned by [[malloc]]. Freeing any other pointer, or
// freeing a pointer that's already been freed, will cause an abort.
export @symbol("rt.free") fn free_(p: nullable *opaque) void = {
	let m = match (p) {
	case null =>
		return;
	case let p: *opaque =>
		yield getmeta(p);
	};
	heap.cur_allocs -= 1;

	if (size_islarge(m.sz)) {
		// Pass through to munmap
		segfree((p: uintptr - ALIGN): *opaque, m.sz + ALIGN + META);
		return;
	};

	// Push onto freelist
	let bin = size_getbin(m.sz);
	m.user[..m.sz] = [POISON...];
	m.next = heap.bins[bin]: uintptr | 0b1;
	heap.bins[bin] = m;
};
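
// Added note (not upstream): heap.bins[bin] is a LIFO list threaded through
// the free blocks themselves. Tagging the low bit of m.next is safe because
// every *meta sits META bytes past an ALIGN boundary and is therefore even,
// so real links never have the low bit set; meta_next() checks and strips it.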

// Changes the allocation size of a pointer to n bytes. If n is smaller than
// the prior allocation, it is truncated; otherwise the allocation is expanded
// and the values of the new bytes are undefined. May return a different pointer
// than the one given if there is insufficient space to expand the pointer
// in-place. Returns null if there is insufficient memory to support the
// request.
export fn realloc(p: nullable *opaque, n: size) nullable *opaque = {
	if (n == 0) {
		free(p);
		return null;
	};
	let m = match (p) {
	case null =>
		return malloc(n);
	case let p: *opaque =>
		yield getmeta(p);
	};
	if (realsz(n) == m.sz) return p;

	let new = match (malloc(n)) {
	case null =>
		return null;
	case let new: *opaque =>
		yield new;
	};
	memcpy(new, &m.user, if (n < m.sz) n else m.sz);
	free(p);
	return new;
};
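
// A usage sketch (added, not upstream): growing an allocation. On success the
// old pointer must no longer be used; on failure (null) the original
// allocation is untouched and still owned by the caller.
@test fn realloc_grow() void = {
	let buf = match (malloc(24)) {
	case null =>
		abort("out of memory");
	case let p: *opaque =>
		yield p;
	};
	// 24 bytes fits a small bin; 4000 bytes is a large allocation, so
	// the data is copied and a different pointer comes back
	buf = match (realloc(buf, 4000)) {
	case null =>
		abort("out of memory");
	case let p: *opaque =>
		yield p;
	};
	assert(getmeta(buf).sz >= 4000);
	free_(buf);
};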

// Gets the metadata for a given allocation. The provided pointer must have been
// returned by [[malloc]] or [[realloc]] and must not have been freed.
export fn getmeta(p: *opaque) *meta = {
	let m = (p: uintptr - META): *meta;
	validatemeta(m, false);
	assert(m.sz & 0b1 == 0,
		"tried to get metadata for already-freed pointer (double free?)");
	return m;
};
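
// Added note (not upstream): m.sz and m.next share storage in the meta type,
// so on a freed block the size field carries the freelist tag bit set by
// free_(); that is what the double-free assert above catches.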


// Find the maximum allocation size for a given bin.
fn bin_getsize(bin: size) size = {
	// Would need to have bin 0 be ALIGN rather than 0 in this case
	static assert(ALIGN != META);

	// Space bins logarithmically
	let sz = if (bin == 0) 0z else 1 << (bin - 1);

	// And make sure that (bin_getsize(n) + META) % ALIGN == 0, while erring on
	// the side of bin sizes slightly larger than powers of two
	return sz * ALIGN + ALIGN - META;
};
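
// Worked example (added commentary): with META = 8 and ALIGN = 16, bins 0,
// 1, 2, 3, 4, ... hold 8, 24, 40, 72, 136, ... bytes; each is META short of
// a multiple of ALIGN, so &meta.user always lands on a 16-byte boundary.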

// Find the bin for a given allocation size.
fn size_getbin(sz: size) size = {
	// Undo alignment fudging. Equivalent to
	// ceil((sz - ALIGN + META) / ALIGN)
	sz = (sz + META - 1) / ALIGN;

	// Then undo exponentiation
	if (sz == 0) return 0;
	let ret = 0z;
	for (1 << ret < sz; ret += 1) void;
	return ret + 1;
};
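
// Added sketch (not upstream): the two helpers above are inverses on exact
// bin sizes, and every bin keeps user pointers aligned. A round-trip check:
@test fn bins_roundtrip() void = {
	for (let bin = 0z; bin < len(heap.bins); bin += 1) {
		const sz = bin_getsize(bin);
		// Exact bin sizes map back to their own bin...
		assert(size_getbin(sz) == bin);
		// ...keep &meta.user ALIGN-aligned...
		assert((sz + META) % ALIGN == 0);
		// ...and never spill into the mmap'd large-allocation path
		assert(!size_islarge(sz));
	};
};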

// Returns true if a given allocation size should use mmap directly.
fn size_islarge(sz: size) bool = sz > bin_getsize(len(heap.bins) - 1);

// Gets the next block on the freelist.
fn meta_next(m: *meta) nullable *meta = {
	assert(m.next & 0b1 == 0b1,
		"expected metadata on freelist to be marked as free (heap corruption?)");
	return (m.next & ~0b1): nullable *meta;
};

// Round a user-requested allocation size up to the smallest size we can
// actually allocate.
fn realsz(sz: size) size = {
	if (size_islarge(sz)) {
		sz += ALIGN + META;
		if (sz % PAGESZ != 0) sz += PAGESZ - sz % PAGESZ;
		return sz - ALIGN - META;
	};

	return bin_getsize(size_getbin(sz));
};
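
// Worked examples (added commentary, META = 8, ALIGN = 16, PAGESZ = 4096):
// realsz(30) = bin_getsize(size_getbin(30)) = bin_getsize(2) = 40, while
// realsz(5000) pads to 5024, rounds up to two pages (8192), and returns
// 8192 - 24 = 8168 usable bytes.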


// Check for memory errors related to a given block of memory.
fn validatemeta(m: *meta, shallow: bool) void = {
	assert(&m.user: uintptr % ALIGN == 0,
		"invalid alignment for metadata pointer (heap corruption?)");
	// If we were recursively called to check a next pointer, the block
	// needs to be marked as free; meta_next() aborts if it's not
	if (m.sz & 0b1 == 0b1 || shallow) {
		// Block is currently free, verify that it points to a valid
		// next block
		match (meta_next(m)) {
		case null => void;
		case let next: *meta =>
			assert(next: uintptr % ALIGN == META,
				"invalid metadata for small allocation on freelist (heap corruption?)");
			if (!shallow) validatemeta(next, true);
		};
		return;
	};

	// Block is currently allocated, verify that its size is valid
	let second = &m.user[m.sz]: *meta;
	if (size_islarge(m.sz)) {
		assert((&m.user: uintptr - ALIGN) % PAGESZ == 0,
			"invalid large allocation address (non-heap pointer?)");
		assert((m.sz + ALIGN + META) % PAGESZ == 0,
			"invalid metadata for large allocation (non-heap pointer?)");
		assert(second.sz == m.sz,
			"invalid secondary metadata for large allocation (out-of-bounds write?)");
		return;
	};

	assert(bin_getsize(size_getbin(m.sz)) == m.sz,
		"invalid metadata for small allocation (non-heap pointer?)");
	if (second.sz & 0b1 == 0b1) {
		// Next block after it in the chunk is free, recursively verify
		// that it's valid
		validatemeta(second, false);
		return;
	};

	// Note that we can't recurse here because the "next block" might
	// actually be the extra metadata at the end of the chunk (which is
	// never marked as being on the freelist)
	assert(!size_islarge(second.sz),
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
	assert(bin_getsize(size_getbin(second.sz)) == second.sz,
		"invalid secondary metadata for small allocation (out-of-bounds write?)");
};
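
// Added note (not upstream): for small blocks, the word at &m.user[m.sz] is
// the header of the next block in the chunk (or the chunk's trailing footer),
// so the checks above only require it to look like a valid live size or a
// tagged freelist link; a stray out-of-bounds write rarely satisfies either.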

// Verify that a pointer on a free list hasn't been touched since it was added.
fn checkpoison(m: *meta, sz: size) void = {
	match (meta_next(m)) {
	case null => void;
	case let next: *meta =>
		validatemeta(next, false);
	};
	for (let i = 0z; i < sz; i += 1) {
		assert(m.user[i] == POISON, "invalid poison data on freelist (use after free?)");
	};
};

@fini fn checkleaks() void = {
	for (let i = 0z; i < len(heap.bins); i += 1) {
		for (let m = heap.bins[i]; m != null; m = meta_next(m as *meta)) {
			checkpoison(m as *meta, bin_getsize(i));
		};
	};
	// TODO: Need a debugging malloc that tracks backtraces for
	// currently-active allocations in order to help with finding leaks
	// before we enable this by default. Also need to make sure that this is
	// run after the rest of @fini in order to guarantee that we see all
	// frees.
	//assert(heap.cur_allocs == 0, "memory leak");
};