commit a6018f1d4b0e0bde3f682df6de47e7351eaf647b
parent 3d5400e7f259434020f8726557145587f59ff7e7
Author: Ember Sawady <ecs@d2evs.net>
Date: Thu, 25 May 2023 04:18:24 +0000
Rewrite malloc
Signed-off-by: Ember Sawady <ecs@d2evs.net>
Diffstat:
M rt/malloc.ha | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 248 insertions(+), 130 deletions(-)
diff --git a/rt/malloc.ha b/rt/malloc.ha
@@ -1,135 +1,122 @@
-// License: MPL-2.0
-// (c) 2021 Drew DeVault <sir@cmpwn.com>
-// (c) 2021 Ember Sawady <ecs@d2evs.net>
-
-// This is a simple memory allocator, based on Appel, Andrew W., and David A.
-// Naumann. "Verified sequential malloc/free." It is not thread-safe.
-//
-// Large allocations are handled with mmap.
-//
-// For small allocations, we set up 50 bins, where each bin is responsible for
-// 16 different allocation sizes (e.g. bin 1 handles allocations from 9 thru 24
-// bytes); except for the first and last bin, which are responsible for fewer
-// than 16 allocation sizes.
-//
-// Each bin is 1MiB (BIGBLOCK) in size. We ceil the allocation size to the
-// largest size supported for this bin, then break the bin up into smaller
-// blocks. Each block is structured as [{sz: size, data..., link: *void}...];
-// where sz is the size of this (small) block, data is set aside for the
-// user's actual allocation, and link is a pointer to the next block's data
-// field.
-//
-// In short, a bin for a particular size is pre-filled with all allocations of
-// that size, and the first word of each allocation is set to a pointer to the
-// next allocation. As such, malloc becomes:
-//
-// 1. Look up bin; pre-fill if not already allocated
-// 2. Let p = bin; bin = *bin; return p
-//
-// Then, free is simply:
-// 1. Look up bin
-// 2. *p = bin;
-// 3. bin = p;
-//
-// Note that over time this can cause the allocations in each bin to fall out
-// of address order. This has no consequences for performance or
-// correctness.
-
-def ALIGN: size = 2;
-def WORD: size = size(size);
-def WASTE: size = WORD * ALIGN - WORD;
-def BIGBLOCK: size = (2 << 16) * WORD;
-
-let bins: [50]nullable *void = [null...];
-
-fn bin2size(b: size) size = ((b + 1) * ALIGN - 1) * WORD;
-
-fn size2bin(s: size) size = {
- assert(s <= bin2size(len(bins) - 1), "Size exceeds maximum for bin");
- return (s + (WORD * (ALIGN - 1) - 1)) / (WORD * ALIGN);
+// This is a simple memory allocator, based on Appel, Andrew W., and David A.
+// Naumann. "Verified sequential malloc/free", but with logarithmic bin sizing
+// and additional safety checks. It is not thread-safe.
+
+// A group of blocks that were allocated together.
+type chunk = union {
+ padding: size, // TODO: track number of active allocations here
+ data: [*]u8,
+};
+
+// Metadata for a block.
+export type meta = struct {
+ union {
+ sz: size,
+ next: uintptr,
+ },
+ user: [*]u8,
};
+// Size of the header/footer for allocations.
+def META: size = size(size);
+
+// Alignment for pointers returned by malloc.
+// XXX: arch
+def ALIGN: size = 16;
+
+// Allocation granularity for large allocs. Only used to allow sanity-checking
+// of large heap pointers; this doesn't necessarily need to match the system
+// page size.
+def PAGESZ: size = 4096;
+
+// Amount of memory to allocate at a time for chunks (2MiB).
+def CHUNKSZ: size = 1 << 21;
+
+// Byte to fill allocations with while they're not in use.
+def POISON: u8 = 0x69;
+
+// Number of allocations currently in flight.
+let cur_allocs: size = 0;
+
+// Freelists for small blocks, up to bin_getsize(8) = 2056 bytes.
+let bins: [9]nullable *meta = [null...];
+
+// The chunk to allocate from if there are no blocks available in the right
+// freelist.
+let cur_chunk: (*chunk, size) = (null: *chunk, CHUNKSZ);
+
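+// Block layout sketch (illustrative; assumes a 64-bit size type, so META = 8).
+// Within a chunk, blocks are laid out back to back:
+//
+//	[padding | sz | user... | sz | user... | sz]
+//
+// Each block's trailing size word doubles as the next block's meta, so a block
+// in bin b consumes META + bin_getsize(b) bytes and &meta.user always lands on
+// an ALIGN boundary.
+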
// Allocates n bytes of memory and returns a pointer to them, or null if there
// is insufficient memory.
export fn malloc(n: size) nullable *void = {
- return if (n == 0) null
- else if (n > bin2size(len(bins) - 1)) malloc_large(n)
- else malloc_small(n);
-};
+ if (n == 0) return null;
+ if (size_islarge(n)) {
+ // Round up to PAGESZ and just use mmap directly
+ n = realsz(n);
+ let m = match (segmalloc(n + ALIGN + META)) {
+ case null =>
+ return null;
+ case let p: *void =>
+ yield (p: uintptr + ALIGN - META): *meta;
+ };
-fn malloc_large(n: size) nullable *void = {
- let p = segmalloc(n + WASTE + WORD);
- if (p == null) {
- return null;
+ m.sz = n;
+ *(&m.user[n]: *size) = n; // For out-of-bounds write detection
+ cur_allocs += 1;
+ return &m.user;
};
- let bsize = (p: uintptr + WASTE: uintptr): *[1]size;
- bsize[0] = n;
- return (p: uintptr + WASTE: uintptr + WORD: uintptr): nullable *void;
-};
-fn malloc_small(n: size) nullable *void = {
- const b = size2bin(n);
- let p = bins[b];
- if (p == null) {
- p = fill_bin(b);
- if (p != null) {
- bins[b] = p;
+ let bin = size_getbin(n), sz = bin_getsize(bin);
+ let m = match (bins[bin]) {
+ case null =>
+ if (cur_chunk.1 + META + sz + META > CHUNKSZ) {
+ // No space left in this chunk, allocate a new one
+ match (segmalloc(CHUNKSZ)) {
+ case null =>
+ return null;
+ case let p: *void =>
+ cur_chunk = (p: *chunk, size(size));
+ };
};
- };
- return if (p != null) {
- let q = *(p: **void);
- bins[b] = q;
- yield p;
- } else null;
-};
-
-fn fill_bin(b: size) nullable *void = {
- const s = bin2size(b);
- let p = segmalloc(BIGBLOCK);
- return if (p == null) null else list_from_block(s, p: uintptr);
-};
-fn list_from_block(s: size, p: uintptr) nullable *void = {
- const nblocks = (BIGBLOCK - WASTE) / (s + WORD);
-
- let q = p + WASTE: uintptr; // align q+WORD
- for (let j = 0z; j != nblocks - 1; j += 1) {
- let sz = q: *size;
- let useralloc = q + WORD: uintptr; // aligned
- let next = (useralloc + s: uintptr + WORD: uintptr): *void;
- *sz = s;
- *(useralloc: **void) = next;
- q += s: uintptr + WORD: uintptr;
+ // Allocate a new block from the currently-active chunk
+ let m = &cur_chunk.0.data[cur_chunk.1]: *meta;
+ cur_chunk.1 += META + sz;
+ m.sz = sz;
+ *(&m.user[sz]: *size) = sz;
+ yield m;
+ case let m: *meta =>
+ // Pop a block off the freelist
+ bins[bin] = meta_next(m);
+ checkpoison(m, sz);
+ m.sz = sz;
+ yield m;
};
- // Terminate last block:
- (q: *[1]size)[0] = s;
- *((q + 1: uintptr): *nullable *void) = null;
-
- // Return first block:
- return (p + WASTE: uintptr + WORD: uintptr): *void;
+ cur_allocs += 1;
+ return &m.user;
};
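+
+// A worked trace (illustrative; assumes META = 8): malloc(100) is small and
+// lands in bin 4 (capacity 136 bytes), either popping a block off bins[4] or
+// carving META + 136 bytes out of cur_chunk; malloc(5000) is large and mmaps
+// a single 8192-byte (two-page) region via segmalloc.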
-// Frees a pointer previously allocated with [[malloc]].
-export @symbol("rt.free") fn free_(_p: nullable *void) void = {
- if (_p != null) {
- let p = _p: *void;
- let bsize = (p: uintptr - size(size): uintptr): *[1]size;
- let s = bsize[0];
- if (s <= bin2size(len(bins) - 1)) free_small(p, s)
- else free_large(p, s);
+// Frees an allocation returned by [[malloc]]. Freeing any other pointer, or
+// freeing a pointer that's already been freed, will cause an abort.
+export @symbol("rt.free") fn free_(p: nullable *void) void = {
+ let m = match (p) {
+ case null =>
+ return;
+ case let p: *void =>
+ yield getmeta(p);
};
-};
+ cur_allocs -= 1;
-fn free_large(_p: *void, s: size) void = {
- let p = (_p: uintptr - (WASTE: uintptr + WORD: uintptr)): *void;
- segfree(p, s + WASTE + WORD);
-};
+ if (size_islarge(m.sz)) {
+ // Pass through to munmap
+ segfree((p: uintptr - ALIGN): *void, m.sz + ALIGN + META);
+ return;
+ };
-fn free_small(p: *void, s: size) void = {
- let b = size2bin(s);
- let q = bins[b];
- *(p: *nullable *void) = q;
- bins[b] = p;
+ // Push onto freelist
+ let bin = size_getbin(m.sz);
+ m.user[..m.sz] = [POISON...];
+ m.next = bins[bin]: uintptr | 0b1;
+ bins[bin] = m;
};
// Changes the allocation size of a pointer to n bytes. If n is smaller than
@@ -138,30 +125,161 @@ fn free_small(p: *void, s: size) void = {
// than the one given if there is insufficient space to expand the pointer
// in-place. Returns null if there is insufficient memory to support the
// request.
-export fn realloc(_p: nullable *void, n: size) nullable *void = {
+export fn realloc(p: nullable *void, n: size) nullable *void = {
if (n == 0) {
- free_(_p);
+ free(p);
return null;
- } else if (_p == null) {
+ };
+ let m = match (p) {
+ case null =>
return malloc(n);
+ case let p: *void =>
+ yield getmeta(p);
};
+ if (realsz(n) == m.sz) return p;
- let p = _p: *void;
- let bsize = (p: uintptr - size(size): uintptr): *size;
- let s = *bsize;
- if (s >= n) {
- return p;
+ let new = match (malloc(n)) {
+ case null =>
+ return null;
+ case let new: *void =>
+ yield new;
};
+ memcpy(new, &m.user, if (n < m.sz) n else m.sz);
+ free(p);
+ return new;
+};
+
+// Gets the metadata for a given allocation. The provided pointer must have been
+// returned by [[malloc]] and must not have been freed.
+export fn getmeta(p: *void) *meta = {
+ let m = (p: uintptr - META): *meta;
+ validatemeta(m, false);
+ assert(m.sz & 0b1 == 0,
+ "tried to get metadata for already-freed pointer (double free?)");
+ return m;
+};
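+
+// E.g. realloc uses getmeta to recover the rounded-up size (m.sz) of an
+// existing allocation before deciding whether the data needs to move.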
- if (n < bin2size(len(bins) - 1) && size2bin(n) == size2bin(s)) {
- return p;
+
+// Find the maximum allocation size for a given bin.
+fn bin_getsize(bin: size) size = {
+ // Would need to have bin 0 be ALIGN rather than 0 in this case
+ static assert(ALIGN != META);
+
+ // Space bins logarithmically
+ let sz = if (bin == 0) 0z else 1 << (bin - 1);
+
+ // And make sure that (bin_getsize(n) + META) % ALIGN == 0, while erring on
+ // the side of bin sizes slightly larger than powers of two
+ return sz * ALIGN + ALIGN - META;
+};
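+
+// With META = 8, the nine bins hold 8, 24, 40, 72, 136, 264, 520, 1032, and
+// 2056 bytes; aside from bin 0, each is a power of two plus META, so that a
+// META-sized header plus a block always spans a whole number of ALIGN units.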
+
+// Find the bin for a given allocation size.
+fn size_getbin(sz: size) size = {
+ // Undo alignment fudging. Equivalent to
+ // ceil((sz - ALIGN + META) / ALIGN)
+ sz = (sz + META - 1) / ALIGN;
+
+ // Then undo exponentiation
+ if (sz == 0) return 0;
+ let ret = 0z;
+ for (1 << ret < sz; ret += 1) void;
+ return ret + 1;
+};
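+
+// E.g. size_getbin(100) (with META = 8): (100 + 7) / 16 = 6; the smallest
+// power of two >= 6 is 1 << 3, so the result is bin 3 + 1 = 4, and
+// bin_getsize(4) = 136 covers the request.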
+
+// Returns true if a given allocation size should use mmap directly.
+fn size_islarge(sz: size) bool = sz > bin_getsize(len(bins) - 1);
+
+// Gets the next block on the freelist.
+fn meta_next(m: *meta) nullable *meta = {
+ assert(m.next & 0b1 == 0b1,
+ "expected metadata on freelist to be marked as free (heap corruption?)");
+ return (m.next & ~0b1): nullable *meta;
+};
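+
+// The low bit is free to act as an on-freelist tag because meta pointers
+// always sit META (8) bytes before the ALIGN (16)-aligned user data, so their
+// low three bits are zero; free_ sets the bit when pushing a block onto a bin
+// and meta_next masks it off again.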
+
+// Round a user-requested allocation size up to the smallest size that we can
+// actually allocate.
+fn realsz(sz: size) size = {
+ if (size_islarge(sz)) {
+ sz += ALIGN + META;
+ if (sz % PAGESZ != 0) sz += PAGESZ - sz % PAGESZ;
+ return sz - ALIGN - META;
};
- let new = malloc(n);
- if (new != null) {
- memcpy(new: *void, p, s);
- free(p);
+ return bin_getsize(size_getbin(sz));
+};
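+
+// E.g. realsz(5000) (large): 5000 + ALIGN + META = 5024 rounds up to 8192, a
+// whole number of PAGESZ pages, so the caller sees 8192 - 24 = 8168 usable
+// bytes and malloc maps exactly two pages.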
+
+// Check for memory errors related to a given block of memory.
+fn validatemeta(m: *meta, shallow: bool) void = {
+ assert(&m.user: uintptr % ALIGN == 0,
+ "invalid alignment for metadata pointer (heap corruption?)");
+	// If we were recursively called to check a next pointer, the block
+	// needs to be marked as free; meta_next() will abort if it's not
+ if (m.sz & 0b1 == 0b1 || shallow == true) {
+ // Block is currently free, verify that it points to a valid
+ // next block
+ match (meta_next(m)) {
+ case null => void;
+ case let next: *meta =>
+ assert(next: uintptr % ALIGN == META,
+ "invalid metadata for small allocation on freelist (heap corruption?)");
+ if (!shallow) validatemeta(next, true);
+ };
+ return;
};
- return new;
+ // Block is currently allocated, verify that its size is valid
+ let second = &m.user[m.sz]: *meta;
+ if (size_islarge(m.sz)) {
+ assert((&m.user: uintptr - ALIGN) % PAGESZ == 0,
+ "invalid large allocation address (non-heap pointer?)");
+ assert((m.sz + ALIGN + META) % PAGESZ == 0,
+ "invalid metadata for large allocation (non-heap pointer?)");
+ assert(second.sz == m.sz,
+ "invalid secondary metadata for large allocation (out-of-bounds write?)");
+ return;
+ };
+
+ assert(bin_getsize(size_getbin(m.sz)) == m.sz,
+ "invalid metadata for small allocation (non-heap pointer?)");
+ if (second.sz & 0b1 == 0b1) {
+ // Next block after it in the chunk is free, recursively verify
+ // that it's valid
+ validatemeta(second, false);
+ return;
+ };
+
+	// Note that we can't recurse here because the "next block" might
+	// actually be the extra metadata at the end of the chunk (which is
+	// never marked as being on the freelist).
+ assert(!size_islarge(second.sz),
+ "invalid secondary metadata for small allocation (out-of-bounds write?)");
+ assert(bin_getsize(size_getbin(second.sz)) == second.sz,
+ "invalid secondary metadata for small allocation (out-of-bounds write?)");
+};
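+
+// E.g. an out-of-bounds write that clobbers a block's trailing size word will
+// trip one of the "secondary metadata" asserts above the next time the block
+// is freed or validated.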
+
+// Verify that a pointer on a free list hasn't been touched since it was added.
+fn checkpoison(m: *meta, sz: size) void = {
+ match (meta_next(m)) {
+ case null => void;
+ case let next: *meta =>
+ validatemeta(next, false);
+ };
+ for (let i = 0z; i < sz; i += 1) {
+ assert(m.user[i] == POISON, "invalid poison data on freelist (use after free?)");
+ };
+};
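+
+// E.g. a write through a dangling pointer flips POISON bytes and trips this
+// check the next time the block is popped off its bin, or at @fini via
+// checkleaks.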
+
+@fini fn checkleaks() void = {
+ for (let i = 0z; i < len(bins); i += 1) {
+ for (let m = bins[i]; m != null; m = meta_next(m as *meta)) {
+ checkpoison(m as *meta, bin_getsize(i));
+ };
+ };
+ // TODO: Need a debugging malloc that tracks backtraces for
+ // currently-active allocations in order to help with finding leaks
+ // before we enable this by default. Also need to make sure that this is
+ // run after the rest of @fini in order to guarantee that we see all
+ // frees
+ //assert(cur_allocs == 0, "memory leak");
};