/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * A fast multi-threaded malloc.
 */
#ifndef USE_SYSTEM_MALLOC
#define _BSD_SOURCE // so we get MAP_ANON on Linux

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "common.h" // project header, assumed to provide ASSERT, TRACE, MASK, and COUNT_TRAILING_ZEROS
#define CHUNK_SCALE 12 // 4kB chunks
#define PAGE_SCALE  21 // 2MB pages
#define PAGE_SIZE   (1ULL << PAGE_SCALE)
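
// A 2MB page is divided into 512 4kB chunks. Nested slabs (4kB, 32kB, and 256kB)
// are built out of chunks within a page, large slabs occupy a whole page, and
// huge slabs span multiple pages.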
// On both Linux and Mac OS X the size of the mmap-able virtual address space is between 2^46 and 2^47.
// Linux has no problem when you grab the whole thing. Mac OS X apparently does some O(n) work on the
// first page fault that takes over 2 seconds if you mmap 2^46 bytes, so on Mac OS X we only take 2^38
// bytes of virtual space. That is still plenty, since you can only buy a Mac with up to 32GB of RAM
// (as of 2/09).
#ifndef NBD32
#ifdef  __MACOSX__
#define TOTAL_SCALE 38
#else //__MACOSX__
#define TOTAL_SCALE 46
#endif//__MACOSX__
#else //NBD32
#define TOTAL_SCALE 32
#endif//NBD32
#define TOTAL_SIZE (1ULL << TOTAL_SCALE)
#define INVALID_SLAB_CLASS 255
#define METASLAB_CLASS_MAX 2
#define NESTED_4K_SLAB_CLASS_MAX 16
#define NESTED_32K_SLAB_CLASS_MAX 39
#define NESTED_256K_SLAB_CLASS_MAX 63
#define NESTED_SLAB_CLASS_MAX NESTED_256K_SLAB_CLASS_MAX
#define LARGE_SLAB_CLASS_MAX 93
#define HUGE_SLAB_CLASS_MAX (sizeof(BlockSize) / sizeof(*BlockSize) - 1) // the last valid index into BlockSize
#define SLAB_CLASS_MAX HUGE_SLAB_CLASS_MAX
#define NESTED_SLAB_CASES NESTED_4K_SLAB_CASES: case NESTED_32K_SLAB_CASES: case NESTED_256K_SLAB_CASES
#define NESTED_4K_SLAB_CASES   METASLAB_CLASS_MAX+1 ... NESTED_4K_SLAB_CLASS_MAX
#define NESTED_32K_SLAB_CASES  NESTED_4K_SLAB_CLASS_MAX+1 ... NESTED_32K_SLAB_CLASS_MAX: case 0
#define NESTED_256K_SLAB_CASES NESTED_32K_SLAB_CLASS_MAX+1 ... NESTED_SLAB_CLASS_MAX: case 1
#define LARGE_SLAB_CASES NESTED_SLAB_CLASS_MAX+1 ... LARGE_SLAB_CLASS_MAX: case 2
#define HUGE_SLAB_CASES LARGE_SLAB_CLASS_MAX+1 ... HUGE_SLAB_CLASS_MAX
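
// The case macros above rely on GCC's `case LO ... HI:` range extension. The extra
// `: case 0`, `: case 1`, and `: case 2` labels fold the three meta slab classes into
// the cases that share their slab scale: a 4kB nested slab is a meta-class-0 block
// carved from a 32kB slab, a 32kB nested slab is a meta-class-1 block from a 256kB
// slab, and a 256kB nested slab is a meta-class-2 block from a full 2MB page.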
#define SLAB_CLASS_SCALE(class) ({ \
    int _scale = 0; \
    switch (class) { \
        case NESTED_4K_SLAB_CASES:   _scale = 12; break; \
        case NESTED_32K_SLAB_CASES:  _scale = 15; break; \
        case NESTED_256K_SLAB_CASES: _scale = 18; break; \
        case LARGE_SLAB_CASES:       _scale = 21; break; \
    } \
    _scale; \
})

// reconstructed from their use in activate_new_slab(): classify large vs. huge classes
#define IS_LARGE_SLAB_CLASS(class) ((class) > NESTED_SLAB_CLASS_MAX && (class) <= LARGE_SLAB_CLASS_MAX)
#define IS_HUGE_SLAB_CLASS(class)  ((class) > LARGE_SLAB_CLASS_MAX  && (class) <= HUGE_SLAB_CLASS_MAX)
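
// Examples: SLAB_CLASS_SCALE(3) == 12, because the smallest nested class lives on
// 4kB slabs; SLAB_CLASS_SCALE(0) == 15, because a meta-class-0 block *is* a 4kB
// slab and is itself carved out of a 32kB slab. Huge classes fall through all of
// the cases and yield 0.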
static const uint32_t BlockSize[] = {
    // meta slab classes (for the nested slabs)
    1 << 12, 1 << 15, 1 << 18,

    // nested slab classes (4kB, 32kB, and 256kB)
    8, 16, 24, 32, 40, 48, 56, 64, 72, 80,
    88, 96, 112, 120, 128, 144, 160, 176, 192, 224,
    256, 288, 320, 352, 384, 416, 448, 480, 512, 576,
    640, 704, 768, 832, 896, 960, 1024, 1152, 1280, 1408,
    1536, 1664, 1856, 2048, 2240, 2432, 2688, 2944, 3200, 3520,
    3840, 4160, 4544, 4928, 5312, 5696, 6144, 6592, 7040, 7488,

    // large slab classes (full page, 2MB)
    8896, 9984, 11200, 12544, 14016, 15616, 17408, 19328, 21440, 23744,
    26176, 28800, 31616, 34624, 37760, 41024, 44416, 47936, 51584, 55296,
    59008, 62784, 66496, 70208, 73856, 77376, 80832, 84160, 87360, 90368,
    93248, 95936, 98496, 100864,

    // huge slabs (slabs on huge blocks, 2MB-4MB)
    110912, 121984, 134144, 147520, 162240, 178432, 196224, 215808, 237376, 261056,
    287104, 315776, 347328, 382016, 420160, 462144, 508352, 559168, 615040, 676544,
    744192, 818560, 900416, 990400, 1089408, 1198336, 1318144, 1449920, 1594880, 1754368,
};
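
// Block sizes grow roughly geometrically in the larger ranges (each class is about
// 10% bigger than the one before it), which bounds worst-case internal fragmentation
// at roughly the same fraction.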
typedef uint8_t class_t;

typedef struct block {
    struct block *next;
} block_t;

typedef struct slab {
    uint8_t class;
    struct slab *older; // queue links; reconstructed from their use in activate_new_slab() for large/huge slabs
    struct slab *newer;
    unsigned free_list:15; // 1-based 8-byte-granule offset of the first free block; 0 means empty
    unsigned num_in_use:9;
} __attribute__((packed)) slab_t;
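
// The free-list field needs only 15 bits because links are stored as 1-based
// offsets in units of 8-byte granules: 2^15 granules covers the largest (256kB)
// nested slab, and the value 0 is left over to represent NULL.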
typedef struct metaslab {
    char *data; // reconstructed from its use in get_block(); points at the page's first usable chunk
    slab_t slab[1 << (PAGE_SCALE - CHUNK_SCALE)]; // one header per 4kB chunk
    struct {
        struct metaslab *older;
        struct metaslab *newer;
    } q[NESTED_SLAB_CLASS_MAX+1];
    uint64_t partial_slab_bitmap2[NESTED_32K_SLAB_CLASS_MAX+1];
    uint8_t partial_slab_bitmap1[NESTED_SLAB_CLASS_MAX+1];
} metaslab_t;
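
// Two-level partial-slab bitmaps: for 4kB slab classes, each bit of
// partial_slab_bitmap2 covers a group of eight consecutive chunks; for 32kB
// classes each bit covers one 32kB slab (eight chunks); for 256kB classes each
// bit of partial_slab_bitmap1 covers one 256kB slab (64 chunks). See
// find_partial_slab() below.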
char *MemBase = NULL;
char *MemEnd = NULL; // reconstructed; used by the range check in nbd_free()
char *PageBreak = NULL;
size_t *PageMap = NULL;
block_t *FreePages = NULL;

// Nested classes track their active slab by <metaslab, slab_index>; large and
// huge classes track a slab pointer directly.
struct { metaslab_t *metaslab; int slab_index; slab_t *slab; } ActiveSlab[SLAB_CLASS_MAX + 1] = {};
static struct {
    size_t bytes_requested;
    size_t bytes_allocated;
    size_t total_bytes_allocated;
} ClassStats[METASLAB_CLASS_MAX+1];

// For nested classes the queue entries are metaslabs (cast to slab_t *); for
// large and huge classes they are slabs.
static struct {
    slab_t *oldest;
    slab_t *newest;
} PartialSlabQueue[SLAB_CLASS_MAX+1];

static struct {
    slab_t *oldest;
    slab_t *newest;
} FreshPartialSlabQueue[SLAB_CLASS_MAX+1];
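
// Partial slabs are queued FIFO so that existing partial slabs are drained
// before new ones are created. FreshPartialSlabQueue presumably holds partial
// slabs that have not yet been activated; the code that fills it is not shown
// in this excerpt.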
static block_t *get_block (class_t slab_class);
static void free_slab (metaslab_t *metaslab, size_t slab_index); // definition not included in this excerpt
void mem_init (void) {
    ASSERT(INVALID_SLAB_CLASS > SLAB_CLASS_MAX);

    // Reserve the whole arena up front; pages are committed later with mprotect().
    void *buf = mmap(NULL, TOTAL_SIZE, PROT_NONE, MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0);
    if (buf == MAP_FAILED) {
        perror("mmap");
        abort();
    }
    MemEnd = (char *)buf + TOTAL_SIZE;
    MemBase = (char *)( ((size_t)buf + PAGE_SIZE-1) & ~(PAGE_SIZE-1) ); // align to a page boundary

    // The page map lives at the bottom of the arena: one word per 2MB page.
    size_t page_map_size = sizeof(void *) << (TOTAL_SCALE - PAGE_SCALE);
    mprotect(MemBase, page_map_size, PROT_READ|PROT_WRITE);
    PageBreak = MemBase + page_map_size;
    PageMap = (size_t *)MemBase;
}
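
// How pages get registered in PageMap is not shown in this excerpt. A minimal
// sketch of the registration step (hypothetical helper, not the original code),
// assuming PageMap is indexed by a page's offset from MemBase:
static inline void page_map_set (void *page, metaslab_t *metaslab) {
    size_t page_index = ((size_t)page - (size_t)MemBase) >> PAGE_SCALE;
    PageMap[page_index] = (size_t)metaslab;
}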
static class_t get_slab_class (size_t size) {
    for (int i = METASLAB_CLASS_MAX + 1; i <= (int)SLAB_CLASS_MAX; ++i) {
        if (size <= BlockSize[i])
            return (class_t)i;
    }
    return INVALID_SLAB_CLASS;
}
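
// e.g. get_slab_class(100) returns the first class whose block size is at least
// 100 bytes (the class with BlockSize 112 above).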
static class_t get_meta_class (class_t class) {
    int scale = SLAB_CLASS_SCALE(class);
    if (scale == PAGE_SCALE || scale == 0)
        return INVALID_SLAB_CLASS;
    return (scale - 12) / 3;
}
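
// e.g. nested classes on 4kB slabs (scale 12) map to meta class 0, whose
// 4096-byte blocks are the slabs themselves; scale 15 maps to meta class 1
// (32kB blocks) and scale 18 to meta class 2 (256kB blocks). Large classes
// (scale 21) get their slabs straight from get_page() instead.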
static void *get_page (void) {
    block_t *p = FreePages;
    if (p == NULL) {
        p = (block_t *)PageBreak;
        PageBreak += PAGE_SIZE;
        return p;
    }
    FreePages = p->next;
    return p;
}
static void free_page (void *p) {
    ASSERT(p < (void *)PageBreak);
    block_t *b = (block_t *)p;
    b->next = FreePages;
    FreePages = b;
}
static void init_slab (void *b, class_t slab_class) {
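    // Sketch only; the original body is not shown in this excerpt, and this
    // covers nested classes only (large/huge slab initialization would differ).
    // It assumes <b> is the base of a new slab and threads every block onto the
    // free list: links are 1-based 8-byte-granule offsets stored in the first
    // two bytes of each free block, with 0 terminating the list. This matches
    // the decode in get_block() and nbd_free() below.
    size_t block_size = BlockSize[slab_class];
    size_t slab_size = 1ULL << SLAB_CLASS_SCALE(slab_class);
    size_t num_blocks = slab_size / block_size;
    for (size_t i = 0; i < num_blocks; ++i) {
        size_t off = i * block_size;
        uint16_t next = (i + 1 < num_blocks) ? (uint16_t)(((off + block_size) >> 3) + 1) : 0;
        *(uint16_t *)((char *)b + off) = next;
    }

    // Publish the slab header; for nested classes it lives in the metaslab of
    // the enclosing page (assumed here to be found through PageMap).
    metaslab_t *metaslab = (metaslab_t *)PageMap[((size_t)b - (size_t)MemBase) >> PAGE_SCALE];
    size_t slab_index = ((size_t)b & MASK(PAGE_SCALE)) >> CHUNK_SCALE;
    metaslab->slab[slab_index].class = slab_class;
    metaslab->slab[slab_index].free_list = 1; // first block; offsets are 1-based
    metaslab->slab[slab_index].num_in_use = 0;
}
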
static slab_t *new_large_slab (class_t slab_class) {
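    // Sketch only; the original body is not shown in this excerpt. Per the
    // BlockSize comment, huge slabs occupy a 2MB-4MB run of pages; taking two
    // fresh contiguous pages off PageBreak is an assumption, not the original
    // sizing rule, and where the slab header lives for huge slabs is also not
    // shown here.
    slab_t *s = (slab_t *)PageBreak;
    PageBreak += 2 * PAGE_SIZE;
    return s;
}
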
// Returns the chunk index of a partial slab of <target_class> on <metaslab>, or -1 if there is none.
static int find_partial_slab (metaslab_t *metaslab, class_t target_class, int target_index) {
    switch (target_class) {
        case NESTED_4K_SLAB_CASES: {
            // search nearby the target first
            int base_index = (target_index & ~0x7);
            for (int i = 0; i < 8; ++i) {
                if (base_index + i == target_index)
                    continue;
                if (metaslab->slab[base_index + i].class == target_class)
                    return base_index + i;
            }

            // nothing nearby; lazily clear the group's bit and consult the bitmap
            metaslab->partial_slab_bitmap2[target_class] &= ~(1ULL << (base_index >> 3));
            uint64_t bitmap = metaslab->partial_slab_bitmap2[target_class];
            if (bitmap == 0)
                return -1;
            int n = base_index >> 3;
            if (bitmap & (0xFFULL << (n & ~0x7))) {
                bitmap &= 0xFFULL << (n & ~0x7); // search nearby the target first
            }
            base_index = COUNT_TRAILING_ZEROS(bitmap) << 3;
            for (int i = 0; i < 8; ++i) {
                if (metaslab->slab[base_index + i].class == target_class)
                    return base_index + i;
            }
            return -1;
        }
        case NESTED_32K_SLAB_CASES: {
            uint64_t bitmap = metaslab->partial_slab_bitmap2[target_class];
            if (bitmap == 0)
                return -1;
            int n = target_index >> 3;
            if (bitmap & (0xFFULL << (n & ~0x7))) {
                bitmap &= 0xFFULL << (n & ~0x7); // search nearby the target first
            }
            return COUNT_TRAILING_ZEROS(bitmap) << 3;
        }
        case NESTED_256K_SLAB_CASES: {
            uint8_t bitmap = metaslab->partial_slab_bitmap1[target_class];
            if (bitmap == 0)
                return -1;
            return COUNT_TRAILING_ZEROS(bitmap) << 6;
        }
    }
    return -1;
}
static void activate_new_slab (class_t slab_class) {
    slab_t *new_slab = NULL;
    switch (slab_class) {
        case NESTED_SLAB_CASES: {
            int slab_index = ActiveSlab[slab_class].slab_index;
            metaslab_t *metaslab = ActiveSlab[slab_class].metaslab;

            // First look for a partial slab on the same metaslab as the old active slab.
            int new_index = (metaslab != NULL) ? find_partial_slab(metaslab, slab_class, slab_index) : -1;
            if (new_index == -1) {
                // No partial slab on the same metaslab. Remove a metaslab from the front of the queue.
                metaslab = (metaslab_t *)PartialSlabQueue[slab_class].oldest;
                if (metaslab != NULL) {
                    ASSERT(metaslab->q[slab_class].older == NULL);
                    PartialSlabQueue[slab_class].oldest = (slab_t *)metaslab->q[slab_class].newer;
                    if (metaslab->q[slab_class].newer != NULL) {
                        metaslab->q[slab_class].newer->q[slab_class].older = NULL;
                    }
                    new_index = find_partial_slab(metaslab, slab_class, slab_index);
                }
            }
            if (new_index == -1) {
                // Can't find a partial slab; create a new slab.
                block_t *b = get_block(get_meta_class(slab_class));
                init_slab(b, slab_class);
                metaslab = (metaslab_t *)PageMap[((size_t)b - (size_t)MemBase) >> PAGE_SCALE];
                new_index = ((size_t)b & MASK(PAGE_SCALE)) >> CHUNK_SCALE;
            }
            ActiveSlab[slab_class].metaslab = metaslab;
            ActiveSlab[slab_class].slab_index = new_index;
            return;
        }
        case LARGE_SLAB_CASES:
        case HUGE_SLAB_CASES:
            // large or huge slab class
            new_slab = PartialSlabQueue[slab_class].oldest;
            if (new_slab != NULL) {
                ASSERT(new_slab->older == NULL);
                PartialSlabQueue[slab_class].oldest = new_slab->newer;
                if (new_slab->newer != NULL) {
                    new_slab->newer->older = NULL;
                }
            }
            if (new_slab == NULL) {
                if (IS_HUGE_SLAB_CLASS(slab_class)) {
                    new_slab = new_large_slab(slab_class);
                } else {
                    ASSERT(IS_LARGE_SLAB_CLASS(slab_class));
                    new_slab = (slab_t *)get_page();
                }
                init_slab(new_slab, slab_class);
            }
            break;
    }
    ActiveSlab[slab_class].slab = new_slab;
}
static block_t *get_block (class_t slab_class) {

    // Look for a free block on the active slab.
    switch (slab_class) {
        case NESTED_SLAB_CASES: {
            int slab_index = ActiveSlab[slab_class].slab_index;
            metaslab_t *metaslab = ActiveSlab[slab_class].metaslab;
            if (metaslab != NULL) {
                slab_t slab = metaslab->slab[slab_index];
                if (slab.free_list) {
                    // slab headers are indexed by 4kB chunk; <data> points at the chunk after the metaslab header
                    char *slab_base = metaslab->data + ( ( slab_index - 1 ) << CHUNK_SCALE );
                    void *b = (void *)( slab_base + ( ( slab.free_list - 1 ) << 3 ) );
                    metaslab->slab[slab_index].free_list = *(uint16_t *)b; // pop the first free block
                    ++metaslab->slab[slab_index].num_in_use;
                    return (block_t *)b;
                }
            }
            break;
        }
        case LARGE_SLAB_CASES:
            // (allocation from large slabs is not shown in this excerpt)
            break;

        case HUGE_SLAB_CASES:
            // (allocation from huge slabs is not shown in this excerpt)
            break;
    }

    // Find another slab, activate it, and try again.
    activate_new_slab(slab_class);
    return get_block(slab_class); // recursive tail-call
}
void *nbd_malloc (size_t n) {
    TRACE("m1", "nbd_malloc: size %llu", n, 0);
    block_t *b = get_block( get_slab_class(n) );
    TRACE("m1", "nbd_malloc: returning block %p", b, 0);
    return b;
}
void nbd_free (void *x) {
    TRACE("m1", "nbd_free: block %p", x, 0);
    ASSERT(x >= (void *)MemBase && x < (void *)MemEnd);

    block_t *b = (block_t *)x;
    size_t page_index = ((size_t)b - (size_t)MemBase) >> PAGE_SCALE; // PageMap is indexed by page offset from MemBase
    metaslab_t *metaslab = (metaslab_t *)PageMap[page_index];
    ASSERT(metaslab != NULL); // (freeing blocks on large and huge slabs is not shown in this excerpt)
    size_t slab_index = ((size_t)b & MASK(PAGE_SCALE)) >> CHUNK_SCALE;
    slab_t slab = metaslab->slab[slab_index];

    // If <slab> is not a valid 4kB slab header, <b> is on a larger slab. (This check assumes
    // chunk headers that do not head a slab carry INVALID_SLAB_CLASS, so their scale is 0.)
    if (SLAB_CLASS_SCALE(slab.class) == 12) {
        *(uint16_t *)b = slab.free_list; // link the block into the free list
        // the <offset> of the block is offset by 1 so 0 can represent NULL.
        slab.free_list = ( ((size_t)b & MASK(12)) >> 3 ) + 1;
    } else {
        // <b> is not on a 4kB slab.
        slab_index &= ~0x7; // try the 32kB slab containing <b>.
        slab = metaslab->slab[slab_index];
        if (SLAB_CLASS_SCALE(slab.class) == 15) {
            *(uint16_t *)b = slab.free_list;
            slab.free_list = ( ((size_t)b & MASK(15)) >> 3 ) + 1;
        } else {
            // <b> is not on a 32kB slab.
            slab_index &= ~0x3F; // <b> must be on the 256kB slab.
            slab = metaslab->slab[slab_index];
            ASSERT(SLAB_CLASS_SCALE(slab.class) == 18);
            *(uint16_t *)b = slab.free_list;
            slab.free_list = ( ((size_t)b & MASK(18)) >> 3 ) + 1;
        }
    }
    --slab.num_in_use;
    metaslab->slab[slab_index] = slab;
    if (slab.num_in_use == 0) {
        free_slab(metaslab, slab_index);
    }
}
#else //USE_SYSTEM_MALLOC
#include <stdlib.h>
#include <string.h>

void mem_init (void) {
}

void nbd_free (void *x) {
    TRACE("m1", "nbd_free: block %p", x, 0);
    memset(x, 0xcd, sizeof(void *)); // bear trap
    free(x);
}

void *nbd_malloc (size_t n) {
    TRACE("m1", "nbd_malloc: request size %llu", n, 0);
    void *x = malloc(n);
    TRACE("m1", "nbd_malloc: returning %p", x, 0);
    return x;
}
#endif//USE_SYSTEM_MALLOC