// circuitpython/py/gc.c (481 lines, 16 KiB, C)

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "mpconfig.h"
#include "gc.h"
#if MICROPY_ENABLE_GC
// Debug output switch: change the `#if 0` below to `#if 1` to turn tracing on.
#if 0 // print debugging info
// DEBUG_PRINT also gates compilation of gc_test() near the end of this file.
#define DEBUG_PRINT (1)
// Self-referential define: DEBUG_printf(...) calls are left as written, so they
// refer to a function of that name (presumably declared elsewhere -- TODO confirm).
#define DEBUG_printf DEBUG_printf
#else // don't print debugging info
// Every DEBUG_printf(...) call expands to a no-op expression.
#define DEBUG_printf(...) (void)0
#endif
// Raw byte type, used for the allocation table.
typedef unsigned char byte;
// Heap geometry: the pool is managed in fixed-size blocks of WORDS_PER_BLOCK machine words.
#define WORDS_PER_BLOCK (4)
#define BYTES_PER_BLOCK (WORDS_PER_BLOCK * BYTES_PER_WORD)
// Number of entries in the explicit mark stack used while tracing.
#define STACK_SIZE (64) // tunable; minimum is 1
// Allocation table: 2 bits of state per pool block (see the AT_* constants).
STATIC byte *gc_alloc_table_start;
STATIC machine_uint_t gc_alloc_table_byte_len;
// Bounds of the block pool; gc_pool_end is one past the last word of the pool.
STATIC machine_uint_t *gc_pool_start;
STATIC machine_uint_t *gc_pool_end;
// Set to 1 when a block was marked but could not be pushed because gc_stack was full.
STATIC int gc_stack_overflow;
// Mark stack holding indices of marked blocks whose contents still need scanning;
// gc_sp points at the next free slot (gc_sp == gc_stack means empty).
STATIC machine_uint_t gc_stack[STACK_SIZE];
STATIC machine_uint_t *gc_sp;
// ATB = allocation table byte
// Each ATB packs the 2-bit state of BLOCKS_PER_ATB consecutive blocks, with the
// lowest-numbered block in the lowest two bits.
// 0b00 = FREE -- free block
// 0b01 = HEAD -- head of a chain of blocks
// 0b10 = TAIL -- in the tail of a chain of blocks
// 0b11 = MARK -- marked head block
#define AT_FREE (0)
#define AT_HEAD (1)
#define AT_TAIL (2)
#define AT_MARK (3)
#define BLOCKS_PER_ATB (4)
// Bit masks selecting each of the four 2-bit fields inside one ATB.
#define ATB_MASK_0 (0x03)
#define ATB_MASK_1 (0x0c)
#define ATB_MASK_2 (0x30)
#define ATB_MASK_3 (0xc0)
// Tests on an ATB value `a`: true when the given field is FREE (both bits clear).
#define ATB_0_IS_FREE(a) (((a) & ATB_MASK_0) == 0)
#define ATB_1_IS_FREE(a) (((a) & ATB_MASK_1) == 0)
#define ATB_2_IS_FREE(a) (((a) & ATB_MASK_2) == 0)
#define ATB_3_IS_FREE(a) (((a) & ATB_MASK_3) == 0)
// Bit offset of a block's 2-bit field within its ATB.
#define BLOCK_SHIFT(block) (2 * ((block) & (BLOCKS_PER_ATB - 1)))
// Read the 2-bit state (one of AT_*) of a block.  No bounds check is done on the block index.
#define ATB_GET_KIND(block) ((gc_alloc_table_start[(block) / BLOCKS_PER_ATB] >> BLOCK_SHIFT(block)) & 3)
// State transitions.  ANY_TO_FREE clears both bits.  The FREE_TO_* forms only OR bits in,
// so they are correct only when the field is currently FREE (0b00).
#define ATB_ANY_TO_FREE(block) do { gc_alloc_table_start[(block) / BLOCKS_PER_ATB] &= (~(AT_MARK << BLOCK_SHIFT(block))); } while (0)
#define ATB_FREE_TO_HEAD(block) do { gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_HEAD << BLOCK_SHIFT(block)); } while (0)
#define ATB_FREE_TO_TAIL(block) do { gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_TAIL << BLOCK_SHIFT(block)); } while (0)
// HEAD (0b01) -> MARK (0b11): OR-ing in 0b11 sets the upper bit.
#define ATB_HEAD_TO_MARK(block) do { gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_MARK << BLOCK_SHIFT(block)); } while (0)
// MARK (0b11) -> HEAD (0b01): clearing the AT_TAIL bit (0b10) drops the upper bit.
#define ATB_MARK_TO_HEAD(block) do { gc_alloc_table_start[(block) / BLOCKS_PER_ATB] &= (~(AT_TAIL << BLOCK_SHIFT(block))); } while (0)
// Conversions between an integer-typed address inside the pool and a block index.
#define BLOCK_FROM_PTR(ptr) (((ptr) - (machine_uint_t)gc_pool_start) / BYTES_PER_BLOCK)
#define PTR_FROM_BLOCK(block) (((block) * BYTES_PER_BLOCK + (machine_uint_t)gc_pool_start))
// Index of the ATB that holds the state of block `bl`.
#define ATB_FROM_BLOCK(bl) ((bl) / BLOCKS_PER_ATB)
// TODO waste less memory; currently requires that all entries in alloc_table have a corresponding block in pool
void gc_init(void *start, void *end) {
// align end pointer on block boundary
end = (void*)((machine_uint_t)end & (~(BYTES_PER_BLOCK - 1)));
DEBUG_printf("Initializing GC heap: %p-%p\n", start, end);
// calculate parameters for GC
machine_uint_t total_word_len = (machine_uint_t*)end - (machine_uint_t*)start;
gc_alloc_table_byte_len = total_word_len * BYTES_PER_WORD / (1 + BITS_PER_BYTE / 2 * BYTES_PER_BLOCK);
gc_alloc_table_start = (byte*)start;
machine_uint_t gc_pool_block_len = gc_alloc_table_byte_len * BITS_PER_BYTE / 2;
machine_uint_t gc_pool_word_len = gc_pool_block_len * WORDS_PER_BLOCK;
gc_pool_start = (machine_uint_t*)end - gc_pool_word_len;
gc_pool_end = end;
// clear ATBs
memset(gc_alloc_table_start, 0, gc_alloc_table_byte_len);
// allocate first block because gc_pool_start points there and it will never
// be freed, so allocating 1 block with null pointers will minimise memory loss
ATB_FREE_TO_HEAD(0);
for (int i = 0; i < WORDS_PER_BLOCK; i++) {
gc_pool_start[i] = 0;
}
DEBUG_printf("GC layout:\n");
DEBUG_printf(" alloc table at %p, length " UINT_FMT " bytes\n", gc_alloc_table_start, gc_alloc_table_byte_len);
DEBUG_printf(" pool at %p, length " UINT_FMT " blocks = " UINT_FMT " words = " UINT_FMT " bytes\n", gc_pool_start, gc_pool_block_len, gc_pool_word_len, gc_pool_word_len * BYTES_PER_WORD);
}
// Evaluates to non-zero when `ptr` (an integer-typed address) could be the
// address of a pool block: it must be block-aligned and lie inside
// [gc_pool_start, gc_pool_end).  It does not check whether the block is in use.
// The argument is parenthesised so that expression arguments (e.g. `a | b`)
// are grouped correctly.  It is evaluated up to three times, so it must not
// have side effects.
#define VERIFY_PTR(ptr) ( \
        ((ptr) & (BYTES_PER_BLOCK - 1)) == 0 /* must be aligned on a block */ \
        && (ptr) >= (machine_uint_t)gc_pool_start /* must be above start of pool */ \
        && (ptr) < (machine_uint_t)gc_pool_end /* must be below end of pool */ \
    )
// Conservative marking step for one candidate value `ptr` (an integer-typed address).
// If ptr passes VERIFY_PTR and refers to an unmarked HEAD block, the block is
// changed to MARK and its index is pushed on gc_stack so its contents get scanned.
// When gc_stack is full the block is still marked but not pushed, and
// gc_stack_overflow is set so that the skipped scan can be redone later.
// `ptr` is expanded more than once, so it must be free of side effects.
#define VERIFY_MARK_AND_PUSH(ptr) \
do { \
if (VERIFY_PTR(ptr)) { \
machine_uint_t _block = BLOCK_FROM_PTR(ptr); \
if (ATB_GET_KIND(_block) == AT_HEAD) { \
/* an unmarked head, mark it, and push it on gc stack */ \
ATB_HEAD_TO_MARK(_block); \
if (gc_sp < &gc_stack[STACK_SIZE]) { \
*gc_sp++ = _block; \
} else { \
gc_stack_overflow = 1; \
} \
} \
} \
} while (0)
STATIC void gc_drain_stack(void) {
while (gc_sp > gc_stack) {
// pop the next block off the stack
machine_uint_t block = *--gc_sp;
// work out number of consecutive blocks in the chain starting with this one
machine_uint_t n_blocks = 0;
do {
n_blocks += 1;
} while (ATB_GET_KIND(block + n_blocks) == AT_TAIL);
// check this block's children
machine_uint_t *scan = (machine_uint_t*)PTR_FROM_BLOCK(block);
for (machine_uint_t i = n_blocks * WORDS_PER_BLOCK; i > 0; i--, scan++) {
machine_uint_t ptr2 = *scan;
VERIFY_MARK_AND_PUSH(ptr2);
}
}
}
// Recover from mark-stack overflow.
// While gc_stack_overflow is set, some blocks were marked without having their
// contents scanned.  Walk the whole table and re-trace every marked block,
// repeating the pass for as long as the stack overflows again during it.
STATIC void gc_deal_with_stack_overflow(void) {
    const machine_uint_t n_total_blocks = gc_alloc_table_byte_len * BLOCKS_PER_ATB;

    while (gc_stack_overflow) {
        // start the pass with a cleared flag and an empty stack
        gc_stack_overflow = 0;
        gc_sp = gc_stack;

        for (machine_uint_t blk = 0; blk < n_total_blocks; blk++) {
            if (ATB_GET_KIND(blk) != AT_MARK) {
                continue;
            }
            // marked block: trace it (again), its children may not have been visited
            *gc_sp++ = blk;
            gc_drain_stack();
        }
    }
}
// Sweep phase: release every chain whose head was not marked, and turn each
// marked head back into a plain HEAD ready for the next collection.
STATIC void gc_sweep(void) {
    const machine_uint_t n_total_blocks = gc_alloc_table_byte_len * BLOCKS_PER_ATB;
    // non-zero while walking the tail of a chain whose head has just been freed
    int releasing_chain = 0;

    for (machine_uint_t blk = 0; blk < n_total_blocks; blk++) {
        int kind = ATB_GET_KIND(blk);
        if (kind == AT_MARK) {
            // reachable: keep it, and keep the tail blocks that follow
            ATB_MARK_TO_HEAD(blk);
            releasing_chain = 0;
        } else if (kind == AT_HEAD) {
            // unmarked head: free it and remember to free its tail
            releasing_chain = 1;
            ATB_ANY_TO_FREE(blk);
        } else if (kind == AT_TAIL && releasing_chain) {
            ATB_ANY_TO_FREE(blk);
        }
        // free blocks need no action
    }
}
// Begin a collection cycle: clear the overflow flag and empty the mark stack.
// Roots are then supplied through gc_collect_root(), and the cycle is finished
// with gc_collect_end().
void gc_collect_start(void) {
gc_stack_overflow = 0;
gc_sp = gc_stack;
}
// Treat each of the `len` entries of `ptrs` as a potential root pointer.
// Entries that refer to the head of an allocated chain get marked, and
// everything reachable from them is traced straight away.
void gc_collect_root(void **ptrs, machine_uint_t len) {
    machine_uint_t idx = 0;
    while (idx < len) {
        machine_uint_t candidate = (machine_uint_t)ptrs[idx];
        idx += 1;
        VERIFY_MARK_AND_PUSH(candidate);
        // drain after every root so the stack never holds more than one root's work
        gc_drain_stack();
    }
}
// Finish a collection cycle: first re-trace any blocks whose scan was skipped
// because the mark stack overflowed, then sweep (free) everything left unmarked.
void gc_collect_end(void) {
gc_deal_with_stack_overflow();
gc_sweep();
}
// Fill `info` with statistics about the heap.
//   total      - size of the block pool in bytes
//   used, free - bytes in allocated / free blocks
//   num_1block - number of allocated chains that are exactly 1 block long
//   num_2block - number of allocated chains that are exactly 2 blocks long
//   max_block  - length, in blocks, of the longest allocated chain
// A chain is accounted for when the block following it is seen.  The chain
// that extends to the very end of the heap has no following block, so it is
// accounted for after the loop (it was previously left out of the chain
// statistics).
void gc_info(gc_info_t *info) {
    const machine_uint_t n_total_blocks = gc_alloc_table_byte_len * BLOCKS_PER_ATB;

    info->total = (gc_pool_end - gc_pool_start) * sizeof(machine_uint_t);
    info->used = 0;
    info->free = 0;
    info->num_1block = 0;
    info->num_2block = 0;
    info->max_block = 0;

    // length of the chain currently being walked (0 when not inside a chain)
    machine_uint_t len = 0;
    for (machine_uint_t block = 0; block < n_total_blocks; block++) {
        machine_uint_t kind = ATB_GET_KIND(block);
        if (kind == AT_FREE || kind == AT_HEAD) {
            // the previous chain (if any) has ended; record its length
            if (len == 1) {
                info->num_1block += 1;
            } else if (len == 2) {
                info->num_2block += 1;
            }
            if (len > info->max_block) {
                info->max_block = len;
            }
        }
        switch (kind) {
            case AT_FREE:
                info->free += 1;
                len = 0;
                break;
            case AT_HEAD:
                info->used += 1;
                len = 1;
                break;
            case AT_TAIL:
                info->used += 1;
                len += 1;
                break;
            case AT_MARK:
                // shouldn't happen
                break;
        }
    }

    // record the chain that runs up to the end of the heap, if there is one
    if (len == 1) {
        info->num_1block += 1;
    } else if (len == 2) {
        info->num_2block += 1;
    }
    if (len > info->max_block) {
        info->max_block = len;
    }

    // convert block counts to bytes
    info->used *= BYTES_PER_BLOCK;
    info->free *= BYTES_PER_BLOCK;
}
// Allocate at least n_bytes from the GC heap.
// The request is rounded up to a whole number of blocks and satisfied with the
// first run of that many consecutive free blocks (first fit).  If no run is
// found, gc_collect() is called once and the search is repeated.
// Returns a pointer to the first block, or NULL when n_bytes rounds to zero
// blocks or when no suitable run exists even after collecting.
// The returned memory is not cleared.
void *gc_alloc(machine_uint_t n_bytes) {
    machine_uint_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK;
    DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks);

    // check for 0 allocation
    if (n_blocks == 0) {
        return NULL;
    }

    machine_uint_t i;
    machine_uint_t end_block;
    machine_uint_t start_block;
    machine_uint_t n_free;
    int collected = 0;
    for (;;) {
        // Every scan must start with an empty run.  Without this reset, free
        // blocks counted at the end of the failed first scan would be carried
        // into the rescan, and start_block below could wrap around.
        n_free = 0;

        // look for a run of n_blocks available blocks
        for (i = 0; i < gc_alloc_table_byte_len; i++) {
            byte a = gc_alloc_table_start[i];
            if (ATB_0_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 0; goto found; } } else { n_free = 0; }
            if (ATB_1_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 1; goto found; } } else { n_free = 0; }
            if (ATB_2_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 2; goto found; } } else { n_free = 0; }
            if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; }
        }

        // nothing found!
        if (collected) {
            return NULL;
        }
        DEBUG_printf("gc_alloc(" UINT_FMT "): no free mem, triggering GC\n", n_bytes);
        gc_collect();
        collected = 1;
    }

    // found, ending at block i inclusive
found:
    // get starting and end blocks, both inclusive
    end_block = i;
    start_block = i - n_free + 1;

    // mark first block as used head
    ATB_FREE_TO_HEAD(start_block);

    // mark rest of blocks as used tail
    // TODO for a run of many blocks can make this more efficient
    for (machine_uint_t bl = start_block + 1; bl <= end_block; bl++) {
        ATB_FREE_TO_TAIL(bl);
    }

    // return pointer to first block
    return (void*)(gc_pool_start + start_block * WORDS_PER_BLOCK);
}
// force the freeing of a piece of memory
void gc_free(void *ptr_in) {
machine_uint_t ptr = (machine_uint_t)ptr_in;
if (VERIFY_PTR(ptr)) {
machine_uint_t block = BLOCK_FROM_PTR(ptr);
if (ATB_GET_KIND(block) == AT_HEAD) {
// free head and all of its tail blocks
do {
ATB_ANY_TO_FREE(block);
block += 1;
} while (ATB_GET_KIND(block) == AT_TAIL);
}
}
}
machine_uint_t gc_nbytes(void *ptr_in) {
machine_uint_t ptr = (machine_uint_t)ptr_in;
if (VERIFY_PTR(ptr)) {
machine_uint_t block = BLOCK_FROM_PTR(ptr);
if (ATB_GET_KIND(block) == AT_HEAD) {
// work out number of consecutive blocks in the chain starting with this on
machine_uint_t n_blocks = 0;
do {
n_blocks += 1;
} while (ATB_GET_KIND(block + n_blocks) == AT_TAIL);
return n_blocks * BYTES_PER_BLOCK;
}
}
// invalid pointer
return 0;
}
// use this realloc for now, one below is broken
// Make sure the allocation at `ptr` can hold n_bytes.
// - If the existing allocation is already large enough, `ptr` is returned
//   unchanged (nothing is shrunk or freed).
// - Otherwise a new allocation is made, the old contents are copied across,
//   the old allocation is freed and the new pointer is returned.
// - Returns NULL when the new allocation fails; the old allocation is then
//   left intact.
// A `ptr` that is not a live allocation (e.g. NULL) has size 0, so the call
// then behaves like gc_alloc(n_bytes).
void *gc_realloc(void *ptr, machine_uint_t n_bytes) {
    machine_uint_t n_existing = gc_nbytes(ptr);
    if (n_bytes <= n_existing) {
        return ptr;
    }

    // TODO check if we can grow inplace
    void *ptr2 = gc_alloc(n_bytes);
    if (ptr2 == NULL) {
        return NULL;
    }

    // Only copy when there is an old allocation: with n_existing == 0 `ptr`
    // may be NULL, and passing NULL to memcpy is undefined even for length 0.
    if (n_existing > 0) {
        memcpy(ptr2, ptr, n_existing);
    }
    gc_free(ptr);
    return ptr2;
}
#if 0
// Alternative gc_realloc that tries to shrink/grow an allocation in place.
// It is compiled out by the surrounding #if 0 and is marked as broken by the
// XXX notes below; it is kept for reference only.
// Behaviour as written:
//  - ptr_in == NULL: behaves like gc_alloc(n_bytes).
//  - ptr_in that fails the checks below: returns NULL.
//  - n_bytes fits in the counted blocks: frees surplus tail blocks, returns ptr_in.
//  - in-place growth is disabled (the `0 &&` in the condition).
//  - otherwise: allocates a new chain, copies the old contents, frees the old chain.
void *gc_realloc(void *ptr_in, machine_uint_t n_bytes) {
void *ptr_out = NULL;
machine_uint_t block = 0;
machine_uint_t ptr = (machine_uint_t)ptr_in;
if (ptr_in == NULL) {
return gc_alloc(n_bytes);
}
// NOTE(review): the assignment below is used as a truth value, so a pointer
// to block 0 fails this test and the function returns NULL for it.
if (VERIFY_PTR(ptr) /* verify pointer */
&& (block = BLOCK_FROM_PTR(ptr)) /* get first block */
&& ATB_GET_KIND(block) == AT_HEAD) { /* make sure it's a HEAD block */
byte block_type;
machine_uint_t n_free = 0;
machine_uint_t n_blocks = 1; /* counting HEAD block */
/* get the number of consecutive tail blocks and
the number of free blocks after last tail block */
// XXX make sure we stop if we get to end of heap
// NOTE(review): a block of kind AT_MARK advances neither counter, so the
// loop would not terminate while more bytes are still needed.
do {
block_type = ATB_GET_KIND(block + n_blocks + n_free);
switch (block_type) {
case AT_FREE: n_free++; break;
case AT_TAIL: n_blocks++; break;
default: break;
}
/* stop as soon as we find enough blocks for n_bytes */
// XXX check for n_bytes is wrong since we don't include n_free
} while (block_type != AT_HEAD && (n_bytes > (n_blocks * BYTES_PER_BLOCK)));
/* number of allocated bytes */
machine_uint_t n_existing = n_blocks * BYTES_PER_BLOCK;
/* check if realloc'ing to a smaller size */
if (n_bytes <= n_existing) {
ptr_out = ptr_in;
/* free unneeded tail blocks */
for (machine_uint_t bl = block + n_blocks; ATB_GET_KIND(bl) == AT_TAIL; bl++) {
ATB_ANY_TO_FREE(bl);
}
/* check if we can expand in place */
// XXX disabled for now
} else if (0 && n_bytes <= (n_existing + (n_free * BYTES_PER_BLOCK))) {
/* number of blocks needed to expand +1 if there's a remainder */
// XXX this has a bug, but don't know why; try: l=[i for i in range(1000)]; for i in l: print(i/3)
machine_uint_t n_diff = ( n_bytes - n_existing)/BYTES_PER_BLOCK+
((n_bytes - n_existing)%BYTES_PER_BLOCK!=0);
DEBUG_printf("gc_realloc: expanding " UINT_FMT " blocks (" UINT_FMT " bytes) to " UINT_FMT " blocks (" UINT_FMT " bytes)\n",
n_existing/BYTES_PER_BLOCK, n_existing, n_existing/BYTES_PER_BLOCK+n_diff, n_existing + n_diff*BYTES_PER_BLOCK);
/* mark rest of blocks as used tail */
for (machine_uint_t bl = block + n_blocks; bl < (block + n_blocks + n_diff); bl++) {
ATB_FREE_TO_TAIL(bl);
}
ptr_out = ptr_in;
/* try to find a new contiguous chain */
} else if ((ptr_out = gc_alloc(n_bytes)) != NULL) {
DEBUG_printf("gc_realloc: allocated new block \n");
memcpy(ptr_out, ptr_in, n_existing);
gc_free(ptr_in);
}
}
// NULL here means the pointer was rejected or the new allocation failed
return ptr_out;
}
#endif
// Print a short summary of heap usage (as computed by gc_info) to stdout:
// total/used/free bytes, the number of 1-block and 2-block chains, and the
// longest chain in blocks.
// Declared with (void) so that the definition is a real prototype, in line
// with gc_dump_alloc_table(void).
void gc_dump_info(void) {
    gc_info_t info;
    gc_info(&info);
    printf("GC: total: " UINT_FMT ", used: " UINT_FMT ", free: " UINT_FMT "\n", info.total, info.used, info.free);
    printf(" No. of 1-blocks: " UINT_FMT ", 2-blocks: " UINT_FMT ", max blk sz: " UINT_FMT "\n",
        info.num_1block, info.num_2block, info.max_block);
}
void gc_dump_alloc_table(void) {
printf("GC memory layout:");
for (machine_uint_t bl = 0; bl < gc_alloc_table_byte_len * BLOCKS_PER_ATB; bl++) {
if (bl % 64 == 0) {
printf("\n%04x: ", (uint)bl);
}
int c = ' ';
switch (ATB_GET_KIND(bl)) {
case AT_FREE: c = '.'; break;
case AT_HEAD: c = 'h'; break;
case AT_TAIL: c = 't'; break;
case AT_MARK: c = 'm'; break;
}
printf("%c", c);
}
printf("\n");
}
#if DEBUG_PRINT
// Manual smoke test (only built when DEBUG_PRINT is set).
// Sets up a small heap from malloc, builds a little object graph that is
// reachable from ptrs[0], makes a number of unreferenced allocations, then
// runs one collection and dumps the allocation table before and after.
// The malloc'ed heap is deliberately not freed: the collector keeps using it.
void gc_test(void) {
    machine_uint_t len = 500;
    machine_uint_t *heap = malloc(len);
    if (heap == NULL) {
        printf("gc_test: could not allocate test heap\n");
        return;
    }
    gc_init(heap, heap + len / sizeof(machine_uint_t));

    // root set; zero-initialised so that the unused slots are scanned as null
    // pointers rather than as indeterminate stack contents
    void *ptrs[100] = { NULL };
    {
        machine_uint_t **p = gc_alloc(16);
        if (p == NULL) {
            printf("gc_test: allocation failed\n");
            return;
        }
        p[0] = gc_alloc(64);
        p[1] = gc_alloc(1);
        p[2] = gc_alloc(1);
        p[3] = gc_alloc(1);
        machine_uint_t ***p2 = gc_alloc(16);
        if (p2 == NULL) {
            printf("gc_test: allocation failed\n");
            return;
        }
        p2[0] = p;
        p2[1] = p;
        ptrs[0] = p2;
    }

    // allocations that nothing refers to; the collection below should free them
    for (int i = 0; i < 25; i+=2) {
        machine_uint_t *p = gc_alloc(i);
        printf("p=%p\n", (void*)p);
    }

    printf("Before GC:\n");
    gc_dump_alloc_table();
    printf("Starting GC...\n");
    gc_collect_start();
    gc_collect_root(ptrs, sizeof(ptrs) / sizeof(void*));
    gc_collect_end();
    printf("After GC:\n");
    gc_dump_alloc_table();
}
#endif
#endif // MICROPY_ENABLE_GC