diff --git a/py/gc.c b/py/gc.c index fef67f61bb..271bc94624 100755 --- a/py/gc.c +++ b/py/gc.c @@ -150,9 +150,13 @@ void gc_init(void *start, void *end) { #endif // Set first free ATB index to the start of the heap. - MP_STATE_MEM(gc_first_free_atb_index) = 0; + for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) { + MP_STATE_MEM(gc_first_free_atb_index)[i] = 0; + } + // Set last free ATB index to the end of the heap. MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1; + // Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long // lived objects are allocated. MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len * BLOCKS_PER_ATB)); @@ -387,7 +391,9 @@ void gc_collect_root(void **ptrs, size_t len) { void gc_collect_end(void) { gc_deal_with_stack_overflow(); gc_sweep(); - MP_STATE_MEM(gc_first_free_atb_index) = 0; + for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) { + MP_STATE_MEM(gc_first_free_atb_index)[i] = 0; + } MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1; MP_STATE_MEM(gc_lock_depth)--; GC_EXIT(); @@ -513,14 +519,16 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) { size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr)); while (keep_looking) { int8_t direction = 1; - size_t start = MP_STATE_MEM(gc_first_free_atb_index); + size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1; + size_t first_free = MP_STATE_MEM(gc_first_free_atb_index)[bucket]; + size_t start = first_free; if (long_lived) { direction = -1; start = MP_STATE_MEM(gc_last_free_atb_index); } n_free = 0; // look for a run of n_blocks available blocks - for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) { + for (size_t i = start; keep_looking && first_free <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += 
direction) { byte a = MP_STATE_MEM(gc_alloc_table_start)[i]; // Four ATB states are packed into a single byte. int j = 0; @@ -565,22 +573,24 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) { // Found free space ending at found_block inclusive. // Also, set last free ATB index to block after last block we found, for start of - // next scan. To reduce fragmentation, we only do this if we were looking - // for a single free block, which guarantees that there are no free blocks - // before this one. Also, whenever we free or shrink a block we must check - // if this index needs adjusting (see gc_realloc and gc_free). + // next scan. Also, whenever we free or shrink a block we must check if this index needs + // adjusting (see gc_realloc and gc_free). if (!long_lived) { end_block = found_block; start_block = found_block - n_free + 1; - if (n_blocks == 1) { - MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB; + if (n_blocks < MICROPY_ATB_INDICES) { + size_t next_free_atb = (found_block + n_blocks) / BLOCKS_PER_ATB; + // Update all atb indices for larger blocks too. + for (size_t i = n_blocks - 1; i < MICROPY_ATB_INDICES; i++) { + MP_STATE_MEM(gc_first_free_atb_index)[i] = next_free_atb; + } } } else { start_block = found_block; end_block = found_block + n_free - 1; - if (n_blocks == 1) { - MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB; - } + // Always update the bounds of the long lived area because we assume it is contiguous. (It + // can still be reset by a sweep.) 
+ MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB; } #ifdef LOG_HEAP_ACTIVITY @@ -676,30 +686,37 @@ void gc_free(void *ptr) { } // get the GC block number corresponding to this pointer assert(VERIFY_PTR(ptr)); - size_t block = BLOCK_FROM_PTR(ptr); - assert(ATB_GET_KIND(block) == AT_HEAD); + size_t start_block = BLOCK_FROM_PTR(ptr); + assert(ATB_GET_KIND(start_block) == AT_HEAD); #if MICROPY_ENABLE_FINALISER - FTB_CLEAR(block); + FTB_CLEAR(start_block); #endif - // set the last_free pointer to this block if it's earlier in the heap - if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) { - MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB; - } - if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) { - MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB; - } - // free head and all of its tail blocks - #ifdef LOG_HEAP_ACTIVITY - gc_log_change(block, 0); - #endif + #ifdef LOG_HEAP_ACTIVITY + gc_log_change(start_block, 0); + #endif + size_t block = start_block; do { ATB_ANY_TO_FREE(block); block += 1; } while (ATB_GET_KIND(block) == AT_TAIL); + // Update the first free pointer for our size only. Not much calls gc_free directly so there + // is a decent chance we'll want to allocate this size again. By only updating the specific + // size we don't risk something smaller fitting in.
+ size_t n_blocks = block - start_block; + size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1; + size_t new_free_atb = start_block / BLOCKS_PER_ATB; + if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) { + MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb; + } + // set the last_free pointer to this block if it's later in the heap + if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) { + MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb; + } + GC_EXIT(); #if EXTENSIVE_HEAP_PROFILING @@ -870,11 +887,13 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } // set the last_free pointer to end of this block if it's earlier in the heap - if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) { - MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB; + size_t new_free_atb = (block + new_blocks) / BLOCKS_PER_ATB; + size_t bucket = MIN(n_blocks - new_blocks, MICROPY_ATB_INDICES) - 1; + if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) { + MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb; } - if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) { - MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB; + if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) { + MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb; } GC_EXIT(); diff --git a/py/mpconfig.h b/py/mpconfig.h index a0d211bfaa..1512c7d3a3 100755 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -244,6 +244,14 @@ #define alloca(x) m_malloc(x) #endif +// Number of atb indices to cache. Allocations of fewer blocks will be faster +// because the search will be accelerated by the index cache. This only applies +// to short lived allocations because we assume the long lived allocations are +// contiguous.
+#ifndef MICROPY_ATB_INDICES +#define MICROPY_ATB_INDICES (8) +#endif + /*****************************************************************************/ /* MicroPython emitters */ diff --git a/py/mpstate.h b/py/mpstate.h index a3d7e5dccb..a5815776a4 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -92,7 +92,7 @@ typedef struct _mp_state_mem_t { size_t gc_alloc_threshold; #endif - size_t gc_first_free_atb_index; + size_t gc_first_free_atb_index[MICROPY_ATB_INDICES]; size_t gc_last_free_atb_index; #if MICROPY_PY_GC_COLLECT_RETVAL diff --git a/tools/gc_activity.md b/tools/gc_activity.md index d1a4c28031..686d9b6b7b 100644 --- a/tools/gc_activity.md +++ b/tools/gc_activity.md @@ -13,7 +13,7 @@ correct port. GDB is usually :3333 and JLink is :2331. Now, run gdb from your port directory: ``` -arm-none-eabi-gdb -x ../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf +arm-none-eabi-gdb -x ../../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf ``` This will take a little time while it breaks, backtraces and continues for every diff --git a/tools/gc_activity_between_collects.py b/tools/gc_activity_between_collects.py new file mode 100644 index 0000000000..c02c479f9a --- /dev/null +++ b/tools/gc_activity_between_collects.py @@ -0,0 +1,141 @@ +import sys +import json + +# Map start block to current allocation info. 
+current_heap = {} +allocation_history = [] +root = {} + +def change_root(trace, size): + level = root + for frame in reversed(trace): + file_location = frame[1] + if file_location not in level: + level[file_location] = {"blocks": 0, + "file": file_location, + "function": frame[2], + "subcalls": {}} + level[file_location]["blocks"] += size + level = level[file_location]["subcalls"] + +total_actions = 0 +non_single_block_streak = 0 +max_nsbs = 0 +last_action = None +last_total_actions = 0 +count = 0 +actions = {} +last_ticks_ms = 0 +ticks_ms = 0 +block_sizes = {} +allocation_sources = {} +with open(sys.argv[1], "r") as f: + for line in f: + if not line.strip(): + break + for line in f: + action = None + if line.startswith("Breakpoint 2"): + break + next(f) # throw away breakpoint code line + # print(next(f)) # first frame + block = 0 + size = 0 + trace = [] + for line in f: + # print(line.strip()) + if line[0] == "#": + frame = line.strip().split() + if frame[1].startswith("0x"): + trace.append((frame[1], frame[-1], frame[3])) + else: + trace.append(("0x0", frame[-1], frame[1])) + elif line[0] == "$": + #print(line.strip().split()[-1]) + block = int(line.strip().split()[-1][2:], 16) + next_line = next(f) + size = int(next_line.strip().split()[-1][2:], 16) + # next_line = next(f) + # ticks_ms = int(next_line.strip().split()[-1][2:], 16) + if not line.strip(): + break + + action = "unknown" + if block not in current_heap: + current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions} + action = "alloc" + if size == 1: + max_nsbs = max(max_nsbs, non_single_block_streak) + non_single_block_streak = 0 + else: + non_single_block_streak += 1 + #change_root(trace, size) + if size not in block_sizes: + block_sizes[size] = 0 + source = trace[-1][-1] + if source not in allocation_sources: + print(trace) + allocation_sources[source] = 0 + allocation_sources[source] += 1 + block_sizes[size] += 1 + else: + alloc = 
current_heap[block] + alloc["end_trace"] = trace + alloc["end_time"] = total_actions + change_root(alloc["start_trace"], -1 * alloc["size"]) + if size > 0: + action = "realloc" + current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions} + #change_root(trace, size) + else: + action = "free" + if trace[0][2] == "gc_sweep": + action = "sweep" + non_single_block_streak = 0 + if (trace[3][2] == "py_gc_collect" or (trace[3][2] == "gc_deinit" and count > 1)) and last_action != "sweep": + print(ticks_ms - last_ticks_ms, total_actions - last_total_actions, "gc.collect", max_nsbs) + print(actions) + print(block_sizes) + print(allocation_sources) + actions = {} + block_sizes = {} + allocation_sources = {} + if count % 2 == 0: + print() + count += 1 + last_total_actions = total_actions + last_ticks_ms = ticks_ms + max_nsbs = 0 + del current_heap[block] + alloc["end_cause"] = action + allocation_history.append(alloc) + if action not in actions: + actions[action] = 0 + actions[action] += 1 + last_action = action + #print(total_actions, non_single_block_streak, action, block, size) + total_actions += 1 +print(actions) +print(max_nsbs) +print() + +for alloc in current_heap.values(): + alloc["end_trace"] = "" + alloc["end_time"] = total_actions + allocation_history.append(alloc) + +def print_frame(frame, indent=0): + for key in sorted(frame): + if not frame[key]["blocks"] or key.startswith("../py/malloc.c") or key.startswith("../py/gc.c"): + continue + print(" " * (indent - 1), key, frame[key]["function"], frame[key]["blocks"], "blocks") + print_frame(frame[key]["subcalls"], indent + 2) + +# print_frame(root) +# total_blocks = 0 +# for key in sorted(root): +# total_blocks += root[key]["blocks"] +# print(total_blocks, "total blocks") + +# with open("allocation_history.json", "w") as f: +# json.dump(allocation_history, f) diff --git a/tools/output_gc_until_repl.txt b/tools/output_gc_until_repl.txt index 81711deeb8..1c124de801 100644 
--- a/tools/output_gc_until_repl.txt +++ b/tools/output_gc_until_repl.txt @@ -10,16 +10,23 @@ set logging on set remote hardware-breakpoint-limit 4 # gc log -break gc.c:103 +break gc.c:106 commands -backtrace p/x start_block p/x length -append binary memory ram.bin &_srelocate &_estack +p/x ticks_ms +# backtrace output redirect is currently broken in gdb so we use up instead. +# https://sourceware.org/bugzilla/show_bug.cgi?id=23439 +# backtrace +up +up +up +up +# append binary memory ram.bin &_srelocate &_estack continue end -break main.c:179 +break main.c:251 continue