Merge pull request #2614 from tannewt/gc_multiblock_speedup
Track first free atbs for multiple block sizes instead of just 1
This commit is contained in:
commit
01fb668904
81
py/gc.c
81
py/gc.c
@ -150,9 +150,13 @@ void gc_init(void *start, void *end) {
|
||||
#endif
|
||||
|
||||
// Set first free ATB index to the start of the heap.
|
||||
MP_STATE_MEM(gc_first_free_atb_index) = 0;
|
||||
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
|
||||
}
|
||||
|
||||
// Set last free ATB index to the end of the heap.
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
|
||||
|
||||
// Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long
|
||||
// lived objects are allocated.
|
||||
MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len * BLOCKS_PER_ATB));
|
||||
@ -387,7 +391,9 @@ void gc_collect_root(void **ptrs, size_t len) {
|
||||
void gc_collect_end(void) {
|
||||
gc_deal_with_stack_overflow();
|
||||
gc_sweep();
|
||||
MP_STATE_MEM(gc_first_free_atb_index) = 0;
|
||||
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
|
||||
}
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
|
||||
MP_STATE_MEM(gc_lock_depth)--;
|
||||
GC_EXIT();
|
||||
@ -513,14 +519,16 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
|
||||
size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr));
|
||||
while (keep_looking) {
|
||||
int8_t direction = 1;
|
||||
size_t start = MP_STATE_MEM(gc_first_free_atb_index);
|
||||
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
|
||||
size_t first_free = MP_STATE_MEM(gc_first_free_atb_index)[bucket];
|
||||
size_t start = first_free;
|
||||
if (long_lived) {
|
||||
direction = -1;
|
||||
start = MP_STATE_MEM(gc_last_free_atb_index);
|
||||
}
|
||||
n_free = 0;
|
||||
// look for a run of n_blocks available blocks
|
||||
for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
|
||||
for (size_t i = start; keep_looking && first_free <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
|
||||
byte a = MP_STATE_MEM(gc_alloc_table_start)[i];
|
||||
// Four ATB states are packed into a single byte.
|
||||
int j = 0;
|
||||
@ -565,22 +573,24 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
|
||||
|
||||
// Found free space ending at found_block inclusive.
|
||||
// Also, set last free ATB index to block after last block we found, for start of
|
||||
// next scan. To reduce fragmentation, we only do this if we were looking
|
||||
// for a single free block, which guarantees that there are no free blocks
|
||||
// before this one. Also, whenever we free or shrink a block we must check
|
||||
// if this index needs adjusting (see gc_realloc and gc_free).
|
||||
// next scan. Also, whenever we free or shrink a block we must check if this index needs
|
||||
// adjusting (see gc_realloc and gc_free).
|
||||
if (!long_lived) {
|
||||
end_block = found_block;
|
||||
start_block = found_block - n_free + 1;
|
||||
if (n_blocks == 1) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB;
|
||||
if (n_blocks < MICROPY_ATB_INDICES) {
|
||||
size_t next_free_atb = (found_block + n_blocks) / BLOCKS_PER_ATB;
|
||||
// Update all atb indices for larger blocks too.
|
||||
for (size_t i = n_blocks - 1; i < MICROPY_ATB_INDICES; i++) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index)[i] = next_free_atb;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
start_block = found_block;
|
||||
end_block = found_block + n_free - 1;
|
||||
if (n_blocks == 1) {
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
|
||||
}
|
||||
// Always update the bounds of the long lived area because we assume it is contiguous. (It
|
||||
// can still be reset by a sweep.)
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
|
||||
}
|
||||
|
||||
#ifdef LOG_HEAP_ACTIVITY
|
||||
@ -676,30 +686,37 @@ void gc_free(void *ptr) {
|
||||
}
|
||||
// get the GC block number corresponding to this pointer
|
||||
assert(VERIFY_PTR(ptr));
|
||||
size_t block = BLOCK_FROM_PTR(ptr);
|
||||
assert(ATB_GET_KIND(block) == AT_HEAD);
|
||||
size_t start_block = BLOCK_FROM_PTR(ptr);
|
||||
assert(ATB_GET_KIND(start_block) == AT_HEAD);
|
||||
|
||||
#if MICROPY_ENABLE_FINALISER
|
||||
FTB_CLEAR(block);
|
||||
FTB_CLEAR(start_block);
|
||||
#endif
|
||||
|
||||
// set the last_free pointer to this block if it's earlier in the heap
|
||||
if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB;
|
||||
}
|
||||
if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB;
|
||||
}
|
||||
|
||||
// free head and all of its tail blocks
|
||||
#ifdef LOG_HEAP_ACTIVITY
|
||||
gc_log_change(block, 0);
|
||||
#endif
|
||||
#ifdef LOG_HEAP_ACTIVITY
|
||||
gc_log_change(start_block, 0);
|
||||
#endif
|
||||
size_t block = start_block;
|
||||
do {
|
||||
ATB_ANY_TO_FREE(block);
|
||||
block += 1;
|
||||
} while (ATB_GET_KIND(block) == AT_TAIL);
|
||||
|
||||
// Update the first free pointer for our size only. Few callers use gc_free directly, so there
|
||||
// is a decent chance we'll want to allocate this size again. By only updating the specific
|
||||
// size we don't risk something smaller fitting in.
|
||||
size_t n_blocks = block - start_block;
|
||||
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
|
||||
size_t new_free_atb = start_block / BLOCKS_PER_ATB;
|
||||
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
|
||||
}
|
||||
// set the last_free pointer to this block if it's later in the heap
|
||||
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
|
||||
}
|
||||
|
||||
GC_EXIT();
|
||||
|
||||
#if EXTENSIVE_HEAP_PROFILING
|
||||
@ -870,11 +887,13 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
|
||||
}
|
||||
|
||||
// move the cached first_free / last_free ATB indices if the end of this block lies outside them
|
||||
if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
|
||||
size_t new_free_atb = (block + new_blocks) / BLOCKS_PER_ATB;
|
||||
size_t bucket = MIN(n_blocks - new_blocks, MICROPY_ATB_INDICES) - 1;
|
||||
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
|
||||
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
|
||||
}
|
||||
if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
|
||||
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
|
||||
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
|
||||
}
|
||||
|
||||
GC_EXIT();
|
||||
|
@ -244,6 +244,14 @@
|
||||
#define alloca(x) m_malloc(x)
|
||||
#endif
|
||||
|
||||
// Number of atb indices to cache. Allocations of fewer blocks will be faster
|
||||
// because the search will be accelerated by the index cache. This only applies
|
||||
// to short lived allocations because we assume the long lived allocations are
|
||||
// contiguous.
|
||||
#ifndef MICROPY_ATB_INDICES
|
||||
#define MICROPY_ATB_INDICES (8)
|
||||
#endif
|
||||
|
||||
/*****************************************************************************/
|
||||
/* MicroPython emitters */
|
||||
|
||||
|
@ -92,7 +92,7 @@ typedef struct _mp_state_mem_t {
|
||||
size_t gc_alloc_threshold;
|
||||
#endif
|
||||
|
||||
size_t gc_first_free_atb_index;
|
||||
size_t gc_first_free_atb_index[MICROPY_ATB_INDICES];
|
||||
size_t gc_last_free_atb_index;
|
||||
|
||||
#if MICROPY_PY_GC_COLLECT_RETVAL
|
||||
|
@ -13,7 +13,7 @@ correct port. GDB is usually :3333 and JLink is :2331.
|
||||
Now, run gdb from your port directory:
|
||||
|
||||
```
|
||||
arm-none-eabi-gdb -x ../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf
|
||||
arm-none-eabi-gdb -x ../../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf
|
||||
```
|
||||
|
||||
This will take a little time while it breaks, backtraces and continues for every
|
||||
|
141
tools/gc_activity_between_collects.py
Normal file
141
tools/gc_activity_between_collects.py
Normal file
@ -0,0 +1,141 @@
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Map start block to current allocation info.
|
||||
current_heap = {}
|
||||
allocation_history = []
|
||||
root = {}
|
||||
|
||||
def change_root(trace, size):
    """Add ``size`` blocks to every node of the ``root`` tree along ``trace``.

    The trace is walked in reverse order. For each frame, ``frame[1]`` is used
    as the node key (and stored under ``"file"``) and ``frame[2]`` is stored
    under ``"function"``. Missing nodes are created with a zero block count.
    Pass a negative ``size`` to subtract blocks again.
    """
    level = root
    for frame in reversed(trace):
        file_location = frame[1]
        if file_location not in level:
            level[file_location] = {"blocks": 0,
                                    "file": file_location,
                                    "function": frame[2],
                                    "subcalls": {}}
        node = level[file_location]
        node["blocks"] += size
        # Descend so the next frame is recorded as a subcall of this one.
        level = node["subcalls"]
|
||||
|
||||
total_actions = 0
|
||||
non_single_block_streak = 0
|
||||
max_nsbs = 0
|
||||
last_action = None
|
||||
last_total_actions = 0
|
||||
count = 0
|
||||
actions = {}
|
||||
last_ticks_ms = 0
|
||||
ticks_ms = 0
|
||||
block_sizes = {}
|
||||
allocation_sources = {}
|
||||
with open(sys.argv[1], "r") as f:
|
||||
for line in f:
|
||||
if not line.strip():
|
||||
break
|
||||
for line in f:
|
||||
action = None
|
||||
if line.startswith("Breakpoint 2"):
|
||||
break
|
||||
next(f) # throw away breakpoint code line
|
||||
# print(next(f)) # first frame
|
||||
block = 0
|
||||
size = 0
|
||||
trace = []
|
||||
for line in f:
|
||||
# print(line.strip())
|
||||
if line[0] == "#":
|
||||
frame = line.strip().split()
|
||||
if frame[1].startswith("0x"):
|
||||
trace.append((frame[1], frame[-1], frame[3]))
|
||||
else:
|
||||
trace.append(("0x0", frame[-1], frame[1]))
|
||||
elif line[0] == "$":
|
||||
#print(line.strip().split()[-1])
|
||||
block = int(line.strip().split()[-1][2:], 16)
|
||||
next_line = next(f)
|
||||
size = int(next_line.strip().split()[-1][2:], 16)
|
||||
# next_line = next(f)
|
||||
# ticks_ms = int(next_line.strip().split()[-1][2:], 16)
|
||||
if not line.strip():
|
||||
break
|
||||
|
||||
action = "unknown"
|
||||
if block not in current_heap:
|
||||
current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions}
|
||||
action = "alloc"
|
||||
if size == 1:
|
||||
max_nsbs = max(max_nsbs, non_single_block_streak)
|
||||
non_single_block_streak = 0
|
||||
else:
|
||||
non_single_block_streak += 1
|
||||
#change_root(trace, size)
|
||||
if size not in block_sizes:
|
||||
block_sizes[size] = 0
|
||||
source = trace[-1][-1]
|
||||
if source not in allocation_sources:
|
||||
print(trace)
|
||||
allocation_sources[source] = 0
|
||||
allocation_sources[source] += 1
|
||||
block_sizes[size] += 1
|
||||
else:
|
||||
alloc = current_heap[block]
|
||||
alloc["end_trace"] = trace
|
||||
alloc["end_time"] = total_actions
|
||||
change_root(alloc["start_trace"], -1 * alloc["size"])
|
||||
if size > 0:
|
||||
action = "realloc"
|
||||
current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions}
|
||||
#change_root(trace, size)
|
||||
else:
|
||||
action = "free"
|
||||
if trace[0][2] == "gc_sweep":
|
||||
action = "sweep"
|
||||
non_single_block_streak = 0
|
||||
if (trace[3][2] == "py_gc_collect" or (trace[3][2] == "gc_deinit" and count > 1)) and last_action != "sweep":
|
||||
print(ticks_ms - last_ticks_ms, total_actions - last_total_actions, "gc.collect", max_nsbs)
|
||||
print(actions)
|
||||
print(block_sizes)
|
||||
print(allocation_sources)
|
||||
actions = {}
|
||||
block_sizes = {}
|
||||
allocation_sources = {}
|
||||
if count % 2 == 0:
|
||||
print()
|
||||
count += 1
|
||||
last_total_actions = total_actions
|
||||
last_ticks_ms = ticks_ms
|
||||
max_nsbs = 0
|
||||
del current_heap[block]
|
||||
alloc["end_cause"] = action
|
||||
allocation_history.append(alloc)
|
||||
if action not in actions:
|
||||
actions[action] = 0
|
||||
actions[action] += 1
|
||||
last_action = action
|
||||
#print(total_actions, non_single_block_streak, action, block, size)
|
||||
total_actions += 1
|
||||
print(actions)
|
||||
print(max_nsbs)
|
||||
print()
|
||||
|
||||
for alloc in current_heap.values():
|
||||
alloc["end_trace"] = ""
|
||||
alloc["end_time"] = total_actions
|
||||
allocation_history.append(alloc)
|
||||
|
||||
def print_frame(frame, indent=0):
    """Recursively print a call tree of the shape built by ``change_root``.

    Entries are printed in sorted key order. An entry is skipped, together
    with its subcalls, when its block count is zero or its key starts with
    ``../py/malloc.c`` or ``../py/gc.c``. Each level of subcalls is printed
    with ``indent`` increased by 2.
    """
    for key in sorted(frame):
        if not frame[key]["blocks"] or key.startswith("../py/malloc.c") or key.startswith("../py/gc.c"):
            continue
        # For the top level (indent == 0) the repeat count is negative, which
        # yields an empty prefix.
        print(" " * (indent - 1), key, frame[key]["function"], frame[key]["blocks"], "blocks")
        print_frame(frame[key]["subcalls"], indent + 2)
|
||||
|
||||
# print_frame(root)
|
||||
# total_blocks = 0
|
||||
# for key in sorted(root):
|
||||
# total_blocks += root[key]["blocks"]
|
||||
# print(total_blocks, "total blocks")
|
||||
|
||||
# with open("allocation_history.json", "w") as f:
|
||||
# json.dump(allocation_history, f)
|
@ -10,16 +10,23 @@ set logging on
|
||||
set remote hardware-breakpoint-limit 4
|
||||
|
||||
# gc log
|
||||
break gc.c:103
|
||||
break gc.c:106
|
||||
commands
|
||||
backtrace
|
||||
p/x start_block
|
||||
p/x length
|
||||
append binary memory ram.bin &_srelocate &_estack
|
||||
p/x ticks_ms
|
||||
# backtrace output redirect is currently broken in gdb so we use up instead.
|
||||
# https://sourceware.org/bugzilla/show_bug.cgi?id=23439
|
||||
# backtrace
|
||||
up
|
||||
up
|
||||
up
|
||||
up
|
||||
# append binary memory ram.bin &_srelocate &_estack
|
||||
continue
|
||||
end
|
||||
|
||||
break main.c:179
|
||||
break main.c:251
|
||||
|
||||
continue
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user