Slim down stack frames

This reduces the stack frame size of mp_builtin___import__ by
limiting the supported file path length from 256 to 96. This
function can be called recursively for nested imports, so it adds up.

Also reduce the stack frame of mp_execute_bytecode (vm.c) from 206
bytes per bytecode call to 124. This too is recursive and adds up.
It is reduced by preventing some inlining. It may decrease
performance slightly when importing and unpacking.

Adds two new scripts for debugging. One is used from gdb to print
frame sizes in a backtrace. The other prints which PCs (program
counters) access a particular stack offset. This helps find
infrequently used stack space.

Fixes #8053.
This commit is contained in:
Scott Shawcroft 2023-06-06 16:20:47 -07:00
parent 475ffc3925
commit dd71ae10b9
No known key found for this signature in database
GPG Key ID: 0DFD512649C052DA
4 changed files with 99 additions and 7 deletions

View File

@ -59,8 +59,8 @@ extern void common_hal_mcu_enable_interrupts(void);
//
// default is 128; consider raising to reduce fragmentation.
#define MICROPY_ALLOC_PARSE_CHUNK_INIT (16)
// default is 512.
#define MICROPY_ALLOC_PATH_MAX (256)
// default is 512. Longest path in .py bundle as of June 6th, 2023 is 73 characters.
#define MICROPY_ALLOC_PATH_MAX (96)
#define MICROPY_CAN_OVERRIDE_BUILTINS (1)
#define MICROPY_COMP_CONST (1)
#define MICROPY_COMP_DOUBLE_TUPLE_ASSIGN (1)

View File

@ -201,7 +201,7 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) {
return elem->value;
}
mp_obj_t mp_load_build_class(void) {
mp_obj_t __attribute__((noinline)) mp_load_build_class(void) {
DEBUG_OP_printf("load_build_class\n");
#if MICROPY_CAN_OVERRIDE_BUILTINS
if (MP_STATE_VM(mp_module_builtins_override_dict) != NULL) {
@ -858,7 +858,7 @@ mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ob
}
// unpacked items are stored in reverse order into the array pointed to by items
void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
void __attribute__((noinline,)) mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
size_t seq_len;
if (mp_obj_is_type(seq_in, &mp_type_tuple) || mp_obj_is_type(seq_in, &mp_type_list)) {
mp_obj_t *seq_items;
@ -905,7 +905,7 @@ too_long:
}
// unpacked items are stored in reverse order into the array pointed to by items
void mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
void __attribute__((noinline)) mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
size_t num_left = num_in & 0xff;
size_t num_right = (num_in >> 8) & 0xff;
DEBUG_OP_printf("unpack ex " UINT_FMT " " UINT_FMT "\n", num_left, num_right);
@ -1482,7 +1482,7 @@ mp_obj_t mp_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) {
return mp_builtin___import__(5, args);
}
mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
mp_obj_t __attribute__((noinline,)) mp_import_from(mp_obj_t module, qstr name) {
DEBUG_printf("import from %p %s\n", module, qstr_str(name));
mp_obj_t dest[2];
@ -1528,7 +1528,7 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
#endif
}
void mp_import_all(mp_obj_t module) {
void __attribute__((noinline)) mp_import_all(mp_obj_t module) {
DEBUG_printf("import all %p\n", module);
// TODO: Support __all__

64
tools/gdb-stack-size.py Normal file
View File

@ -0,0 +1,64 @@
"""Source this file into gdb `source ../../tools/gdb-stack-size.py` then run
`stack-size` to print a backtrace with each frame size next to it."""
class StackSize(gdb.Command):
    """gdb command ``stack-size``: a backtrace annotated with frame sizes.

    A frame's size is taken as the difference between its caller's ``sp``
    register and its own ``sp`` register. The outermost frame has no caller
    and is therefore not listed.
    """

    def __init__(self):
        super().__init__("stack-size", gdb.COMMAND_USER)

    @staticmethod
    def _print_frame_symbols(block, frame):
        """Print every symbol in ``block`` that needs ``frame`` to be read."""
        for sym in block:
            # To hide small symbols, also skip when sym.type.sizeof <= 4.
            if sym.needs_frame:
                value = sym.value(frame)
                print(" ", sym.addr_class, value.address, sym.type.sizeof, sym, sym.type, value)

    def _dump_function_symbols(self, frame, func):
        """Print the frame-resident symbols of ``func`` for ``frame``.

        First walks outward from the frame's current block until the block
        owned by ``func`` (or a static block) is reached, then scans the pc
        range of the last block visited and prints each distinct block found.
        """
        block = frame.block()
        last_visited = None
        while not block.is_static:
            print(" block", hex(block.start), hex(block.end), block.function)
            self._print_frame_symbols(block, frame)
            last_visited = block
            block = block.superblock
            if block.function == func:
                break

        block = last_visited
        print("pc scan", hex(block.start), hex(block.end))
        visited_ranges = set()
        # Step of 2 between candidate pcs, as in the original scan.
        for pc in range(block.start, block.end, 2):
            pc_block = gdb.block_for_pc(pc)
            bounds = (pc_block.start, pc_block.end)
            if bounds in visited_ranges:
                continue
            visited_ranges.add(bounds)
            print(" ", hex(pc), hex(pc_block.start), hex(pc_block.end), pc_block.function)
            self._print_frame_symbols(pc_block, frame)

    def invoke(self, arg, from_tty):
        """Print one line per frame, then the sum of all frame sizes.

        ``arg`` and ``from_tty`` are supplied by gdb and are not used.
        """
        total_size = 0
        frame = gdb.newest_frame()
        while frame:
            sp = frame.read_register("sp")
            caller = frame.older()
            if not caller:
                # Without a caller there is no second sp to subtract from.
                break
            func = frame.function()
            # Pad single-digit levels so the columns line up ("#3 " vs "#12").
            label = "#{:<2}".format(frame.level())
            size = caller.read_register("sp") - sp
            total_size += size
            padding = " " * (40 - len(str(func)))
            print(label, sp, frame.type(), func, padding, size)
            # Tweak this condition for more detail on a specific function.
            if False and func.name == "mp_execute_bytecode":
                self._dump_function_symbols(frame, func)
            frame = caller
        print("total size:", total_size)


# Instantiating the command registers it with gdb.
StackSize()

28
tools/stack-loc-to-pc.py Normal file
View File

@ -0,0 +1,28 @@
"""Prints the pcs that access each stack location in a function. Useful for finding
infrequently used stack space.
Pipe in disassembly like so:
arm-none-eabi-objdump --disassemble=mp_execute_bytecode build-metro_m0_express/firmware.elf | python ../../tools/stack-loc-to-pc.py
"""
import sys
import re
# Matches an sp-relative immediate such as "[sp, #12]" and captures the offset.
offset = re.compile(r"sp, #(\d+)")


def collect_offsets(lines):
    """Map each sp-relative offset to the pcs of the instructions using it.

    Args:
        lines: iterable of disassembly lines; the text before the first ":"
            on a line is taken as that instruction's pc.

    Returns:
        dict of int offset -> list of pc strings, in input order.
    """
    offsets = {}
    for line in lines:
        m = offset.search(line)
        if m is None:
            # Lines such as "mov r7, sp" mention sp without an immediate
            # offset; previously these crashed on m.groups().
            continue
        pc = line.split(":")[0].strip()
        offsets.setdefault(int(m.group(1)), []).append(pc)
    return offsets


def report_rows(offsets):
    """Yield (offset, size, pcs) in ascending offset order.

    ``size`` is the gap between this offset and the previous one (the first
    gap is measured from 0).
    """
    last_o = 0
    for o in sorted(offsets):
        yield o, o - last_o, offsets[o]
        last_o = o


def main():
    """Read disassembly from stdin and print the tab-separated report."""
    print("Offset", "Size", "PCs", sep="\t")
    for row in report_rows(collect_offsets(sys.stdin)):
        print(*row, sep="\t")


if __name__ == "__main__":
    main()