From a8a5d1e8c8db3b7c64e1921005ceb5a5d47280f4 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 9 Jun 2017 13:31:57 +1000 Subject: [PATCH] py: Provide mp_decode_uint_skip() to help reduce stack usage. Taking the address of a local variable leads to increased stack usage, so the mp_decode_uint_skip() function is added to reduce the need for taking addresses. The changes in this patch reduce stack usage of a Python call by 8 bytes on ARM Thumb, by 16 bytes on non-windowing Xtensa archs, and by 16 bytes on x86-64. Code size is also slightly reduced on most archs by around 32 bytes. --- py/bc.c | 10 +++++++++- py/bc.h | 1 + py/objfun.c | 22 ++++++++-------------- py/objgenerator.c | 7 ++----- py/vm.c | 13 ++++++++----- 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/py/bc.c b/py/bc.c index fc17946839..2e481bce77 100644 --- a/py/bc.c +++ b/py/bc.c @@ -64,6 +64,14 @@ mp_uint_t mp_decode_uint_value(const byte *ptr) { return mp_decode_uint(&ptr); } +// This function is used to help reduce stack usage at the caller, for the case when +// the caller doesn't need the actual value and just wants to skip over it. It consumes bytes up to and including the first one with bit 0x80 clear and returns a pointer to the byte after that; the caller must ensure such a byte exists, as no bound is checked. 
+const byte *mp_decode_uint_skip(const byte *ptr) { + while ((*ptr++) & 0x80) { + } + return ptr; +} + STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, size_t expected, size_t given) { #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE // generic message, used also for other argument issues @@ -115,7 +123,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw // get params size_t n_state = mp_decode_uint(&code_state->ip); - mp_decode_uint(&code_state->ip); // skip n_exc_stack + code_state->ip = mp_decode_uint_skip(code_state->ip); // skip n_exc_stack size_t scope_flags = *code_state->ip++; size_t n_pos_args = *code_state->ip++; size_t n_kwonly_args = *code_state->ip++; diff --git a/py/bc.h b/py/bc.h index e8d4286125..88045dc55b 100644 --- a/py/bc.h +++ b/py/bc.h @@ -92,6 +92,7 @@ typedef struct _mp_code_state_t { mp_uint_t mp_decode_uint(const byte **ptr); mp_uint_t mp_decode_uint_value(const byte *ptr); +const byte *mp_decode_uint_skip(const byte *ptr); mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc); mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args); diff --git a/py/objfun.c b/py/objfun.c index 08d031c8d8..9f35891243 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -141,11 +141,11 @@ const mp_obj_type_t mp_type_fun_builtin_var = { /* byte code functions */ qstr mp_obj_code_get_name(const byte *code_info) { - mp_decode_uint(&code_info); // skip code_info_size entry + code_info = mp_decode_uint_skip(code_info); // skip code_info_size entry #if MICROPY_PERSISTENT_CODE return code_info[0] | (code_info[1] << 8); #else - return mp_decode_uint(&code_info); + return mp_decode_uint_value(code_info); #endif } @@ -163,8 +163,8 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) { #endif const byte *bc = fun->bytecode; - mp_decode_uint(&bc); // skip n_state - mp_decode_uint(&bc); // skip n_exc_stack + bc = 
mp_decode_uint_skip(bc); // skip n_state + bc = mp_decode_uint_skip(bc); // skip n_exc_stack bc++; // skip scope_params bc++; // skip n_pos_args bc++; // skip n_kwonly_args @@ -205,12 +205,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args MP_STACK_CHECK(); mp_obj_fun_bc_t *self = MP_OBJ_TO_PTR(self_in); - // get start of bytecode - const byte *ip = self->bytecode; - // bytecode prelude: state size and exception stack size - size_t n_state = mp_decode_uint(&ip); - size_t n_exc_stack = mp_decode_uint(&ip); + size_t n_state = mp_decode_uint_value(self->bytecode); + size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self->bytecode)); // allocate state for locals and stack size_t state_size = n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t); @@ -243,12 +240,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_fun_bc_t *self = MP_OBJ_TO_PTR(self_in); DEBUG_printf("Func n_def_args: %d\n", self->n_def_args); - // get start of bytecode - const byte *ip = self->bytecode; - // bytecode prelude: state size and exception stack size - size_t n_state = mp_decode_uint(&ip); - size_t n_exc_stack = mp_decode_uint(&ip); + size_t n_state = mp_decode_uint_value(self->bytecode); + size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self->bytecode)); #if VM_DETECT_STACK_OVERFLOW n_state += 1; diff --git a/py/objgenerator.c b/py/objgenerator.c index 2e57fdf4b6..8cb0e60ccb 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -54,12 +54,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun; assert(self_fun->base.type == &mp_type_fun_bc); - // get start of bytecode - const byte *ip = self_fun->bytecode; - // bytecode prelude: get state size and exception stack size - mp_uint_t n_state = mp_decode_uint(&ip); - mp_uint_t n_exc_stack = mp_decode_uint(&ip); + size_t n_state = 
mp_decode_uint_value(self_fun->bytecode); + size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self_fun->bytecode)); // allocate the generator object, with room for local stack and exception stack mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte, diff --git a/py/vm.c b/py/vm.c index ad3d9e29c8..404c799123 100644 --- a/py/vm.c +++ b/py/vm.c @@ -1363,22 +1363,25 @@ unwind_loop: // TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj) if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { const byte *ip = code_state->fun_bc->bytecode; - mp_decode_uint(&ip); // skip n_state - mp_decode_uint(&ip); // skip n_exc_stack + ip = mp_decode_uint_skip(ip); // skip n_state + ip = mp_decode_uint_skip(ip); // skip n_exc_stack ip++; // skip scope_params ip++; // skip n_pos_args ip++; // skip n_kwonly_args ip++; // skip n_def_pos_args size_t bc = code_state->ip - ip; - size_t code_info_size = mp_decode_uint(&ip); + size_t code_info_size = mp_decode_uint_value(ip); + ip = mp_decode_uint_skip(ip); // skip code_info_size bc -= code_info_size; #if MICROPY_PERSISTENT_CODE qstr block_name = ip[0] | (ip[1] << 8); qstr source_file = ip[2] | (ip[3] << 8); ip += 4; #else - qstr block_name = mp_decode_uint(&ip); - qstr source_file = mp_decode_uint(&ip); + qstr block_name = mp_decode_uint_value(ip); + ip = mp_decode_uint_skip(ip); + qstr source_file = mp_decode_uint_value(ip); + ip = mp_decode_uint_skip(ip); #endif size_t source_line = 1; size_t c;