From b534e1b9f14c189d2cef209d40f598e62164694a Mon Sep 17 00:00:00 2001 From: Damien George Date: Thu, 4 Sep 2014 14:44:01 +0100 Subject: [PATCH] py: Use variable length encoded uints in more places in bytecode. Code-info size, block name, source name, n_state and n_exc_stack now use variable length encoded uints. This saves 7-9 bytes per bytecode function for most functions. --- py/bc.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++ py/bc.h | 2 + py/emitbc.c | 97 +++++++++++-------- py/objfun.c | 186 ++---------------------------------- py/objgenerator.c | 15 ++- py/py.mk | 1 + py/showbc.c | 14 +-- py/vm.c | 19 ++-- 8 files changed, 329 insertions(+), 244 deletions(-) create mode 100644 py/bc.c diff --git a/py/bc.c b/py/bc.c new file mode 100644 index 0000000000..e9cba3823d --- /dev/null +++ b/py/bc.c @@ -0,0 +1,239 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2014 Damien P. George + * Copyright (c) 2014 Paul Sokolovsky + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include "mpconfig.h" +#include "nlr.h" +#include "misc.h" +#include "qstr.h" +#include "obj.h" +#include "objtuple.h" +#include "objfun.h" +#include "runtime0.h" +#include "runtime.h" +#include "bc.h" +#include "stackctrl.h" + +#if 0 // print debugging info +#define DEBUG_PRINT (1) +#else // don't print debugging info +#define DEBUG_printf(...) (void)0 +#endif + +mp_uint_t mp_decode_uint(const byte **ptr) { + mp_uint_t unum = 0; + byte val; + const byte *p = *ptr; + do { + val = *p++; + unum = (unum << 7) | (val & 0x7f); + } while ((val & 0x80) != 0); + *ptr = p; + return unum; +} + +STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, mp_uint_t expected, mp_uint_t given) { +#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE + // Generic message, to be reused for other argument issues + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, + "argument num/types mismatch")); +#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "function takes %d positional arguments but %d were given", expected, given)); +#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_DETAILED + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "%s() takes %d positional arguments but %d were given", + mp_obj_fun_get_name(f), expected, given)); +#endif +} + +#if DEBUG_PRINT +STATIC void dump_args(const mp_obj_t *a, int sz) { + DEBUG_printf("%p: ", a); + for (int i = 0; i < sz; i++) { + DEBUG_printf("%p ", a[i]); + } + DEBUG_printf("\n"); +} +#else +#define dump_args(...) 
(void)0 +#endif + +// code_state should have ->ip filled in (pointing past code info block), +// as well as ->n_state. +void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) { + // This function is pretty complicated. Its main aim is to be efficient in speed and RAM + // usage for the common case of positional only args. + mp_obj_fun_bc_t *self = self_in; + mp_uint_t n_state = code_state->n_state; + const byte *ip = code_state->ip; + + code_state->code_info = self->bytecode; + code_state->sp = &code_state->state[0] - 1; + code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1; + + // zero out the local stack to begin with + memset(code_state->state, 0, n_state * sizeof(*code_state->state)); + + const mp_obj_t *kwargs = args + n_args; + + // var_pos_kw_args points to the stack where the var-args tuple, and var-kw dict, should go (if they are needed) + mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - self->n_pos_args - self->n_kwonly_args]; + + // check positional arguments + + if (n_args > self->n_pos_args) { + // given more than enough arguments + if (!self->takes_var_args) { + fun_pos_args_mismatch(self, self->n_pos_args, n_args); + } + // put extra arguments in varargs tuple + *var_pos_kw_args-- = mp_obj_new_tuple(n_args - self->n_pos_args, args + self->n_pos_args); + n_args = self->n_pos_args; + } else { + if (self->takes_var_args) { + DEBUG_printf("passing empty tuple as *args\n"); + *var_pos_kw_args-- = mp_const_empty_tuple; + } + // Apply processing and check below only if we don't have kwargs, + // otherwise, kw handling code below has own extensive checks. 
+ if (n_kw == 0 && !self->has_def_kw_args) { + if (n_args >= self->n_pos_args - self->n_def_args) { + // given enough arguments, but may need to use some default arguments + for (mp_uint_t i = n_args; i < self->n_pos_args; i++) { + code_state->state[n_state - 1 - i] = self->extra_args[i - (self->n_pos_args - self->n_def_args)]; + } + } else { + fun_pos_args_mismatch(self, self->n_pos_args - self->n_def_args, n_args); + } + } + } + + // copy positional args into state + for (mp_uint_t i = 0; i < n_args; i++) { + code_state->state[n_state - 1 - i] = args[i]; + } + + // check keyword arguments + + if (n_kw != 0 || self->has_def_kw_args) { + DEBUG_printf("Initial args: "); + dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + + mp_obj_t dict = MP_OBJ_NULL; + if (self->takes_kw_args) { + dict = mp_obj_new_dict(n_kw); // TODO: better go conservative with 0? + *var_pos_kw_args = dict; + } + + for (mp_uint_t i = 0; i < n_kw; i++) { + qstr arg_name = MP_OBJ_QSTR_VALUE(kwargs[2 * i]); + for (mp_uint_t j = 0; j < self->n_pos_args + self->n_kwonly_args; j++) { + if (arg_name == self->args[j]) { + if (code_state->state[n_state - 1 - j] != MP_OBJ_NULL) { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "function got multiple values for argument '%s'", qstr_str(arg_name))); + } + code_state->state[n_state - 1 - j] = kwargs[2 * i + 1]; + goto continue2; + } + } + // Didn't find name match with positional args + if (!self->takes_kw_args) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "function does not take keyword arguments")); + } + mp_obj_dict_store(dict, kwargs[2 * i], kwargs[2 * i + 1]); +continue2:; + } + + DEBUG_printf("Args with kws flattened: "); + dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + + // fill in defaults for positional args + mp_obj_t *d = &code_state->state[n_state - self->n_pos_args]; + 
mp_obj_t *s = &self->extra_args[self->n_def_args - 1]; + for (int i = self->n_def_args; i > 0; i--, d++, s--) { + if (*d == MP_OBJ_NULL) { + *d = *s; + } + } + + DEBUG_printf("Args after filling default positional: "); + dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + + // Check that all mandatory positional args are specified + while (d < &code_state->state[n_state]) { + if (*d++ == MP_OBJ_NULL) { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "function missing required positional argument #%d", &code_state->state[n_state] - d)); + } + } + + // Check that all mandatory keyword args are specified + // Fill in default kw args if we have them + for (mp_uint_t i = 0; i < self->n_kwonly_args; i++) { + if (code_state->state[n_state - 1 - self->n_pos_args - i] == MP_OBJ_NULL) { + mp_map_elem_t *elem = NULL; + if (self->has_def_kw_args) { + elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[self->n_def_args])->map, MP_OBJ_NEW_QSTR(self->args[self->n_pos_args + i]), MP_MAP_LOOKUP); + } + if (elem != NULL) { + code_state->state[n_state - 1 - self->n_pos_args - i] = elem->value; + } else { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "function missing required keyword argument '%s'", qstr_str(self->args[self->n_pos_args + i]))); + } + } + } + + } else { + // no keyword arguments given + if (self->n_kwonly_args != 0) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, + "function missing keyword-only argument")); + } + if (self->takes_kw_args) { + *var_pos_kw_args = mp_obj_new_dict(0); + } + } + + // bytecode prelude: initialise closed over variables + for (mp_uint_t n_local = *ip++; n_local > 0; n_local--) { + mp_uint_t local_num = *ip++; + code_state->state[n_state - 1 - local_num] = mp_obj_new_cell(code_state->state[n_state - 1 - local_num]); + } + + // now that we skipped over the prelude, set the ip for the VM + code_state->ip = ip; + + 
DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", self->n_pos_args, self->n_kwonly_args); + dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + dump_args(code_state->state, n_state); +} diff --git a/py/bc.h b/py/bc.h index 4793174997..b92342d84d 100644 --- a/py/bc.h +++ b/py/bc.h @@ -49,6 +49,8 @@ typedef struct _mp_code_state { //mp_exc_stack_t exc_state[0]; } mp_code_state; +mp_uint_t mp_decode_uint(const byte **ptr); + mp_vm_return_kind_t mp_execute_bytecode(mp_code_state *code_state, volatile mp_obj_t inject_exc); void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args); void mp_bytecode_print(const void *descr, const byte *code, int len); diff --git a/py/emitbc.c b/py/emitbc.c index a1eacb2986..eada190e6f 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -85,6 +85,22 @@ void emit_bc_free(emit_t *emit) { m_del_obj(emit_t, emit); } +STATIC void emit_write_uint(emit_t* emit, byte*(*allocator)(emit_t*, int), mp_uint_t val) { + // We store each 7 bits in a separate byte, and that's how many bytes needed + byte buf[BYTES_FOR_INT]; + byte *p = buf + sizeof(buf); + // We encode in little-endian order, but store in big-endian, to help decoding + do { + *--p = val & 0x7f; + val >>= 7; + } while (val != 0); + byte* c = allocator(emit, buf + sizeof(buf) - p); + while (p != buf + sizeof(buf) - 1) { + *c++ = *p++ | 0x80; + } + *c = *p; +} + // all functions must go through this one to emit code info STATIC byte* emit_get_cur_to_write_code_info(emit_t* emit, int num_bytes_to_write) { //printf("emit %d\n", num_bytes_to_write); @@ -103,13 +119,12 @@ STATIC void emit_align_code_info_to_machine_word(emit_t* emit) { emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); } -STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qstr) { - byte* c = emit_get_cur_to_write_code_info(emit, 4); 
- // TODO variable length encoding for qstr - c[0] = qstr & 0xff; - c[1] = (qstr >> 8) & 0xff; - c[2] = (qstr >> 16) & 0xff; - c[3] = (qstr >> 24) & 0xff; +STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) { + emit_write_uint(emit, emit_get_cur_to_write_code_info, val); +} + +STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qst) { + emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); } #if MICROPY_ENABLE_SOURCE_LINE @@ -160,6 +175,10 @@ STATIC void emit_write_bytecode_byte(emit_t* emit, byte b1) { c[0] = b1; } +STATIC void emit_write_bytecode_uint(emit_t* emit, mp_uint_t val) { + emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); +} + STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, uint b2) { assert((b2 & (~0xff)) == 0); byte* c = emit_get_cur_to_write_bytecode(emit, 2); @@ -167,22 +186,6 @@ STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, uint b2) { c[1] = b2; } -STATIC void emit_write_bytecode_uint(emit_t* emit, uint num) { - // We store each 7 bits in a separate byte, and that's how many bytes needed - byte buf[BYTES_FOR_INT]; - byte *p = buf + sizeof(buf); - // We encode in little-ending order, but store in big-endian, to help decoding - do { - *--p = num & 0x7f; - num >>= 7; - } while (num != 0); - byte* c = emit_get_cur_to_write_bytecode(emit, buf + sizeof(buf) - p); - while (p != buf + sizeof(buf) - 1) { - *c++ = *p++ | 0x80; - } - *c = *p; -} - // Similar to emit_write_bytecode_uint(), just some extra handling to encode sign STATIC void emit_write_bytecode_byte_int(emit_t* emit, byte b1, mp_int_t num) { emit_write_bytecode_byte(emit, b1); @@ -210,9 +213,9 @@ STATIC void emit_write_bytecode_byte_int(emit_t* emit, byte b1, mp_int_t num) { *c = *p; } -STATIC void emit_write_bytecode_byte_uint(emit_t* emit, byte b, uint num) { +STATIC void emit_write_bytecode_byte_uint(emit_t* emit, byte b, mp_uint_t val) { emit_write_bytecode_byte(emit, b); - emit_write_bytecode_uint(emit, num); + 
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); } // aligns the pointer so it is friendly to GC @@ -281,23 +284,18 @@ STATIC void emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->bytecode_offset = 0; emit->code_info_offset = 0; - // write code info size; use maximum space (4 bytes) to write it; TODO possible optimise this - { - byte* c = emit_get_cur_to_write_code_info(emit, 4); - mp_uint_t s = emit->code_info_size; - c[0] = s & 0xff; - c[1] = (s >> 8) & 0xff; - c[2] = (s >> 16) & 0xff; - c[3] = (s >> 24) & 0xff; + // Write code info size as compressed uint. If we are not in the final pass + // then space for this uint is reserved in emit_bc_end_pass. + if (pass == MP_PASS_EMIT) { + emit_write_code_info_uint(emit, emit->code_info_size); } - // code info - emit_write_code_info_qstr(emit, scope->source_file); + // write the name and source file of this function emit_write_code_info_qstr(emit, scope->simple_name); + emit_write_code_info_qstr(emit, scope->source_file); // bytecode prelude: local state size and exception stack size; 16 bit uints for now { - byte* c = emit_get_cur_to_write_bytecode(emit, 4); uint n_state = scope->num_locals + scope->stack_size; if (n_state == 0) { // Need at least 1 entry in the state, in the case an exception is @@ -305,10 +303,8 @@ STATIC void emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { // the highest slot in the state (fastn[0], see vm.c). 
n_state = 1; } - c[0] = n_state & 0xff; - c[1] = (n_state >> 8) & 0xff; - c[2] = scope->exc_stack_size & 0xff; - c[3] = (scope->exc_stack_size >> 8) & 0xff; + emit_write_bytecode_uint(emit, n_state); + emit_write_bytecode_uint(emit, scope->exc_stack_size); } // bytecode prelude: initialise closed over variables @@ -336,10 +332,27 @@ STATIC void emit_bc_end_pass(emit_t *emit) { } *emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info - emit_align_code_info_to_machine_word(emit); // align so that following bytecode is aligned if (emit->pass == MP_PASS_CODE_SIZE) { - // calculate size of code in bytes + // Need to make sure we have enough room in the code-info block to write + // the size of the code-info block. Since the size is written as a + // compressed uint, we don't know its size until we write it! Thus, we + // take the biggest possible value it could be and write that here. + // Then there will be enough room to write the value, and any leftover + // space will be absorbed in the alignment at the end of the code-info + // block. + mp_uint_t max_code_info_size = + emit->code_info_offset // current code-info size + + BYTES_FOR_INT // maximum space for compressed uint + + BYTES_PER_WORD - 1; // maximum space for alignment padding + emit_write_code_info_uint(emit, max_code_info_size); + + // Align code-info so that following bytecode is aligned on a machine word. + // We don't need to write anything here, it's just dead space between the + // code-info block and the bytecode block that follows it. 
+ emit_align_code_info_to_machine_word(emit); + + // calculate size of total code-info + bytecode, in bytes emit->code_info_size = emit->code_info_offset; emit->bytecode_size = emit->bytecode_offset; emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); diff --git a/py/objfun.c b/py/objfun.c index 2a86033dad..8c08ce78c2 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -143,8 +143,8 @@ mp_obj_t mp_make_function_var_between(int n_args_min, int n_args_max, mp_fun_var /* byte code functions */ const char *mp_obj_code_get_name(const byte *code_info) { - qstr block_name = code_info[8] | (code_info[9] << 8) | (code_info[10] << 16) | (code_info[11] << 24); - return qstr_str(block_name); + mp_decode_uint(&code_info); // skip code_info_size entry + return qstr_str(mp_decode_uint(&code_info)); } const char *mp_obj_fun_get_name(mp_const_obj_t fun_in) { @@ -172,21 +172,6 @@ STATIC void dump_args(const mp_obj_t *a, int sz) { #define dump_args(...) (void)0 #endif -STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, mp_uint_t expected, mp_uint_t given) { -#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE - // Generic message, to be reused for other argument issues - nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, - "argument num/types mismatch")); -#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "function takes %d positional arguments but %d were given", expected, given)); -#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_DETAILED - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "%s() takes %d positional arguments but %d were given", - mp_obj_fun_get_name(f), expected, given)); -#endif -} - // With this macro you can tune the maximum number of function state bytes // that will be allocated on the stack. Any function that needs more // than this will use the heap. 
@@ -195,159 +180,6 @@ STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, mp_uint_t expecte // Set this to enable a simple stack overflow check. #define VM_DETECT_STACK_OVERFLOW (0) -// code_state should have ->ip filled in (pointing past code info block), -// as well as ->n_state. -void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) { - // This function is pretty complicated. It's main aim is to be efficient in speed and RAM - // usage for the common case of positional only args. - mp_obj_fun_bc_t *self = self_in; - mp_uint_t n_state = code_state->n_state; - const byte *ip = code_state->ip; - - code_state->code_info = self->bytecode; - code_state->sp = &code_state->state[0] - 1; - code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1; - - // zero out the local stack to begin with - memset(code_state->state, 0, n_state * sizeof(*code_state->state)); - - const mp_obj_t *kwargs = args + n_args; - - // var_pos_kw_args points to the stack where the var-args tuple, and var-kw dict, should go (if they are needed) - mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - self->n_pos_args - self->n_kwonly_args]; - - // check positional arguments - - if (n_args > self->n_pos_args) { - // given more than enough arguments - if (!self->takes_var_args) { - fun_pos_args_mismatch(self, self->n_pos_args, n_args); - } - // put extra arguments in varargs tuple - *var_pos_kw_args-- = mp_obj_new_tuple(n_args - self->n_pos_args, args + self->n_pos_args); - n_args = self->n_pos_args; - } else { - if (self->takes_var_args) { - DEBUG_printf("passing empty tuple as *args\n"); - *var_pos_kw_args-- = mp_const_empty_tuple; - } - // Apply processing and check below only if we don't have kwargs, - // otherwise, kw handling code below has own extensive checks. 
- if (n_kw == 0 && !self->has_def_kw_args) { - if (n_args >= self->n_pos_args - self->n_def_args) { - // given enough arguments, but may need to use some default arguments - for (mp_uint_t i = n_args; i < self->n_pos_args; i++) { - code_state->state[n_state - 1 - i] = self->extra_args[i - (self->n_pos_args - self->n_def_args)]; - } - } else { - fun_pos_args_mismatch(self, self->n_pos_args - self->n_def_args, n_args); - } - } - } - - // copy positional args into state - for (mp_uint_t i = 0; i < n_args; i++) { - code_state->state[n_state - 1 - i] = args[i]; - } - - // check keyword arguments - - if (n_kw != 0 || self->has_def_kw_args) { - DEBUG_printf("Initial args: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); - - mp_obj_t dict = MP_OBJ_NULL; - if (self->takes_kw_args) { - dict = mp_obj_new_dict(n_kw); // TODO: better go conservative with 0? - *var_pos_kw_args = dict; - } - - for (mp_uint_t i = 0; i < n_kw; i++) { - qstr arg_name = MP_OBJ_QSTR_VALUE(kwargs[2 * i]); - for (mp_uint_t j = 0; j < self->n_pos_args + self->n_kwonly_args; j++) { - if (arg_name == self->args[j]) { - if (code_state->state[n_state - 1 - j] != MP_OBJ_NULL) { - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "function got multiple values for argument '%s'", qstr_str(arg_name))); - } - code_state->state[n_state - 1 - j] = kwargs[2 * i + 1]; - goto continue2; - } - } - // Didn't find name match with positional args - if (!self->takes_kw_args) { - nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "function does not take keyword arguments")); - } - mp_obj_dict_store(dict, kwargs[2 * i], kwargs[2 * i + 1]); -continue2:; - } - - DEBUG_printf("Args with kws flattened: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); - - // fill in defaults for positional args - mp_obj_t *d = &code_state->state[n_state - self->n_pos_args]; - 
mp_obj_t *s = &self->extra_args[self->n_def_args - 1]; - for (int i = self->n_def_args; i > 0; i--, d++, s--) { - if (*d == MP_OBJ_NULL) { - *d = *s; - } - } - - DEBUG_printf("Args after filling default positional: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); - - // Check that all mandatory positional args are specified - while (d < &code_state->state[n_state]) { - if (*d++ == MP_OBJ_NULL) { - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "function missing required positional argument #%d", &code_state->state[n_state] - d)); - } - } - - // Check that all mandatory keyword args are specified - // Fill in default kw args if we have them - for (mp_uint_t i = 0; i < self->n_kwonly_args; i++) { - if (code_state->state[n_state - 1 - self->n_pos_args - i] == MP_OBJ_NULL) { - mp_map_elem_t *elem = NULL; - if (self->has_def_kw_args) { - elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[self->n_def_args])->map, MP_OBJ_NEW_QSTR(self->args[self->n_pos_args + i]), MP_MAP_LOOKUP); - } - if (elem != NULL) { - code_state->state[n_state - 1 - self->n_pos_args - i] = elem->value; - } else { - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "function missing required keyword argument '%s'", qstr_str(self->args[self->n_pos_args + i]))); - } - } - } - - } else { - // no keyword arguments given - if (self->n_kwonly_args != 0) { - nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, - "function missing keyword-only argument")); - } - if (self->takes_kw_args) { - *var_pos_kw_args = mp_obj_new_dict(0); - } - } - - // bytecode prelude: initialise closed over variables - for (mp_uint_t n_local = *ip++; n_local > 0; n_local--) { - mp_uint_t local_num = *ip++; - code_state->state[n_state - 1 - local_num] = mp_obj_new_cell(code_state->state[n_state - 1 - local_num]); - } - - // now that we skipped over the prelude, set the ip for the VM - code_state->ip = ip; - - 
DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", self->n_pos_args, self->n_kwonly_args); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); - dump_args(code_state->state, n_state); -} - - STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) { MP_STACK_CHECK(); @@ -359,16 +191,14 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, mp_obj_fun_bc_t *self = self_in; DEBUG_printf("Func n_def_args: %d\n", self->n_def_args); - const byte *ip = self->bytecode; - - // get code info size, and skip line number table - mp_uint_t code_info_size = ip[0] | (ip[1] << 8) | (ip[2] << 16) | (ip[3] << 24); - ip += code_info_size; + // skip code-info block + const byte *code_info = self->bytecode; + mp_uint_t code_info_size = mp_decode_uint(&code_info); + const byte *ip = self->bytecode + code_info_size; // bytecode prelude: state size and exception stack size; 16 bit uints - mp_uint_t n_state = ip[0] | (ip[1] << 8); - mp_uint_t n_exc_stack = ip[2] | (ip[3] << 8); - ip += 4; + mp_uint_t n_state = mp_decode_uint(&ip); + mp_uint_t n_exc_stack = mp_decode_uint(&ip); #if VM_DETECT_STACK_OVERFLOW n_state += 1; diff --git a/py/objgenerator.c b/py/objgenerator.c index 7c364a7b2a..9d16d2bee0 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -57,15 +57,14 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun; assert(MP_OBJ_IS_TYPE(self_fun, &mp_type_fun_bc)); - const byte *bytecode = self_fun->bytecode; - // get code info size, and skip the line number table - mp_uint_t code_info_size = bytecode[0] | (bytecode[1] << 8) | (bytecode[2] << 16) | (bytecode[3] << 24); - bytecode += code_info_size; + // skip code-info block + const byte *code_info = self_fun->bytecode; + mp_uint_t code_info_size = mp_decode_uint(&code_info); + const byte *ip = 
self_fun->bytecode + code_info_size; // bytecode prelude: get state size and exception stack size - mp_uint_t n_state = bytecode[0] | (bytecode[1] << 8); - mp_uint_t n_exc_stack = bytecode[2] | (bytecode[3] << 8); - bytecode += 4; + mp_uint_t n_state = mp_decode_uint(&ip); + mp_uint_t n_exc_stack = mp_decode_uint(&ip); // allocate the generator object, with room for local stack and exception stack mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte, @@ -74,7 +73,7 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw o->globals = self_fun->globals; o->code_state.n_state = n_state; - o->code_state.ip = bytecode; + o->code_state.ip = ip; mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args); return o; } diff --git a/py/py.mk b/py/py.mk index 62a37ddbb7..0720e434b1 100644 --- a/py/py.mk +++ b/py/py.mk @@ -102,6 +102,7 @@ PY_O_BASENAME = \ modstruct.o \ modsys.o \ vm.o \ + bc.o \ showbc.o \ repl.o \ smallint.o \ diff --git a/py/showbc.c b/py/showbc.c index 6c10333c9f..67c5d7adea 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -30,7 +30,10 @@ #include "mpconfig.h" #include "misc.h" #include "qstr.h" +#include "obj.h" +#include "runtime.h" #include "bc0.h" +#include "bc.h" #if MICROPY_DEBUG_PRINTERS @@ -60,20 +63,19 @@ void mp_bytecode_print(const void *descr, const byte *ip, int len) { const byte *ip_start = ip; // get code info size - mp_uint_t code_info_size = ip[0] | (ip[1] << 8) | (ip[2] << 16) | (ip[3] << 24); const byte *code_info = ip; + mp_uint_t code_info_size = mp_decode_uint(&code_info); ip += code_info_size; - qstr source_file = code_info[4] | (code_info[5] << 8) | (code_info[6] << 16) | (code_info[7] << 24); - qstr block_name = code_info[8] | (code_info[9] << 8) | (code_info[10] << 16) | (code_info[11] << 24); + qstr block_name = mp_decode_uint(&code_info); + qstr source_file = mp_decode_uint(&code_info); printf("File %s, code block '%s' (descriptor: %p, bytecode @%p %d bytes)\n", 
qstr_str(source_file), qstr_str(block_name), descr, code_info, len); // bytecode prelude: state size and exception stack size; 16 bit uints { - uint n_state = ip[0] | (ip[1] << 8); - uint n_exc_stack = ip[2] | (ip[3] << 8); - ip += 4; + uint n_state = mp_decode_uint(&ip); + uint n_exc_stack = mp_decode_uint(&ip); printf("(N_STATE %u)\n", n_state); printf("(N_EXC_STACK %u)\n", n_exc_stack); } diff --git a/py/vm.c b/py/vm.c index 8c59b9c3aa..d959880f12 100644 --- a/py/vm.c +++ b/py/vm.c @@ -921,26 +921,25 @@ exception_handler: // But consider how to handle nested exceptions. // TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj) if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { - const byte *code_info = code_state->code_info; - mp_uint_t code_info_size = code_info[0] | (code_info[1] << 8) | (code_info[2] << 16) | (code_info[3] << 24); - qstr source_file = code_info[4] | (code_info[5] << 8) | (code_info[6] << 16) | (code_info[7] << 24); - qstr block_name = code_info[8] | (code_info[9] << 8) | (code_info[10] << 16) | (code_info[11] << 24); - mp_uint_t bc = code_state->ip - code_info - code_info_size; - //printf("find %lu %d %d\n", bc, code_info[12], code_info[13]); + const byte *ip = code_state->code_info; + mp_uint_t code_info_size = mp_decode_uint(&ip); + qstr block_name = mp_decode_uint(&ip); + qstr source_file = mp_decode_uint(&ip); + mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size; mp_uint_t source_line = 1; mp_uint_t c; - for (const byte *ci = code_info + 12; (c = *ci);) { + while ((c = *ip)) { mp_uint_t b, l; if ((c & 0x80) == 0) { // 0b0LLBBBBB encoding b = c & 0x1f; l = c >> 5; - ci += 1; + ip += 1; } else { // 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte) b = c & 0xf; - l = ((c << 4) & 0x700) | ci[1]; - ci += 2; + l = ((c << 4) & 0x700) | ip[1]; + ip += 2; } if (bc >= 
b) { bc -= b;