diff --git a/py/bc.c b/py/bc.c index 4a29f439e3..b178e7c202 100644 --- a/py/bc.c +++ b/py/bc.c @@ -382,7 +382,7 @@ STATIC const byte opcode_format_table[64] = { #undef V #undef O -uint mp_opcode_format(const byte *ip, size_t *opcode_size) { +uint mp_opcode_format(const byte *ip, size_t *opcode_size, bool count_var_uint) { uint f = (opcode_format_table[*ip >> 2] >> (2 * (*ip & 3))) & 3; const byte *ip_start = ip; if (f == MP_OPCODE_QSTR) { @@ -403,7 +403,9 @@ uint mp_opcode_format(const byte *ip, size_t *opcode_size) { ); ip += 1; if (f == MP_OPCODE_VAR_UINT) { - while ((*ip++ & 0x80) != 0) { + if (count_var_uint) { + while ((*ip++ & 0x80) != 0) { + } } } else if (f == MP_OPCODE_OFFSET) { ip += 2; diff --git a/py/bc.h b/py/bc.h index ebfdeaac1d..6d86fbdea9 100644 --- a/py/bc.h +++ b/py/bc.h @@ -114,7 +114,7 @@ const byte *mp_bytecode_print_str(const byte *ip); #define MP_OPCODE_VAR_UINT (2) #define MP_OPCODE_OFFSET (3) -uint mp_opcode_format(const byte *ip, size_t *opcode_size); +uint mp_opcode_format(const byte *ip, size_t *opcode_size, bool count_var_uint); #endif diff --git a/py/persistentcode.c b/py/persistentcode.c index 9cea08a2d7..d47425db92 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -133,6 +133,8 @@ typedef struct _bytecode_prelude_t { uint code_info_size; } bytecode_prelude_t; +#if MICROPY_PERSISTENT_CODE_SAVE + // ip will point to start of opcodes // ip2 will point to simple_name, source_file qstrs STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) { @@ -149,6 +151,8 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_ } } +#endif + #endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE #if MICROPY_PERSISTENT_CODE_LOAD @@ -165,10 +169,14 @@ STATIC void read_bytes(mp_reader_t *reader, byte *buf, size_t len) { } } -STATIC size_t read_uint(mp_reader_t *reader) { +STATIC size_t read_uint(mp_reader_t *reader, byte **out) { size_t unum = 0; for (;;) { byte b = reader->readbyte(reader->data); + if (out != NULL) { + **out = b; + ++*out; + } unum = (unum << 7) | (b & 0x7f); if ((b & 0x80) == 0) { break; @@ -178,7 +186,7 @@ STATIC size_t read_uint(mp_reader_t *reader) { } STATIC qstr load_qstr(mp_reader_t *reader, qstr_window_t *qw) { - size_t len = read_uint(reader); + size_t len = read_uint(reader, NULL); if (len & 1) { // qstr in window return qstr_window_access(qw, len >> 1); @@ -197,7 +205,7 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) { if (obj_type == 'e') { return MP_OBJ_FROM_PTR(&mp_const_ellipsis_obj); } else { - size_t len = read_uint(reader); + size_t len = read_uint(reader, NULL); vstr_t vstr; vstr_init_len(&vstr, len); read_bytes(reader, (byte*)vstr.buf, len); @@ -212,41 +220,66 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) { } } -STATIC void load_bytecode_qstrs(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) { +STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_prelude_t *prelude) { + prelude->n_state = read_uint(reader, ip); + prelude->n_exc_stack = read_uint(reader, ip); + read_bytes(reader, *ip, 4); + prelude->scope_flags = *(*ip)++; + prelude->n_pos_args = *(*ip)++; + prelude->n_kwonly_args = *(*ip)++; + prelude->n_def_pos_args = *(*ip)++; + *ip2 = *ip; + prelude->code_info_size = read_uint(reader, ip2); + read_bytes(reader, *ip2, prelude->code_info_size - (*ip2 - *ip)); + *ip += prelude->code_info_size; + while ((*(*ip)++ = read_byte(reader)) != 255) { + } +} + +STATIC void load_bytecode(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) { while (ip < ip_top) { + *ip = read_byte(reader); size_t sz; - uint f = mp_opcode_format(ip, &sz); + uint f = mp_opcode_format(ip, &sz, false); + ++ip; + --sz; if (f == MP_OPCODE_QSTR) { qstr qst = load_qstr(reader, qw); - ip[1] = qst; - ip[2] = qst >> 8; + *ip++ = qst; + *ip++ = qst >> 8; + sz -= 2; + } else if (f == MP_OPCODE_VAR_UINT) { + while ((*ip++ = read_byte(reader)) & 0x80) { + } } + read_bytes(reader, ip, sz); ip += sz; } } STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader, qstr_window_t *qw) { - // load bytecode - size_t bc_len = read_uint(reader); + // get bytecode size and allocate memory for it + size_t bc_len = read_uint(reader, NULL); byte *bytecode = m_new(byte, bc_len); - read_bytes(reader, bytecode, bc_len); - // extract prelude - const byte *ip = bytecode; - const byte *ip2; + // load prelude + byte *ip = bytecode; + byte *ip2; bytecode_prelude_t prelude; - extract_prelude(&ip, &ip2, &prelude); + load_prelude(reader, &ip, &ip2, &prelude); + + // load bytecode + load_bytecode(reader, qw, ip, bytecode + bc_len); // load qstrs and link global qstr ids into bytecode qstr simple_name = load_qstr(reader, qw); qstr source_file = load_qstr(reader, qw); ((byte*)ip2)[0] = simple_name; ((byte*)ip2)[1] = simple_name >> 8; ((byte*)ip2)[2] = source_file; ((byte*)ip2)[3] = source_file >> 8; - load_bytecode_qstrs(reader, qw, (byte*)ip, bytecode + bc_len); // load constant table - size_t n_obj = read_uint(reader); - size_t n_raw_code = read_uint(reader); + size_t n_obj = read_uint(reader, NULL); + size_t n_raw_code = read_uint(reader, NULL); mp_uint_t *const_table = m_new(mp_uint_t, prelude.n_pos_args + prelude.n_kwonly_args + n_obj + n_raw_code); mp_uint_t *ct = const_table; for (size_t i = 0; i < prelude.n_pos_args + prelude.n_kwonly_args; ++i) { @@ -382,14 +415,18 @@ STATIC void save_obj(mp_print_t *print, mp_obj_t o) { } } -STATIC void save_bytecode_qstrs(mp_print_t *print, qstr_window_t *qw, const byte *ip, const byte *ip_top) { +STATIC void save_bytecode(mp_print_t *print, qstr_window_t *qw, const byte *ip, const byte *ip_top) { while (ip < ip_top) { size_t sz; - uint f = mp_opcode_format(ip, &sz); + uint f = mp_opcode_format(ip, &sz, true); if (f == MP_OPCODE_QSTR) { + mp_print_bytes(print, ip, 1); qstr qst = ip[1] | (ip[2] << 8); save_qstr(print, qw, qst); + ip += 3; + sz -= 3; } + mp_print_bytes(print, ip, sz); ip += sz; } } @@ -399,20 +436,24 @@ STATIC void save_raw_code(mp_print_t *print, mp_raw_code_t *rc, qstr_window_t *q mp_raise_ValueError("can only save bytecode"); } - // save bytecode - mp_print_uint(print, rc->data.u_byte.bc_len); - mp_print_bytes(print, rc->data.u_byte.bytecode, rc->data.u_byte.bc_len); - // extract prelude const byte *ip = rc->data.u_byte.bytecode; const byte *ip2; bytecode_prelude_t prelude; extract_prelude(&ip, &ip2, &prelude); + // save prelude + size_t prelude_len = ip - rc->data.u_byte.bytecode; + const byte *ip_top = rc->data.u_byte.bytecode + rc->data.u_byte.bc_len; + mp_print_uint(print, rc->data.u_byte.bc_len); + mp_print_bytes(print, rc->data.u_byte.bytecode, prelude_len); + + // save bytecode + save_bytecode(print, qstr_window, ip, ip_top); + // save qstrs save_qstr(print, qstr_window, ip2[0] | (ip2[1] << 8)); // simple_name save_qstr(print, qstr_window, ip2[2] | (ip2[3] << 8)); // source_file - save_bytecode_qstrs(print, qstr_window, ip, rc->data.u_byte.bytecode + rc->data.u_byte.bc_len); // save constant table mp_print_uint(print, rc->data.u_byte.n_obj); diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 8e7c84f6fe..4d14f5256a 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -174,7 +174,7 @@ def make_opcode_format(): )) # this function mirrors that in py/bc.c -def mp_opcode_format(bytecode, ip, opcode_format=make_opcode_format()): +def mp_opcode_format(bytecode, ip, count_var_uint, opcode_format=make_opcode_format()): opcode = bytecode[ip] ip_start = ip f = (opcode_format[opcode >> 2] >> (2 * (opcode & 3))) & 3 @@ -194,9 +194,10 @@ def mp_opcode_format(bytecode, ip, opcode_format=make_opcode_format()): ) ip += 1 if f == MP_OPCODE_VAR_UINT: - while bytecode[ip] & 0x80 != 0: + if count_var_uint: + while bytecode[ip] & 0x80 != 0: + ip += 1 ip += 1 - ip += 1 elif f == MP_OPCODE_OFFSET: ip += 2 ip += extra_byte @@ -288,7 +289,7 @@ class RawCode: print() ip = self.ip while ip < len(self.bytecode): - f, sz = mp_opcode_format(self.bytecode, ip) + f, sz = mp_opcode_format(self.bytecode, ip, True) if f == 1: qst = self._unpack_qstr(ip + 1).qstr_id extra = '' if sz == 3 else ' 0x%02x,' % self.bytecode[ip + 3] @@ -393,10 +394,28 @@ class RawCode: print(' },') print('};') -def read_uint(f): +class BytecodeBuffer: + def __init__(self, size): + self.buf = bytearray(size) + self.idx = 0 + + def is_full(self): + return self.idx == len(self.buf) + + def append(self, b): + self.buf[self.idx] = b + self.idx += 1 + +def read_byte(f, out=None): + b = bytes_cons(f.read(1))[0] + if out is not None: + out.append(b) + return b + +def read_uint(f, out=None): i = 0 while True: - b = bytes_cons(f.read(1))[0] + b = read_byte(f, out) i = (i << 7) | (b & 0x7f) if b & 0x80 == 0: break @@ -435,31 +454,55 @@ def read_obj(f): else: assert 0 -def read_qstr_and_pack(f, bytecode, ip, qstr_win): - qst = read_qstr(f, qstr_win) - bytecode[ip] = qst & 0xff - bytecode[ip + 1] = qst >> 8 +def read_prelude(f, bytecode): + n_state = read_uint(f, bytecode) + n_exc_stack = read_uint(f, bytecode) + scope_flags = read_byte(f, bytecode) + n_pos_args = read_byte(f, bytecode) + n_kwonly_args = read_byte(f, bytecode) + n_def_pos_args = read_byte(f, bytecode) + l1 = bytecode.idx + code_info_size = read_uint(f, bytecode) + l2 = bytecode.idx + for _ in range(code_info_size - (l2 - l1)): + read_byte(f, bytecode) + while read_byte(f, bytecode) != 255: + pass + return l2, (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args, code_info_size) -def read_bytecode_qstrs(file, bytecode, ip, qstr_win): - while ip < len(bytecode): - f, sz = mp_opcode_format(bytecode, ip) - if f == 1: - read_qstr_and_pack(file, bytecode, ip + 1, qstr_win) - ip += sz +def read_qstr_and_pack(f, bytecode, qstr_win): + qst = read_qstr(f, qstr_win) + bytecode.append(qst & 0xff) + bytecode.append(qst >> 8) + +def read_bytecode(file, bytecode, qstr_win): + while not bytecode.is_full(): + op = read_byte(file, bytecode) + f, sz = mp_opcode_format(bytecode.buf, bytecode.idx - 1, False) + sz -= 1 + if f == MP_OPCODE_QSTR: + read_qstr_and_pack(file, bytecode, qstr_win) + sz -= 2 + elif f == MP_OPCODE_VAR_UINT: + while read_byte(file, bytecode) & 0x80: + pass + for _ in range(sz): + read_byte(file, bytecode) def read_raw_code(f, qstr_win): bc_len = read_uint(f) - bytecode = bytearray(f.read(bc_len)) - ip, ip2, prelude = extract_prelude(bytecode) - read_qstr_and_pack(f, bytecode, ip2, qstr_win) # simple_name - read_qstr_and_pack(f, bytecode, ip2 + 2, qstr_win) # source_file - read_bytecode_qstrs(f, bytecode, ip, qstr_win) + bytecode = BytecodeBuffer(bc_len) + name_idx, prelude = read_prelude(f, bytecode) + read_bytecode(f, bytecode, qstr_win) + bytecode.idx = name_idx # rewind to where qstrs are in prelude + read_qstr_and_pack(f, bytecode, qstr_win) # simple_name + read_qstr_and_pack(f, bytecode, qstr_win) # source_file n_obj = read_uint(f) n_raw_code = read_uint(f) qstrs = [read_qstr(f, qstr_win) for _ in range(prelude[3] + prelude[4])] objs = [read_obj(f) for _ in range(n_obj)] raw_codes = [read_raw_code(f, qstr_win) for _ in range(n_raw_code)] - return RawCode(bytecode, qstrs, objs, raw_codes) + return RawCode(bytecode.buf, qstrs, objs, raw_codes) def read_mpy(filename): with open(filename, 'rb') as f: