py: Add MICROPY_PERSISTENT_CODE so code can persist beyond the runtime.

Main changes when MICROPY_PERSISTENT_CODE is enabled are:

- qstrs are encoded as fixed-width 2-byte values (low byte first) in the bytecode
- all pointers are removed from bytecode and put in const_table (this
  includes const objects and raw code pointers)

Ultimately this option will enable persistence for not just bytecode but
also native code.
This commit is contained in:
Damien George 2015-11-02 17:27:18 +00:00
parent 713ea1800d
commit c8e9c0d89a
6 changed files with 140 additions and 21 deletions

View File

@ -56,7 +56,14 @@ struct _emit_t {
mp_uint_t bytecode_offset; mp_uint_t bytecode_offset;
mp_uint_t bytecode_size; mp_uint_t bytecode_size;
byte *code_base; // stores both byte code and code info byte *code_base; // stores both byte code and code info
#if MICROPY_PERSISTENT_CODE
uint16_t ct_cur_obj;
uint16_t ct_num_obj;
uint16_t ct_cur_raw_code;
#endif
mp_uint_t *const_table; mp_uint_t *const_table;
// Accessed as mp_uint_t, so must be aligned as such // Accessed as mp_uint_t, so must be aligned as such
byte dummy_data[DUMMY_DATA_SIZE]; byte dummy_data[DUMMY_DATA_SIZE];
}; };
@ -108,10 +115,6 @@ STATIC byte *emit_get_cur_to_write_code_info(emit_t *emit, int num_bytes_to_writ
} }
} }
// Round the code-info write offset up to a multiple of sizeof(mp_uint_t).
// The mask arithmetic assumes sizeof(mp_uint_t) is a power of two.
STATIC void emit_align_code_info_to_machine_word(emit_t *emit) {
    const size_t word_mask = sizeof(mp_uint_t) - 1;
    emit->code_info_offset = (emit->code_info_offset + word_mask) & ~word_mask;
}
STATIC void emit_write_code_info_byte(emit_t* emit, byte val) { STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
*emit_get_cur_to_write_code_info(emit, 1) = val; *emit_get_cur_to_write_code_info(emit, 1) = val;
} }
@ -121,7 +124,14 @@ STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
} }
STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) {
#if MICROPY_PERSISTENT_CODE
assert((qst >> 16) == 0);
byte *c = emit_get_cur_to_write_code_info(emit, 2);
c[0] = qst;
c[1] = qst >> 8;
#else
emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
#endif
} }
#if MICROPY_ENABLE_SOURCE_LINE #if MICROPY_ENABLE_SOURCE_LINE
@ -163,10 +173,6 @@ STATIC byte *emit_get_cur_to_write_bytecode(emit_t *emit, int num_bytes_to_write
} }
} }
// Round the bytecode write offset up to a multiple of sizeof(mp_uint_t).
// The mask arithmetic assumes sizeof(mp_uint_t) is a power of two.
STATIC void emit_align_bytecode_to_machine_word(emit_t *emit) {
    const size_t word_mask = sizeof(mp_uint_t) - 1;
    emit->bytecode_offset = (emit->bytecode_offset + word_mask) & ~word_mask;
}
STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) { STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) {
byte *c = emit_get_cur_to_write_bytecode(emit, 1); byte *c = emit_get_cur_to_write_bytecode(emit, 1);
c[0] = b1; c[0] = b1;
@ -211,18 +217,55 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) {
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
} }
// aligns the pointer so it is friendly to GC #if MICROPY_PERSISTENT_CODE
// Emit opcode `b` followed by the uint-encoded constant-table index `n`.
// The table entry itself (value `c`) is stored only when the current pass is
// MP_PASS_EMIT; in every other pass just the opcode and index are emitted.
STATIC void emit_write_bytecode_byte_const(emit_t *emit, byte b, mp_uint_t n, mp_uint_t c) {
    if (emit->pass == MP_PASS_EMIT) {
        mp_uint_t *slot = emit->const_table + n;
        *slot = c;
    }
    emit_write_bytecode_byte_uint(emit, b, n);
}
#else
STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) { STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
// aligns the pointer so it is friendly to GC
emit_write_bytecode_byte(emit, b); emit_write_bytecode_byte(emit, b);
emit_align_bytecode_to_machine_word(emit); emit->bytecode_offset = (mp_uint_t)MP_ALIGN(emit->bytecode_offset, sizeof(mp_uint_t));
mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t)); mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
// Verify that c is already uint-aligned // Verify that c is already uint-aligned
assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
*c = (mp_uint_t)ptr; *c = (mp_uint_t)ptr;
} }
#endif
STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) { STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
#if MICROPY_PERSISTENT_CODE
assert((qst >> 16) == 0);
byte *c = emit_get_cur_to_write_bytecode(emit, 3);
c[0] = b;
c[1] = qst;
c[2] = qst >> 8;
#else
emit_write_bytecode_byte_uint(emit, b, qst); emit_write_bytecode_byte_uint(emit, b, qst);
#endif
}
// Emit opcode `b` referring to the constant object `ptr`.
// With MICROPY_PERSISTENT_CODE the pointer goes into the constant table, in
// the next unused object slot after the first num_pos_args + num_kwonly_args
// entries, and only that slot index is written to the bytecode. Without it,
// the pointer is written via emit_write_bytecode_byte_ptr.
STATIC void emit_write_bytecode_byte_obj(emit_t *emit, byte b, void *ptr) {
    #if MICROPY_PERSISTENT_CODE
    // Take the current object slot, then advance the per-pass object counter.
    mp_uint_t slot = emit->scope->num_pos_args + emit->scope->num_kwonly_args
        + emit->ct_cur_obj;
    emit->ct_cur_obj++;
    emit_write_bytecode_byte_const(emit, b, slot, (mp_uint_t)ptr);
    #else
    emit_write_bytecode_byte_ptr(emit, b, ptr);
    #endif
}
STATIC void emit_write_bytecode_byte_raw_code(emit_t *emit, byte b, mp_raw_code_t *rc) {
#if MICROPY_PERSISTENT_CODE
emit_write_bytecode_byte_const(emit, b,
emit->scope->num_pos_args + emit->scope->num_kwonly_args
+ emit->ct_num_obj + emit->ct_cur_raw_code++, (mp_uint_t)rc);
#else
emit_write_bytecode_byte_ptr(emit, b, rc);
#endif
} }
// unsigned labels are relative to ip following this instruction, stored as 16 bits // unsigned labels are relative to ip following this instruction, stored as 16 bits
@ -318,6 +361,11 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
} }
emit_write_bytecode_byte(emit, 255); // end of list sentinel emit_write_bytecode_byte(emit, 255); // end of list sentinel
#if MICROPY_PERSISTENT_CODE
emit->ct_cur_obj = 0;
emit->ct_cur_raw_code = 0;
#endif
if (pass == MP_PASS_EMIT) { if (pass == MP_PASS_EMIT) {
// Write argument names (needed to resolve positional args passed as // Write argument names (needed to resolve positional args passed as
// keywords). We store them as full word-sized objects for efficient access // keywords). We store them as full word-sized objects for efficient access
@ -360,16 +408,30 @@ void mp_emit_bc_end_pass(emit_t *emit) {
emit_write_code_info_byte(emit, 0); // end of line number info emit_write_code_info_byte(emit, 0); // end of line number info
#if MICROPY_PERSISTENT_CODE
assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj));
emit->ct_num_obj = emit->ct_cur_obj;
#endif
if (emit->pass == MP_PASS_CODE_SIZE) { if (emit->pass == MP_PASS_CODE_SIZE) {
#if !MICROPY_PERSISTENT_CODE
// so bytecode is aligned // so bytecode is aligned
emit_align_code_info_to_machine_word(emit); emit->code_info_offset = (mp_uint_t)MP_ALIGN(emit->code_info_offset, sizeof(mp_uint_t));
#endif
// calculate size of total code-info + bytecode, in bytes // calculate size of total code-info + bytecode, in bytes
emit->code_info_size = emit->code_info_offset; emit->code_info_size = emit->code_info_offset;
emit->bytecode_size = emit->bytecode_offset; emit->bytecode_size = emit->bytecode_offset;
emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size);
emit->const_table = m_new0(mp_uint_t, emit->scope->num_pos_args + emit->scope->num_kwonly_args); #if MICROPY_PERSISTENT_CODE
emit->const_table = m_new0(mp_uint_t,
emit->scope->num_pos_args + emit->scope->num_kwonly_args
+ emit->ct_cur_obj + emit->ct_cur_raw_code);
#else
emit->const_table = m_new0(mp_uint_t,
emit->scope->num_pos_args + emit->scope->num_kwonly_args);
#endif
} else if (emit->pass == MP_PASS_EMIT) { } else if (emit->pass == MP_PASS_EMIT) {
mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base,
@ -457,7 +519,7 @@ void mp_emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok) {
case MP_TOKEN_KW_NONE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_NONE); break; case MP_TOKEN_KW_NONE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_NONE); break;
case MP_TOKEN_KW_TRUE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_TRUE); break; case MP_TOKEN_KW_TRUE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_TRUE); break;
no_other_choice: no_other_choice:
case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break; case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break;
default: assert(0); goto no_other_choice; // to help flow control analysis default: assert(0); goto no_other_choice; // to help flow control analysis
} }
} }
@ -478,7 +540,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst) {
void mp_emit_bc_load_const_obj(emit_t *emit, void *obj) { void mp_emit_bc_load_const_obj(emit_t *emit, void *obj) {
emit_bc_pre(emit, 1); emit_bc_pre(emit, 1);
emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, obj); emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, obj);
} }
void mp_emit_bc_load_null(emit_t *emit) { void mp_emit_bc_load_null(emit_t *emit) {
@ -821,22 +883,22 @@ void mp_emit_bc_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right) {
void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
if (n_pos_defaults == 0 && n_kw_defaults == 0) { if (n_pos_defaults == 0 && n_kw_defaults == 0) {
emit_bc_pre(emit, 1); emit_bc_pre(emit, 1);
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION, scope->raw_code); emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION, scope->raw_code);
} else { } else {
emit_bc_pre(emit, -1); emit_bc_pre(emit, -1);
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code); emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code);
} }
} }
void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
if (n_pos_defaults == 0 && n_kw_defaults == 0) { if (n_pos_defaults == 0 && n_kw_defaults == 0) {
emit_bc_pre(emit, -n_closed_over + 1); emit_bc_pre(emit, -n_closed_over + 1);
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE, scope->raw_code); emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE, scope->raw_code);
emit_write_bytecode_byte(emit, n_closed_over); emit_write_bytecode_byte(emit, n_closed_over);
} else { } else {
assert(n_closed_over <= 255); assert(n_closed_over <= 255);
emit_bc_pre(emit, -2 - n_closed_over + 1); emit_bc_pre(emit, -2 - n_closed_over + 1);
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code); emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code);
emit_write_bytecode_byte(emit, n_closed_over); emit_write_bytecode_byte(emit, n_closed_over);
} }
} }

View File

@ -830,10 +830,16 @@ STATIC void emit_native_end_pass(emit_t *emit) {
ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args); ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args);
ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args); ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args);
// write code info (just contains block name and source file) // write code info
#if MICROPY_PERSISTENT_CODE
ASM_DATA(emit->as, 1, 5); ASM_DATA(emit->as, 1, 5);
ASM_DATA(emit->as, 2, emit->scope->simple_name); ASM_DATA(emit->as, 1, emit->scope->simple_name);
ASM_DATA(emit->as, 2, emit->scope->source_file); ASM_DATA(emit->as, 1, emit->scope->simple_name >> 8);
ASM_DATA(emit->as, 1, emit->scope->source_file);
ASM_DATA(emit->as, 1, emit->scope->source_file >> 8);
#else
ASM_DATA(emit->as, 1, 1);
#endif
// bytecode prelude: initialise closed over variables // bytecode prelude: initialise closed over variables
for (int i = 0; i < emit->scope->id_info_len; i++) { for (int i = 0; i < emit->scope->id_info_len; i++) {

View File

@ -192,6 +192,11 @@
/*****************************************************************************/ /*****************************************************************************/
/* Micro Python emitters */ /* Micro Python emitters */
// Whether generated code can persist independently of the VM/runtime instance
#ifndef MICROPY_PERSISTENT_CODE
#define MICROPY_PERSISTENT_CODE (0)
#endif
// Whether to emit x64 native code // Whether to emit x64 native code
#ifndef MICROPY_EMIT_X64 #ifndef MICROPY_EMIT_X64
#define MICROPY_EMIT_X64 (0) #define MICROPY_EMIT_X64 (0)

View File

@ -106,7 +106,11 @@ const mp_obj_type_t mp_type_fun_builtin = {
qstr mp_obj_code_get_name(const byte *code_info) { qstr mp_obj_code_get_name(const byte *code_info) {
mp_decode_uint(&code_info); // skip code_info_size entry mp_decode_uint(&code_info); // skip code_info_size entry
#if MICROPY_PERSISTENT_CODE
return code_info[0] | (code_info[1] << 8);
#else
return mp_decode_uint(&code_info); return mp_decode_uint(&code_info);
#endif
} }
#if MICROPY_EMIT_NATIVE #if MICROPY_EMIT_NATIVE

View File

@ -40,6 +40,18 @@
} }
#define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0) #define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0)
#define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0) #define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0)
#if MICROPY_PERSISTENT_CODE
// Persistent-code variant: read a qstr stored as a fixed-width 2-byte value
// (ip[0] is the low byte, ip[1] the high byte) into `qst` and advance ip by 2.
// Expands to two statements with a trailing semicolon, so it must not be used
// as the body of an unbraced if/else.
#define DECODE_QSTR \
qst = ip[0] | ip[1] << 8; \
ip += 2;
// Persistent-code variant: run DECODE_UINT, then replace `unum` with the entry
// of mp_showbc_const_table at that index.
// NOTE(review): relies on DECODE_UINT (defined outside this view) leaving the
// decoded index in `unum` - confirm.
#define DECODE_PTR \
DECODE_UINT; \
unum = mp_showbc_const_table[unum]
#else
#define DECODE_QSTR { \ #define DECODE_QSTR { \
qst = 0; \ qst = 0; \
do { \ do { \
@ -52,10 +64,14 @@
ip += sizeof(mp_uint_t); \ ip += sizeof(mp_uint_t); \
} while (0) } while (0)
#endif
const byte *mp_showbc_code_start; const byte *mp_showbc_code_start;
const mp_uint_t *mp_showbc_const_table;
void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) { void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) {
mp_showbc_code_start = ip; mp_showbc_code_start = ip;
mp_showbc_const_table = const_table;
// get bytecode parameters // get bytecode parameters
mp_uint_t n_state = mp_decode_uint(&ip); mp_uint_t n_state = mp_decode_uint(&ip);
@ -69,8 +85,13 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
mp_uint_t code_info_size = mp_decode_uint(&code_info); mp_uint_t code_info_size = mp_decode_uint(&code_info);
ip += code_info_size; ip += code_info_size;
#if MICROPY_PERSISTENT_CODE
qstr block_name = code_info[0] | (code_info[1] << 8);
qstr source_file = code_info[2] | (code_info[3] << 8);
#else
qstr block_name = mp_decode_uint(&code_info); qstr block_name = mp_decode_uint(&code_info);
qstr source_file = mp_decode_uint(&code_info); qstr source_file = mp_decode_uint(&code_info);
#endif
printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n", printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n",
qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len); qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);

21
py/vm.c
View File

@ -65,6 +65,18 @@ typedef enum {
} while ((*ip++ & 0x80) != 0) } while ((*ip++ & 0x80) != 0)
#define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2 #define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2
#define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2 #define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2
#if MICROPY_PERSISTENT_CODE
// Persistent-code variant: declare a local `qst` and initialise it from a
// fixed-width 2-byte value (ip[0] is the low byte, ip[1] the high byte), then
// advance ip by 2. Because it declares a variable and expands to two
// statements, it can be used at most once per scope and not as an unbraced
// if/else body.
#define DECODE_QSTR \
qstr qst = ip[0] | ip[1] << 8; \
ip += 2;
// Persistent-code variant: run DECODE_UINT, then declare a local `ptr` holding
// the entry of code_state->const_table at index `unum`, cast to void*.
// NOTE(review): relies on DECODE_UINT (defined outside this view) leaving the
// decoded index in `unum` - confirm.
#define DECODE_PTR \
DECODE_UINT; \
void *ptr = (void*)code_state->const_table[unum]
#else
#define DECODE_QSTR qstr qst = 0; \ #define DECODE_QSTR qstr qst = 0; \
do { \ do { \
qst = (qst << 7) + (*ip & 0x7f); \ qst = (qst << 7) + (*ip & 0x7f); \
@ -73,6 +85,9 @@ typedef enum {
ip = (byte*)(((mp_uint_t)ip + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1))); /* align ip */ \ ip = (byte*)(((mp_uint_t)ip + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1))); /* align ip */ \
void *ptr = (void*)*(mp_uint_t*)ip; \ void *ptr = (void*)*(mp_uint_t*)ip; \
ip += sizeof(mp_uint_t) ip += sizeof(mp_uint_t)
#endif
#define PUSH(val) *++sp = (val) #define PUSH(val) *++sp = (val)
#define POP() (*sp--) #define POP() (*sp--)
#define TOP() (*sp) #define TOP() (*sp)
@ -1280,8 +1295,14 @@ unwind_loop:
if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
const byte *ip = code_state->code_info; const byte *ip = code_state->code_info;
mp_uint_t code_info_size = mp_decode_uint(&ip); mp_uint_t code_info_size = mp_decode_uint(&ip);
#if MICROPY_PERSISTENT_CODE
qstr block_name = ip[0] | (ip[1] << 8);
qstr source_file = ip[2] | (ip[3] << 8);
ip += 4;
#else
qstr block_name = mp_decode_uint(&ip); qstr block_name = mp_decode_uint(&ip);
qstr source_file = mp_decode_uint(&ip); qstr source_file = mp_decode_uint(&ip);
#endif
mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size; mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size;
mp_uint_t source_line = 1; mp_uint_t source_line = 1;
mp_uint_t c; mp_uint_t c;