py: Reorganise bytecode layout so it's more structured, easier to edit.

This commit is contained in:
Damien George 2015-03-18 17:47:47 +00:00
parent f882d53fcd
commit 9b7f583b0c
7 changed files with 130 additions and 123 deletions

34
py/bc.c
View File

@ -84,10 +84,8 @@ STATIC void dump_args(const mp_obj_t *a, mp_uint_t sz) {
// On entry code_state should be allocated somewhere (stack/heap) and
// contain the following valid entries:
// - code_state->code_info should be the offset in bytes from the start of
// the bytecode chunk to the start of the code-info within the bytecode
// - code_state->ip should contain the offset in bytes from the start of
// the bytecode chunk to the start of the prelude within the bytecode
// the bytecode chunk to just after n_state and n_exc_stack
// - code_state->n_state should be set to the state size (locals plus stack)
void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
// This function is pretty complicated. Its main aim is to be efficient in speed and RAM
@ -95,10 +93,16 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t
mp_obj_fun_bc_t *self = self_in;
mp_uint_t n_state = code_state->n_state;
// ip comes in as an offset into bytecode, so turn it into a true pointer
code_state->ip = self->bytecode + (mp_uint_t)code_state->ip;
#if MICROPY_STACKLESS
code_state->prev = NULL;
#endif
code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info;
// align ip
code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t));
code_state->sp = &code_state->state[0] - 1;
code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;
@ -156,13 +160,8 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t
*var_pos_kw_args = dict;
}
// get pointer to arg_names array at start of bytecode prelude
const mp_obj_t *arg_names;
{
const byte *code_info = code_state->code_info;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size);
}
// get pointer to arg_names array
const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip;
for (mp_uint_t i = 0; i < n_kw; i++) {
mp_obj_t wanted_arg_name = kwargs[2 * i];
@ -235,8 +234,19 @@ continue2:;
}
}
// get the ip and skip argument names
const byte *ip = code_state->ip;
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t);
// store pointer to code_info and jump over it
{
code_state->code_info = ip;
const byte *ip2 = ip;
mp_uint_t code_info_size = mp_decode_uint(&ip2);
ip += code_info_size;
}
// bytecode prelude: initialise closed over variables
const byte *ip = self->bytecode + (mp_uint_t)code_state->ip;
mp_uint_t local_num;
while ((local_num = *ip++) != 255) {
code_state->state[n_state - 1 - local_num] =

24
py/bc.h
View File

@ -29,6 +29,30 @@
#include "py/runtime.h"
#include "py/obj.h"
// bytecode layout:
//
// n_state : var uint
// n_exc_stack : var uint
//
// <word alignment padding>
//
// argname0 : obj (qstr)
// ... : obj (qstr)
// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args
//
// code_info_size : var uint | code_info_size counts bytes in this chunk
// simple_name : var qstr |
// source_file : var qstr |
// <line number info> |
// <word alignment padding> |
//
// local_num0 : byte | local numbers of the locals that are cells
// ... : byte |
// local_numN : byte | N = num_cells (number of locals that are cells)
// 255 : byte | end of list sentinel
//
// <bytecode>
// Exception stack entry
typedef struct _mp_exc_stack {
const byte *handler;

View File

@ -111,7 +111,11 @@ STATIC void emit_align_code_info_to_machine_word(emit_t *emit) {
emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
}
STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) {
// Append a single byte to the code-info block.
STATIC void emit_write_code_info_byte(emit_t *emit, byte val) {
    byte *dest = emit_get_cur_to_write_code_info(emit, 1);
    dest[0] = val;
}
// Write val into the code-info block by delegating to emit_write_uint with
// the code-info allocator as the destination.
STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) {
    emit_write_uint(emit, emit_get_cur_to_write_code_info, val);
}
@ -119,6 +123,13 @@ STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) {
emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
}
// Store a pointer as one machine word in the code-info block.
// The current code-info write position must already be word-aligned; this
// is checked with an assert rather than corrected here.
STATIC void emit_write_code_info_prealigned_ptr(emit_t *emit, void *ptr) {
    mp_uint_t *slot = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t));
    // Verify that the slot is already uint-aligned
    assert(MP_ALIGN(slot, sizeof(mp_uint_t)) == slot);
    slot[0] = (mp_uint_t)ptr;
}
#if MICROPY_ENABLE_SOURCE_LINE
STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) {
assert(bytes_to_skip > 0 || lines_to_skip > 0);
@ -167,11 +178,7 @@ STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) {
c[0] = b1;
}
// Write val into the bytecode block by delegating to emit_write_uint with
// the bytecode allocator as the destination.
STATIC void emit_write_bytecode_uint(emit_t *emit, mp_uint_t val) {
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
}
STATIC void emit_write_bytecode_byte_byte(emit_t *emit, byte b1, byte b2) {
STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, byte b2) {
assert((b2 & (~0xff)) == 0);
byte *c = emit_get_cur_to_write_bytecode(emit, 2);
c[0] = b1;
@ -210,13 +217,6 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) {
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
}
// Store a pointer as one machine word in the bytecode block.
// The current bytecode write position must already be word-aligned; this is
// asserted, not corrected.
STATIC void emit_write_bytecode_prealigned_ptr(emit_t *emit, void *ptr) {
mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
// Verify that c is already uint-aligned
assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
*c = (mp_uint_t)ptr;
}
// aligns the pointer so it is friendly to GC
STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
emit_write_bytecode_byte(emit, b);
@ -227,15 +227,7 @@ STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
*c = (mp_uint_t)ptr;
}
/* currently unused
STATIC void emit_write_bytecode_byte_uint_uint(emit_t *emit, byte b, mp_uint_t num1, mp_uint_t num2) {
emit_write_bytecode_byte(emit, b);
emit_write_bytecode_byte_uint(emit, num1);
emit_write_bytecode_byte_uint(emit, num2);
}
*/
STATIC void emit_write_bytecode_byte_qstr(emit_t *emit, byte b, qstr qst) {
STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
emit_write_bytecode_byte_uint(emit, b, qst);
}
@ -289,19 +281,26 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
emit->bytecode_offset = 0;
emit->code_info_offset = 0;
// Write code info size as compressed uint. If we are not in the final pass
// then space for this uint is reserved in emit_bc_end_pass.
if (pass == MP_PASS_EMIT) {
emit_write_code_info_uint(emit, emit->code_info_size);
// Write local state size and exception stack size.
{
mp_uint_t n_state = scope->num_locals + scope->stack_size;
if (n_state == 0) {
// Need at least 1 entry in the state, in the case an exception is
// propagated through this function, the exception is returned in
// the highest slot in the state (fastn[0], see vm.c).
n_state = 1;
}
emit_write_code_info_uint(emit, n_state);
emit_write_code_info_uint(emit, scope->exc_stack_size);
}
// write the name and source file of this function
emit_write_code_info_qstr(emit, scope->simple_name);
emit_write_code_info_qstr(emit, scope->source_file);
// Align code-info so that following pointers are aligned on a machine word.
emit_align_code_info_to_machine_word(emit);
// bytecode prelude: argument names (needed to resolve positional args passed as keywords)
// we store them as full word-sized objects for efficient access in mp_setup_code_state
// this is the start of the prelude and is guaranteed to be aligned on a word boundary
// Write argument names (needed to resolve positional args passed as
// keywords). We store them as full word-sized objects for efficient access
// in mp_setup_code_state. This is the start of the prelude and is guaranteed
// to be aligned on a word boundary.
{
// For a given argument position (indexed by i) we need to find the
// corresponding id_info which is a parameter, as it has the correct
@ -322,23 +321,23 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
break;
}
}
emit_write_bytecode_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
}
}
// bytecode prelude: local state size and exception stack size
{
mp_uint_t n_state = scope->num_locals + scope->stack_size;
if (n_state == 0) {
// Need at least 1 entry in the state, in the case an exception is
// propagated through this function, the exception is returned in
// the highest slot in the state (fastn[0], see vm.c).
n_state = 1;
}
emit_write_bytecode_uint(emit, n_state);
emit_write_bytecode_uint(emit, scope->exc_stack_size);
// Write size of the rest of the code info. We don't know how big this
// variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes
// for it and hope that is enough! TODO assert this or something.
if (pass == MP_PASS_EMIT) {
emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset);
} else {
emit_get_cur_to_write_code_info(emit, 2);
}
// Write the name and source file of this function.
emit_write_code_info_qstr(emit, scope->simple_name);
emit_write_code_info_qstr(emit, scope->source_file);
// bytecode prelude: initialise closed over variables
for (int i = 0; i < scope->id_info_len; i++) {
id_info_t *id = &scope->id_info[i];
@ -360,25 +359,10 @@ void mp_emit_bc_end_pass(emit_t *emit) {
mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size);
}
*emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info
emit_write_code_info_byte(emit, 0); // end of line number info
if (emit->pass == MP_PASS_CODE_SIZE) {
// Need to make sure we have enough room in the code-info block to write
// the size of the code-info block. Since the size is written as a
// compressed uint, we don't know its size until we write it! Thus, we
// take the biggest possible value it could be and write that here.
// Then there will be enough room to write the value, and any leftover
// space will be absorbed in the alignment at the end of the code-info
// block.
mp_uint_t max_code_info_size =
emit->code_info_offset // current code-info size
+ BYTES_FOR_INT // maximum space for compressed uint
+ BYTES_PER_WORD - 1; // maximum space for alignment padding
emit_write_code_info_uint(emit, max_code_info_size);
// Align code-info so that following bytecode is aligned on a machine word.
// We don't need to write anything here, it's just dead space between the
// code-info block and the bytecode block that follows it.
// so bytecode is aligned
emit_align_code_info_to_machine_word(emit);
// calculate size of total code-info + bytecode, in bytes

View File

@ -566,8 +566,6 @@ struct _emit_t {
stack_info_t *stack_info;
vtype_kind_t saved_stack_vtype;
int code_info_size;
int code_info_offset;
int prelude_offset;
int n_state;
int stack_start;
@ -774,10 +772,6 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
#endif
// set code_state.code_info (offset from start of this function to code_info data)
// XXX this encoding may change size
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->code_info_offset, offsetof(mp_code_state, code_info) / sizeof(mp_uint_t), REG_ARG_1);
// set code_state.ip (offset from start of this function to prelude info)
// XXX this encoding may change size
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state, ip) / sizeof(mp_uint_t), REG_ARG_1);
@ -829,11 +823,10 @@ STATIC void emit_native_end_pass(emit_t *emit) {
}
if (!emit->do_viper_types) {
// write dummy code info (for mp_setup_code_state to parse) and arg names
emit->code_info_offset = ASM_GET_CODE_POS(emit->as);
ASM_DATA(emit->as, 1, emit->code_info_size);
emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
ASM_ALIGN(emit->as, ASM_WORD_SIZE);
emit->code_info_size = ASM_GET_CODE_POS(emit->as) - emit->code_info_offset;
// write argument names as qstr objects
// see comment in corresponding part of emitbc.c about the logic here
for (int i = 0; i < emit->scope->num_pos_args + emit->scope->num_kwonly_args; i++) {
qstr qst = MP_QSTR__star_;
@ -847,8 +840,10 @@ STATIC void emit_native_end_pass(emit_t *emit) {
ASM_DATA(emit->as, ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
}
// write dummy code info (for mp_setup_code_state to parse)
ASM_DATA(emit->as, 1, 1);
// bytecode prelude: initialise closed over variables
emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
for (int i = 0; i < emit->scope->id_info_len; i++) {
id_info_t *id = &emit->scope->id_info[i];
if (id->kind == ID_INFO_KIND_CELL) {

View File

@ -121,8 +121,13 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) {
return MP_QSTR_;
}
#endif
const byte *code_info = fun->bytecode;
return mp_obj_code_get_name(code_info);
const byte *bc = fun->bytecode;
mp_decode_uint(&bc); // skip n_state
mp_decode_uint(&bc); // skip n_exc_stack
bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align
bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names
return mp_obj_code_get_name(bc);
}
#if MICROPY_CPYTHON_COMPAT
@ -158,13 +163,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg
MP_STACK_CHECK();
mp_obj_fun_bc_t *self = self_in;
// skip code-info block
const byte *code_info = self->bytecode;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
const byte *ip = self->bytecode + code_info_size;
// bytecode prelude: skip arg names
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
// get start of bytecode
const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
@ -178,9 +178,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg
return NULL;
}
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
code_state->code_info = 0; // offset to code-info
code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
// execute the byte code with the correct globals context
@ -202,13 +201,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw,
mp_obj_fun_bc_t *self = self_in;
DEBUG_printf("Func n_def_args: %d\n", self->n_def_args);
// skip code-info block
const byte *code_info = self->bytecode;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
const byte *ip = self->bytecode + code_info_size;
// bytecode prelude: skip arg names
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
// get start of bytecode
const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
@ -229,9 +223,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw,
state_size = 0; // indicate that we allocated using alloca
}
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
code_state->code_info = 0; // offset to code-info
code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
// execute the byte code with the correct globals context

View File

@ -54,13 +54,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw
mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun;
assert(MP_OBJ_IS_TYPE(self_fun, &mp_type_fun_bc));
// skip code-info block
const byte *code_info = self_fun->bytecode;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
const byte *ip = self_fun->bytecode + code_info_size;
// bytecode prelude: skip arg names
ip += (self_fun->n_pos_args + self_fun->n_kwonly_args) * sizeof(mp_obj_t);
// get start of bytecode
const byte *ip = self_fun->bytecode;
// bytecode prelude: get state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);

View File

@ -57,7 +57,16 @@ const byte *mp_showbc_code_start;
void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip, mp_uint_t len) {
mp_showbc_code_start = ip;
// get code info size
// get state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
mp_uint_t n_exc_stack = mp_decode_uint(&ip);
ip = MP_ALIGN(ip, sizeof(mp_uint_t));
// get and skip arg names
const mp_obj_t *arg_names = (const mp_obj_t*)ip;
ip += n_total_args * sizeof(mp_uint_t);
const byte *code_info = ip;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
ip += code_info_size;
@ -65,7 +74,7 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip
qstr block_name = mp_decode_uint(&code_info);
qstr source_file = mp_decode_uint(&code_info);
printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n",
qstr_str(source_file), qstr_str(block_name), descr, code_info, len);
qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);
// raw bytecode dump
printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size);
@ -80,18 +89,15 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip
// bytecode prelude: arg names (as qstr objects)
printf("arg names:");
for (mp_uint_t i = 0; i < n_total_args; i++) {
printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(*(mp_obj_t*)ip)));
ip += sizeof(mp_obj_t);
printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i])));
}
printf("\n");
// bytecode prelude: state size and exception stack size; 16 bit uints
{
uint n_state = mp_decode_uint(&ip);
uint n_exc_stack = mp_decode_uint(&ip);
printf("(N_STATE %u)\n", n_state);
printf("(N_EXC_STACK %u)\n", n_exc_stack);
}
printf("(N_STATE " UINT_FMT ")\n", n_state);
printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack);
// for printing line number info
const byte *bytecode_start = ip;
// bytecode prelude: initialise closed over variables
{
@ -104,7 +110,7 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip
// print out line number info
{
mp_int_t bc = (mp_showbc_code_start + code_info_size) - ip; // start counting from the prelude
mp_int_t bc = bytecode_start - ip;
mp_uint_t source_line = 1;
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
for (const byte* ci = code_info; *ci;) {