From a506335524a69fbccad245d486f5f641a0cf8919 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 17 Jun 2022 23:14:32 +1000 Subject: [PATCH] py/emit: Suppress unreachable bytecode/native code that follows jump. This new logic tracks when an unconditional jump/raise occurs in the emitted code stream (bytecode or native machine code) and suppresses all subsequent code, until a label is assigned. This eliminates a lot of cases of dead code, with relatively simple logic. This commit combined with the previous one (that removed the existing dead-code finding logic) has the following code size change: bare-arm: -16 -0.028% minimal x86: -60 -0.036% unix x64: -368 -0.070% unix nanbox: -80 -0.017% stm32: -204 -0.052% PYBV10 cc3200: +0 +0.000% esp8266: -232 -0.033% GENERIC esp32: -224 -0.015% GENERIC[incl -40(data)] mimxrt: -192 -0.054% TEENSY40 renesas-ra: -200 -0.032% RA6M2_EK nrf: +28 +0.015% pca10040 rp2: -256 -0.050% PICO samd: -12 -0.009% ADAFRUIT_ITSYBITSY_M4_EXPRESS Signed-off-by: Damien George --- py/asmbase.c | 10 +++ py/asmbase.h | 11 ++- py/emitbc.c | 22 +++++ py/emitnative.c | 2 + tests/cmdline/cmd_showbc.py | 8 +- tests/cmdline/cmd_showbc.py.exp | 126 ++++++++++++++-------------- tests/cmdline/cmd_showbc_opt.py | 33 ++++++++ tests/cmdline/cmd_showbc_opt.py.exp | 122 +++++++++++++++++++++++++++ 8 files changed, 268 insertions(+), 66 deletions(-) create mode 100644 tests/cmdline/cmd_showbc_opt.py create mode 100644 tests/cmdline/cmd_showbc_opt.py.exp diff --git a/py/asmbase.c b/py/asmbase.c index 4a3fd089cb..da4273506a 100644 --- a/py/asmbase.c +++ b/py/asmbase.c @@ -55,15 +55,20 @@ void mp_asm_base_start_pass(mp_asm_base_t *as, int pass) { assert(as->code_base != NULL); } as->pass = pass; + as->suppress = false; as->code_offset = 0; } // all functions must go through this one to emit bytes // if as->pass < MP_ASM_PASS_EMIT, then this function just counts the number // of bytes needed and returns NULL, and callers should not store any data +// It also 
returns NULL if generated code should be suppressed at this point. uint8_t *mp_asm_base_get_cur_to_write_bytes(void *as_in, size_t num_bytes_to_write) { mp_asm_base_t *as = as_in; uint8_t *c = NULL; + if (as->suppress) { + return c; + } if (as->pass == MP_ASM_PASS_EMIT) { assert(as->code_offset + num_bytes_to_write <= as->code_size); c = as->code_base + as->code_offset; @@ -74,6 +79,11 @@ uint8_t *mp_asm_base_get_cur_to_write_bytes(void *as_in, size_t num_bytes_to_wri void mp_asm_base_label_assign(mp_asm_base_t *as, size_t label) { assert(label < as->max_num_labels); + + // Assigning a label ends any dead-code region, and all following machine + // code should be emitted (until another mp_asm_base_suppress_code() call). + as->suppress = false; + if (as->pass < MP_ASM_PASS_EMIT) { // assign label offset assert(as->label_offsets[label] == (size_t)-1); diff --git a/py/asmbase.h b/py/asmbase.h index 960be7685f..352d2f54cc 100644 --- a/py/asmbase.h +++ b/py/asmbase.h @@ -33,7 +33,12 @@ #define MP_ASM_PASS_EMIT (2) typedef struct _mp_asm_base_t { - int pass; + uint8_t pass; + + // Set to true using mp_asm_base_suppress_code() if the code generator + // should suppress emitted code due to it being dead code. + bool suppress; + size_t code_offset; size_t code_size; uint8_t *code_base; @@ -50,6 +55,10 @@ void mp_asm_base_label_assign(mp_asm_base_t *as, size_t label); void mp_asm_base_align(mp_asm_base_t *as, unsigned int align); void mp_asm_base_data(mp_asm_base_t *as, unsigned int bytesize, uintptr_t val); +static inline void mp_asm_base_suppress_code(mp_asm_base_t *as) { + as->suppress = true; +} + static inline size_t mp_asm_base_get_code_pos(mp_asm_base_t *as) { return as->code_offset; } diff --git a/py/emitbc.c b/py/emitbc.c index 9c0d78d790..70a4d8b12e 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -48,6 +48,11 @@ struct _emit_t { pass_kind_t pass : 8; + // Set to true if the code generator should suppress emitted code due to it + // being dead code. 
This can happen when opcodes immediately follow an + // unconditional flow control (eg jump or raise). + bool suppress; + int stack_size; mp_emit_common_t *emit_common; @@ -140,6 +145,9 @@ STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_sk // all functions must go through this one to emit byte code STATIC uint8_t *emit_get_cur_to_write_bytecode(void *emit_in, size_t num_bytes_to_write) { emit_t *emit = emit_in; + if (emit->suppress) { + return emit->dummy_data; + } if (emit->pass < MP_PASS_EMIT) { emit->bytecode_offset += num_bytes_to_write; return emit->dummy_data; @@ -222,6 +230,10 @@ STATIC void emit_write_bytecode_byte_child(emit_t *emit, int stack_adj, byte b, STATIC void emit_write_bytecode_byte_label(emit_t *emit, int stack_adj, byte b1, mp_uint_t label) { mp_emit_bc_adjust_stack_size(emit, stack_adj); + if (emit->suppress) { + return; + } + // Determine if the jump offset is signed or unsigned, based on the opcode. const bool is_signed = b1 <= MP_BC_POP_JUMP_IF_FALSE; @@ -271,6 +283,7 @@ STATIC void emit_write_bytecode_byte_label(emit_t *emit, int stack_adj, byte b1, void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->pass = pass; emit->stack_size = 0; + emit->suppress = false; emit->scope = scope; emit->last_source_line_offset = 0; emit->last_source_line = 1; @@ -426,6 +439,10 @@ void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t source_line) { } void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l) { + // Assigning a label ends any dead-code region, and all following opcodes + // should be emitted (until another unconditional flow control). 
+ emit->suppress = false; + mp_emit_bc_adjust_stack_size(emit, 0); if (emit->pass == MP_PASS_SCOPE) { return; @@ -589,6 +606,7 @@ void mp_emit_bc_rot_three(emit_t *emit) { void mp_emit_bc_jump(emit_t *emit, mp_uint_t label) { emit_write_bytecode_byte_label(emit, 0, MP_BC_JUMP, label); + emit->suppress = true; } void mp_emit_bc_pop_jump_if(emit_t *emit, bool cond, mp_uint_t label) { @@ -622,6 +640,7 @@ void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_dept emit_write_bytecode_byte_label(emit, 0, MP_BC_UNWIND_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR); emit_write_bytecode_raw_byte(emit, ((label & MP_EMIT_BREAK_FROM_FOR) ? 0x80 : 0) | except_depth); } + emit->suppress = true; } void mp_emit_bc_setup_block(emit_t *emit, mp_uint_t label, int kind) { @@ -663,6 +682,7 @@ void mp_emit_bc_for_iter_end(emit_t *emit) { void mp_emit_bc_pop_except_jump(emit_t *emit, mp_uint_t label, bool within_exc_handler) { (void)within_exc_handler; emit_write_bytecode_byte_label(emit, 0, MP_BC_POP_EXCEPT_JUMP, label); + emit->suppress = true; } void mp_emit_bc_unary_op(emit_t *emit, mp_unary_op_t op) { @@ -766,6 +786,7 @@ void mp_emit_bc_call_method(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_ke void mp_emit_bc_return_value(emit_t *emit) { emit_write_bytecode_byte(emit, -1, MP_BC_RETURN_VALUE); + emit->suppress = true; } void mp_emit_bc_raise_varargs(emit_t *emit, mp_uint_t n_args) { @@ -773,6 +794,7 @@ void mp_emit_bc_raise_varargs(emit_t *emit, mp_uint_t n_args) { MP_STATIC_ASSERT(MP_BC_RAISE_LAST + 2 == MP_BC_RAISE_FROM); assert(n_args <= 2); emit_write_bytecode_byte(emit, -n_args, MP_BC_RAISE_LAST + n_args); + emit->suppress = true; } void mp_emit_bc_yield(emit_t *emit, int kind) { diff --git a/py/emitnative.c b/py/emitnative.c index 5b695a22a7..957e713fdd 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -1966,6 +1966,7 @@ STATIC void emit_native_jump(emit_t *emit, mp_uint_t label) { need_stack_settled(emit); ASM_JUMP(emit->as, label); emit_post(emit); + 
mp_asm_base_suppress_code(&emit->as->base); } STATIC void emit_native_jump_helper(emit_t *emit, bool cond, mp_uint_t label, bool pop) { @@ -2810,6 +2811,7 @@ STATIC void emit_native_raise_varargs(emit_t *emit, mp_uint_t n_args) { } // TODO probably make this 1 call to the runtime (which could even call convert, native_raise(obj, type)) emit_call(emit, MP_F_NATIVE_RAISE); + mp_asm_base_suppress_code(&emit->as->base); } STATIC void emit_native_yield(emit_t *emit, int kind) { diff --git a/tests/cmdline/cmd_showbc.py b/tests/cmdline/cmd_showbc.py index e5874f990a..4a2e6500aa 100644 --- a/tests/cmdline/cmd_showbc.py +++ b/tests/cmdline/cmd_showbc.py @@ -119,12 +119,12 @@ def f(): #from sys import * # tested at module scope # raise - raise - raise 1 + if a: raise + if a: raise 1 # return - return - return 1 + if a: return + if a: return 1 # function with lots of locals def f(): diff --git a/tests/cmdline/cmd_showbc.py.exp b/tests/cmdline/cmd_showbc.py.exp index 2eeb8eadd0..db06de9237 100644 --- a/tests/cmdline/cmd_showbc.py.exp +++ b/tests/cmdline/cmd_showbc.py.exp @@ -47,8 +47,8 @@ arg names: 42 IMPORT_STAR 43 LOAD_CONST_NONE 44 RETURN_VALUE -File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 45\[46\] bytes) -Raw bytecode (code_info_size=8\[46\], bytecode_size=372): +File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 46\[68\] bytes) +Raw bytecode (code_info_size=8\[46\], bytecode_size=382): a8 12 9\[bf\] 03 05 60 60 26 22 24 64 22 24 25 25 24 26 23 63 22 22 25 23 23 2f 6c 25 65 25 25 69 68 26 65 27 6a 62 20 23 62 2a 29 69 24 25 28 67 26 @@ -126,19 +126,18 @@ arg names: bc=313 line=100 bc=315 line=101 ######## - bc=321 line=103 - bc=327 line=106 - bc=331 line=107 - bc=337 line=110 - bc=340 line=111 - bc=346 line=114 - bc=346 line=117 - bc=351 line=118 - bc=363 line=121 - bc=363 line=122 - bc=364 line=123 - bc=366 line=126 - bc=368 line=127 + bc=325 line=106 + bc=329 line=107 + bc=335 line=110 + bc=338 line=111 + bc=344 
line=114 + bc=344 line=117 + bc=349 line=118 + bc=361 line=121 + bc=361 line=122 + bc=365 line=123 + bc=370 line=126 + bc=375 line=127 00 LOAD_CONST_NONE 01 LOAD_CONST_FALSE 02 BINARY_OP 27 __add__ @@ -363,48 +362,55 @@ arg names: 308 LOAD_FAST 1 309 POP_TOP 310 END_FINALLY -311 JUMP 324 -313 SETUP_EXCEPT 320 -315 UNWIND_JUMP 327 1 -318 POP_EXCEPT_JUMP 324 -320 POP_TOP -321 POP_EXCEPT_JUMP 324 -323 END_FINALLY -324 LOAD_FAST 0 -325 POP_JUMP_IF_TRUE 313 -327 LOAD_FAST 0 -328 SETUP_WITH 335 -330 POP_TOP -331 LOAD_DEREF 14 -333 POP_TOP -334 LOAD_CONST_NONE -335 WITH_CLEANUP -336 END_FINALLY -337 LOAD_CONST_SMALL_INT 1 -338 STORE_DEREF 16 -340 LOAD_FAST_N 16 -342 MAKE_CLOSURE \.\+ 1 -345 STORE_FAST 13 -346 LOAD_CONST_SMALL_INT 0 -347 LOAD_CONST_NONE -348 IMPORT_NAME 'a' -350 STORE_FAST 0 -351 LOAD_CONST_SMALL_INT 0 -352 LOAD_CONST_STRING 'b' -354 BUILD_TUPLE 1 -356 IMPORT_NAME 'a' -358 IMPORT_FROM 'b' -360 STORE_DEREF 14 -362 POP_TOP -363 RAISE_LAST -364 LOAD_CONST_SMALL_INT 1 -365 RAISE_OBJ -366 LOAD_CONST_NONE -367 RETURN_VALUE +311 JUMP 322 +313 SETUP_EXCEPT 318 +315 UNWIND_JUMP 325 1 +318 POP_TOP +319 POP_EXCEPT_JUMP 322 +321 END_FINALLY +322 LOAD_FAST 0 +323 POP_JUMP_IF_TRUE 313 +325 LOAD_FAST 0 +326 SETUP_WITH 333 +328 POP_TOP +329 LOAD_DEREF 14 +331 POP_TOP +332 LOAD_CONST_NONE +333 WITH_CLEANUP +334 END_FINALLY +335 LOAD_CONST_SMALL_INT 1 +336 STORE_DEREF 16 +338 LOAD_FAST_N 16 +340 MAKE_CLOSURE \.\+ 1 +343 STORE_FAST 13 +344 LOAD_CONST_SMALL_INT 0 +345 LOAD_CONST_NONE +346 IMPORT_NAME 'a' +348 STORE_FAST 0 +349 LOAD_CONST_SMALL_INT 0 +350 LOAD_CONST_STRING 'b' +352 BUILD_TUPLE 1 +354 IMPORT_NAME 'a' +356 IMPORT_FROM 'b' +358 STORE_DEREF 14 +360 POP_TOP +361 LOAD_FAST 0 +362 POP_JUMP_IF_FALSE 365 +364 RAISE_LAST +365 LOAD_FAST 0 +366 POP_JUMP_IF_FALSE 370 368 LOAD_CONST_SMALL_INT 1 -369 RETURN_VALUE -370 LOAD_CONST_NONE -371 RETURN_VALUE +369 RAISE_OBJ +370 LOAD_FAST 0 +371 POP_JUMP_IF_FALSE 375 +373 LOAD_CONST_NONE +374 RETURN_VALUE +375 LOAD_FAST 0 +376 
POP_JUMP_IF_FALSE 380 +378 LOAD_CONST_SMALL_INT 1 +379 RETURN_VALUE +380 LOAD_CONST_NONE +381 RETURN_VALUE File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 59 bytes) Raw bytecode (code_info_size=8, bytecode_size=51): a8 10 0a 05 80 82 34 38 81 57 c0 57 c1 57 c2 57 @@ -623,9 +629,9 @@ arg names: * 08 DELETE_DEREF 0 10 LOAD_CONST_NONE 11 RETURN_VALUE -File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 15 bytes) -Raw bytecode (code_info_size=8, bytecode_size=7): - 9a 01 0a 05 03 08 80 8b b1 25 00 f2 63 51 63 +File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 13 bytes) +Raw bytecode (code_info_size=8, bytecode_size=5): + 9a 01 0a 05 03 08 80 8b b1 25 00 f2 63 arg names: * b (N_STATE 4) (N_EXC_STACK 0) @@ -635,8 +641,6 @@ arg names: * b 01 LOAD_DEREF 0 03 BINARY_OP 27 __add__ 04 RETURN_VALUE -05 LOAD_CONST_NONE -06 RETURN_VALUE mem: total=\\d\+, current=\\d\+, peak=\\d\+ stack: \\d\+ out of \\d\+ GC: total: \\d\+, used: \\d\+, free: \\d\+ diff --git a/tests/cmdline/cmd_showbc_opt.py b/tests/cmdline/cmd_showbc_opt.py new file mode 100644 index 0000000000..02785860d9 --- /dev/null +++ b/tests/cmdline/cmd_showbc_opt.py @@ -0,0 +1,33 @@ +# cmdline: -v -v +# test printing of bytecode when it's optimised away + + +def f0(): + return 0 + print(1) + + +def f1(x): + if x: + return + print(1) + print(2) + + +def f2(x): + raise Exception + print(1) + + +def f3(x): + while x: + break + print(1) + print(2) + + +def f4(x): + while x: + continue + print(1) + print(2) diff --git a/tests/cmdline/cmd_showbc_opt.py.exp b/tests/cmdline/cmd_showbc_opt.py.exp new file mode 100644 index 0000000000..9e4e4fae10 --- /dev/null +++ b/tests/cmdline/cmd_showbc_opt.py.exp @@ -0,0 +1,122 @@ +File cmdline/cmd_showbc_opt.py, code block '' (descriptor: \.\+, bytecode @\.\+ 35 bytes) +Raw bytecode (code_info_size=13, bytecode_size=22): + 00 16 01 60 20 64 40 84 07 64 40 84 07 32 00 16 + 02 32 01 16 03 32 02 16 04 32 03 16 05 
32 04 16 + 06 51 63 +arg names: +(N_STATE 1) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=4 + bc=0 line=5 + bc=4 line=8 + bc=4 line=10 + bc=8 line=17 + bc=12 line=20 + bc=12 line=22 + bc=16 line=29 +00 MAKE_FUNCTION \.\+ +02 STORE_NAME f0 +04 MAKE_FUNCTION \.\+ +06 STORE_NAME f1 +08 MAKE_FUNCTION \.\+ +10 STORE_NAME f2 +12 MAKE_FUNCTION \.\+ +14 STORE_NAME f3 +16 MAKE_FUNCTION \.\+ +18 STORE_NAME f4 +20 LOAD_CONST_NONE +21 RETURN_VALUE +File cmdline/cmd_showbc_opt.py, code block 'f0' (descriptor: \.\+, bytecode @\.\+ 8 bytes) +Raw bytecode (code_info_size=6, bytecode_size=2): + 08 08 02 60 40 22 80 63 +arg names: +(N_STATE 2) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=4 + bc=0 line=6 + bc=2 line=7 +00 LOAD_CONST_SMALL_INT 0 +01 RETURN_VALUE +File cmdline/cmd_showbc_opt.py, code block 'f1' (descriptor: \.\+, bytecode @\.\+ 22 bytes) +Raw bytecode (code_info_size=9, bytecode_size=13): + 11 0e 03 08 80 0a 23 22 20 b0 44 42 51 63 12 07 + 82 34 01 59 51 63 +arg names: x +(N_STATE 3) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=11 + bc=3 line=12 + bc=5 line=13 + bc=5 line=14 +00 LOAD_FAST 0 +01 POP_JUMP_IF_FALSE 5 +03 LOAD_CONST_NONE +04 RETURN_VALUE +05 LOAD_GLOBAL print +07 LOAD_CONST_SMALL_INT 2 +08 CALL_FUNCTION n=1 nkw=0 +10 POP_TOP +11 LOAD_CONST_NONE +12 RETURN_VALUE +File cmdline/cmd_showbc_opt.py, code block 'f2' (descriptor: \.\+, bytecode @\.\+ 10 bytes) +Raw bytecode (code_info_size=7, bytecode_size=3): + 11 0a 04 08 80 11 23 12 09 65 +arg names: x +(N_STATE 3) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=18 + bc=3 line=19 +00 LOAD_GLOBAL Exception +02 RAISE_OBJ +File cmdline/cmd_showbc_opt.py, code block 'f3' (descriptor: \.\+, bytecode @\.\+ 24 bytes) +Raw bytecode (code_info_size=9, bytecode_size=15): + 11 0e 05 08 80 16 22 22 23 42 42 42 43 b0 43 3b + 12 07 82 34 01 59 51 63 +arg names: x +(N_STATE 3) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=23 + bc=2 line=24 + bc=4 line=25 + bc=7 line=26 +00 JUMP 4 +02 JUMP 7 +04 LOAD_FAST 0 +05 POP_JUMP_IF_TRUE 2 +07 
LOAD_GLOBAL print +09 LOAD_CONST_SMALL_INT 2 +10 CALL_FUNCTION n=1 nkw=0 +12 POP_TOP +13 LOAD_CONST_NONE +14 RETURN_VALUE +File cmdline/cmd_showbc_opt.py, code block 'f4' (descriptor: \.\+, bytecode @\.\+ 24 bytes) +Raw bytecode (code_info_size=9, bytecode_size=15): + 11 0e 06 08 80 1d 22 22 23 42 42 42 40 b0 43 3b + 12 07 82 34 01 59 51 63 +arg names: x +(N_STATE 3) +(N_EXC_STACK 0) + bc=0 line=1 + bc=0 line=30 + bc=2 line=31 + bc=4 line=32 + bc=7 line=33 +00 JUMP 4 +02 JUMP 4 +04 LOAD_FAST 0 +05 POP_JUMP_IF_TRUE 2 +07 LOAD_GLOBAL print +09 LOAD_CONST_SMALL_INT 2 +10 CALL_FUNCTION n=1 nkw=0 +12 POP_TOP +13 LOAD_CONST_NONE +14 RETURN_VALUE +mem: total=\\d\+, current=\\d\+, peak=\\d\+ +stack: \\d\+ out of \\d\+ +GC: total: \\d\+, used: \\d\+, free: \\d\+ + No. of 1-blocks: \\d\+, 2-blocks: \\d\+, max blk sz: \\d\+, max free sz: \\d\+