From 355eb8eafb1a0b0e096cd452d1107ab45bbf72c4 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 13 Oct 2018 13:59:19 +1100 Subject: [PATCH] py/asmx86: Change indirect calls to load fun ptr from the native table. Instead of storing the function pointer directly in the assembly code. This makes the generated code more independent of the runtime (so easier to relocate the code), and reduces the generated code size. --- py/asmx86.c | 18 ++++-------------- py/asmx86.h | 4 ++-- py/emitnative.c | 4 ++++ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/py/asmx86.c b/py/asmx86.c index 81ff1d00d2..60917fdeb7 100644 --- a/py/asmx86.c +++ b/py/asmx86.c @@ -494,7 +494,7 @@ void asm_x86_push_local_addr(asm_x86_t *as, int local_num, int temp_r32) } #endif -void asm_x86_call_ind(asm_x86_t *as, void *ptr, mp_uint_t n_args, int temp_r32) { +void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r32) { // TODO align stack on 16-byte boundary before the call assert(n_args <= 5); if (n_args > 4) { @@ -512,20 +512,10 @@ void asm_x86_call_ind(asm_x86_t *as, void *ptr, mp_uint_t n_args, int temp_r32) if (n_args > 0) { asm_x86_push_r32(as, ASM_X86_REG_ARG_1); } -#ifdef __LP64__ - // We wouldn't run x86 code on an x64 machine. This is here to enable - // testing of the x86 emitter only. - asm_x86_mov_i32_to_r32(as, (int32_t)(int64_t)ptr, temp_r32); -#else - // If we get here, sizeof(int) == sizeof(void*). - asm_x86_mov_i32_to_r32(as, (int32_t)ptr, temp_r32); -#endif + + // Load the pointer to the function and make the call + asm_x86_mov_mem32_to_r32(as, ASM_X86_REG_EBP, fun_id * WORD_SIZE, temp_r32); asm_x86_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R32(2) | MODRM_RM_REG | MODRM_RM_R32(temp_r32)); - // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all - /* - asm_x86_write_byte_1(as, OPCODE_CALL_REL32); - asm_x86_write_word32(as, ptr - (void*)(as->code_base + as->base.code_offset + 4)); - */ // the caller must clean up the stack if (n_args > 0) { diff --git a/py/asmx86.h b/py/asmx86.h index 15518d98c3..a5535b5488 100644 --- a/py/asmx86.h +++ b/py/asmx86.h @@ -112,7 +112,7 @@ void asm_x86_mov_local_to_r32(asm_x86_t* as, int src_local_num, int dest_r32); void asm_x86_mov_r32_to_local(asm_x86_t* as, int src_r32, int dest_local_num); void asm_x86_mov_local_addr_to_r32(asm_x86_t* as, int local_num, int dest_r32); void asm_x86_mov_reg_pcrel(asm_x86_t *as, int dest_r64, mp_uint_t label); -void asm_x86_call_ind(asm_x86_t* as, void* ptr, mp_uint_t n_args, int temp_r32); +void asm_x86_call_ind(asm_x86_t* as, size_t fun_id, mp_uint_t n_args, int temp_r32); #if GENERIC_ASM_API @@ -169,7 +169,7 @@ void asm_x86_call_ind(asm_x86_t* as, void* ptr, mp_uint_t n_args, int temp_r32); asm_x86_jcc_label(as, ASM_X86_CC_JE, label); \ } while (0) #define ASM_JUMP_REG(as, reg) asm_x86_jmp_reg((as), (reg)) -#define ASM_CALL_IND(as, ptr, idx) asm_x86_call_ind(as, ptr, mp_f_n_args[idx], ASM_X86_REG_EAX) +#define ASM_CALL_IND(as, ptr, idx) asm_x86_call_ind(as, idx, mp_f_n_args[idx], ASM_X86_REG_EAX) #define ASM_MOV_LOCAL_REG(as, local_num, reg_src) asm_x86_mov_r32_to_local((as), (reg_src), (local_num)) #define ASM_MOV_REG_IMM(as, reg_dest, imm) asm_x86_mov_i32_to_r32((as), (imm), (reg_dest)) diff --git a/py/emitnative.c b/py/emitnative.c index 81669af7cb..4d6c3445f4 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -379,6 +379,8 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table); #elif N_XTENSA ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table); + #elif N_X86 + asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP); #elif N_X64 asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP); #endif @@ -471,6 +473,8 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table); #elif N_XTENSA ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table); + #elif N_X86 + asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP); #elif N_X64 asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP); #endif