b326edf68c
This commit removes all parts of code associated with the existing MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the -mcache-lookup-bc option to mpy-cross. This feature originally provided a significant performance boost for Unix, but wasn't able to be enabled for MCU targets (due to frozen bytecode), and added significant extra complexity to generating and distributing .mpy files. The equivalent performance gain is now provided by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has been enabled on the unix port in the previous commit). It's hard to provide precise performance numbers, but tests have been run on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V, xtensa) and they all generally agree on the qualitative improvements seen by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE. For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined with MAP_LOOKUP_CACHE is: diff of scores (higher is better) N=2000 M=2000 bccache -> attrmapcache diff diff% (error%) bm_chaos.py 13742.56 -> 13905.67 : +163.11 = +1.187% (+/-3.75%) bm_fannkuch.py 60.13 -> 61.34 : +1.21 = +2.012% (+/-2.11%) bm_fft.py 113083.20 -> 114793.68 : +1710.48 = +1.513% (+/-1.57%) bm_float.py 256552.80 -> 243908.29 : -12644.51 = -4.929% (+/-1.90%) bm_hexiom.py 521.93 -> 625.41 : +103.48 = +19.826% (+/-0.40%) bm_nqueens.py 197544.25 -> 217713.12 : +20168.87 = +10.210% (+/-3.01%) bm_pidigits.py 8072.98 -> 8198.75 : +125.77 = +1.558% (+/-3.22%) misc_aes.py 17283.45 -> 16480.52 : -802.93 = -4.646% (+/-0.82%) misc_mandel.py 99083.99 -> 128939.84 : +29855.85 = +30.132% (+/-5.88%) misc_pystone.py 83860.10 -> 82592.56 : -1267.54 = -1.511% (+/-2.27%) misc_raytrace.py 21490.40 -> 22227.23 : +736.83 = +3.429% (+/-1.88%) This shows that the new optimisations are at least as good as the existing 
inline-bytecode-caching, and are sometimes much better (because the new ones apply caching to a wider variety of map lookups). The new optimisations can also benefit code generated by the native emitter, because they apply to the runtime rather than the generated code. The improvement for the native emitter when LOAD_ATTR_FAST_PATH and MAP_LOOKUP_CACHE are enabled is (same Linux environment as above): diff of scores (higher is better) N=2000 M=2000 native -> nat-attrmapcache diff diff% (error%) bm_chaos.py 14130.62 -> 15464.68 : +1334.06 = +9.441% (+/-7.11%) bm_fannkuch.py 74.96 -> 76.16 : +1.20 = +1.601% (+/-1.80%) bm_fft.py 166682.99 -> 168221.86 : +1538.87 = +0.923% (+/-4.20%) bm_float.py 233415.23 -> 265524.90 : +32109.67 = +13.756% (+/-2.57%) bm_hexiom.py 628.59 -> 734.17 : +105.58 = +16.796% (+/-1.39%) bm_nqueens.py 225418.44 -> 232926.45 : +7508.01 = +3.331% (+/-3.10%) bm_pidigits.py 6322.00 -> 6379.52 : +57.52 = +0.910% (+/-5.62%) misc_aes.py 20670.10 -> 27223.18 : +6553.08 = +31.703% (+/-1.56%) misc_mandel.py 138221.11 -> 152014.01 : +13792.90 = +9.979% (+/-2.46%) misc_pystone.py 85032.14 -> 105681.44 : +20649.30 = +24.284% (+/-2.25%) misc_raytrace.py 19800.01 -> 23350.73 : +3550.72 = +17.933% (+/-2.79%) In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options: - are simpler; - take less code size; - are faster (generally); - work with code generated by the native emitter; - can be used on embedded targets with a small and constant RAM overhead; - allow the same .mpy bytecode to run on all targets. See #7680 for further discussion. And see also #7653 for a discussion about simplifying mpy-cross options. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
305 lines
9.2 KiB
C
305 lines
9.2 KiB
C
/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MICROPY_INCLUDED_PY_MPSTATE_H
#define MICROPY_INCLUDED_PY_MPSTATE_H

#include <stdint.h>

#include "py/mpconfig.h"
#include "py/mpthread.h"
#include "py/misc.h"
#include "py/nlr.h"
#include "py/obj.h"
#include "py/objlist.h"
#include "py/objexcept.h"

// This file contains structures defining the state of the MicroPython
// memory system, runtime and virtual machine. The state is a global
// variable, but in the future it is hoped that the state can become local.

// This structure contains dynamic configuration for the compiler.
// It is only compiled in when MICROPY_DYNAMIC_COMPILER is enabled.
#if MICROPY_DYNAMIC_COMPILER
typedef struct mp_dynamic_compiler_t {
    uint8_t small_int_bits; // must be <= host small_int_bits
    bool py_builtins_str_unicode;
    uint8_t native_arch;
    uint8_t nlr_buf_num_regs;
} mp_dynamic_compiler_t;
// Declaration only; the object itself is defined outside this header.
extern mp_dynamic_compiler_t mp_dynamic_compiler;
#endif

// These are the values for sched_state
#define MP_SCHED_IDLE (1)
#define MP_SCHED_LOCKED (-1)
#define MP_SCHED_PENDING (0) // 0 so it's a quick check in the VM

typedef struct _mp_sched_item_t {
|
|
mp_obj_t func;
|
|
mp_obj_t arg;
|
|
} mp_sched_item_t;
|
|
|
|
// This structure hold information about the memory allocation system.
|
|
typedef struct _mp_state_mem_t {
|
|
#if MICROPY_MEM_STATS
|
|
size_t total_bytes_allocated;
|
|
size_t current_bytes_allocated;
|
|
size_t peak_bytes_allocated;
|
|
#endif
|
|
|
|
byte *gc_alloc_table_start;
|
|
size_t gc_alloc_table_byte_len;
|
|
#if MICROPY_ENABLE_FINALISER
|
|
byte *gc_finaliser_table_start;
|
|
#endif
|
|
byte *gc_pool_start;
|
|
byte *gc_pool_end;
|
|
|
|
int gc_stack_overflow;
|
|
MICROPY_GC_STACK_ENTRY_TYPE gc_stack[MICROPY_ALLOC_GC_STACK_SIZE];
|
|
|
|
// This variable controls auto garbage collection. If set to 0 then the
|
|
// GC won't automatically run when gc_alloc can't find enough blocks. But
|
|
// you can still allocate/free memory and also explicitly call gc_collect.
|
|
uint16_t gc_auto_collect_enabled;
|
|
|
|
#if MICROPY_GC_ALLOC_THRESHOLD
|
|
size_t gc_alloc_amount;
|
|
size_t gc_alloc_threshold;
|
|
#endif
|
|
|
|
size_t gc_last_free_atb_index;
|
|
|
|
#if MICROPY_PY_GC_COLLECT_RETVAL
|
|
size_t gc_collected;
|
|
#endif
|
|
|
|
#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
|
|
// This is a global mutex used to make the GC thread-safe.
|
|
mp_thread_mutex_t gc_mutex;
|
|
#endif
|
|
} mp_state_mem_t;
|
|
|
|
// This structure hold runtime and VM information. It includes a section
|
|
// which contains root pointers that must be scanned by the GC.
|
|
typedef struct _mp_state_vm_t {
|
|
//
|
|
// CONTINUE ROOT POINTER SECTION
|
|
// This must start at the start of this structure and follows
|
|
// the state in the mp_state_thread_t structure, continuing
|
|
// the root pointer section from there.
|
|
//
|
|
|
|
qstr_pool_t *last_pool;
|
|
|
|
// non-heap memory for creating an exception if we can't allocate RAM
|
|
mp_obj_exception_t mp_emergency_exception_obj;
|
|
|
|
// memory for exception arguments if we can't allocate RAM
|
|
#if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF
|
|
#if MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE > 0
|
|
// statically allocated buf (needs to be aligned to mp_obj_t)
|
|
mp_obj_t mp_emergency_exception_buf[MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE / sizeof(mp_obj_t)];
|
|
#else
|
|
// dynamically allocated buf
|
|
byte *mp_emergency_exception_buf;
|
|
#endif
|
|
#endif
|
|
|
|
#if MICROPY_KBD_EXCEPTION
|
|
// exception object of type KeyboardInterrupt
|
|
mp_obj_exception_t mp_kbd_exception;
|
|
#endif
|
|
|
|
// dictionary with loaded modules (may be exposed as sys.modules)
|
|
mp_obj_dict_t mp_loaded_modules_dict;
|
|
|
|
#if MICROPY_ENABLE_SCHEDULER
|
|
mp_sched_item_t sched_queue[MICROPY_SCHEDULER_DEPTH];
|
|
#endif
|
|
|
|
// current exception being handled, for sys.exc_info()
|
|
#if MICROPY_PY_SYS_EXC_INFO
|
|
mp_obj_base_t *cur_exception;
|
|
#endif
|
|
|
|
#if MICROPY_PY_SYS_ATEXIT
|
|
// exposed through sys.atexit function
|
|
mp_obj_t sys_exitfunc;
|
|
#endif
|
|
|
|
// dictionary for the __main__ module
|
|
mp_obj_dict_t dict_main;
|
|
|
|
// these two lists must be initialised per port, after the call to mp_init
|
|
mp_obj_list_t mp_sys_path_obj;
|
|
mp_obj_list_t mp_sys_argv_obj;
|
|
|
|
// dictionary for overridden builtins
|
|
#if MICROPY_CAN_OVERRIDE_BUILTINS
|
|
mp_obj_dict_t *mp_module_builtins_override_dict;
|
|
#endif
|
|
|
|
#if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
|
|
// An mp_obj_list_t that tracks relocated native code to prevent the GC from reclaiming them.
|
|
mp_obj_t track_reloc_code_list;
|
|
#endif
|
|
|
|
// include any root pointers defined by a port
|
|
MICROPY_PORT_ROOT_POINTERS
|
|
|
|
// root pointers for extmod
|
|
|
|
#if MICROPY_REPL_EVENT_DRIVEN
|
|
vstr_t *repl_line;
|
|
#endif
|
|
|
|
#if MICROPY_PY_OS_DUPTERM
|
|
mp_obj_t dupterm_objs[MICROPY_PY_OS_DUPTERM];
|
|
#endif
|
|
|
|
#if MICROPY_PY_LWIP_SLIP
|
|
mp_obj_t lwip_slip_stream;
|
|
#endif
|
|
|
|
#if MICROPY_VFS
|
|
struct _mp_vfs_mount_t *vfs_cur;
|
|
struct _mp_vfs_mount_t *vfs_mount_table;
|
|
#endif
|
|
|
|
#if MICROPY_PY_BLUETOOTH
|
|
mp_obj_t bluetooth;
|
|
#endif
|
|
|
|
//
|
|
// END ROOT POINTER SECTION
|
|
////////////////////////////////////////////////////////////
|
|
|
|
// pointer and sizes to store interned string data
|
|
// (qstr_last_chunk can be root pointer but is also stored in qstr pool)
|
|
byte *qstr_last_chunk;
|
|
size_t qstr_last_alloc;
|
|
size_t qstr_last_used;
|
|
|
|
#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
|
|
// This is a global mutex used to make qstr interning thread-safe.
|
|
mp_thread_mutex_t qstr_mutex;
|
|
#endif
|
|
|
|
#if MICROPY_ENABLE_COMPILER
|
|
mp_uint_t mp_optimise_value;
|
|
#if MICROPY_EMIT_NATIVE
|
|
uint8_t default_emit_opt; // one of MP_EMIT_OPT_xxx
|
|
#endif
|
|
#endif
|
|
|
|
// size of the emergency exception buf, if it's dynamically allocated
|
|
#if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF && MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE == 0
|
|
mp_int_t mp_emergency_exception_buf_size;
|
|
#endif
|
|
|
|
#if MICROPY_ENABLE_SCHEDULER
|
|
volatile int16_t sched_state;
|
|
uint8_t sched_len;
|
|
uint8_t sched_idx;
|
|
#endif
|
|
|
|
#if MICROPY_PY_THREAD_GIL
|
|
// This is a global mutex used to make the VM/runtime thread-safe.
|
|
mp_thread_mutex_t gil_mutex;
|
|
#endif
|
|
|
|
#if MICROPY_OPT_MAP_LOOKUP_CACHE
|
|
// See mp_map_lookup.
|
|
uint8_t map_lookup_cache[MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE];
|
|
#endif
|
|
} mp_state_vm_t;
|
|
|
|
// This structure holds state that is specific to a given thread.
|
|
// Everything in this structure is scanned for root pointers.
|
|
typedef struct _mp_state_thread_t {
|
|
// Stack top at the start of program
|
|
char *stack_top;
|
|
|
|
#if MICROPY_STACK_CHECK
|
|
size_t stack_limit;
|
|
#endif
|
|
|
|
#if MICROPY_ENABLE_PYSTACK
|
|
uint8_t *pystack_start;
|
|
uint8_t *pystack_end;
|
|
uint8_t *pystack_cur;
|
|
#endif
|
|
|
|
// Locking of the GC is done per thread.
|
|
uint16_t gc_lock_depth;
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// START ROOT POINTER SECTION
|
|
// Everything that needs GC scanning must start here, and
|
|
// is followed by state in the mp_state_vm_t structure.
|
|
//
|
|
|
|
mp_obj_dict_t *dict_locals;
|
|
mp_obj_dict_t *dict_globals;
|
|
|
|
nlr_buf_t *nlr_top;
|
|
|
|
// pending exception object (MP_OBJ_NULL if not pending)
|
|
volatile mp_obj_t mp_pending_exception;
|
|
|
|
// If MP_OBJ_STOP_ITERATION is propagated then this holds its argument.
|
|
mp_obj_t stop_iteration_arg;
|
|
|
|
#if MICROPY_PY_SYS_SETTRACE
|
|
mp_obj_t prof_trace_callback;
|
|
bool prof_callback_is_executing;
|
|
struct _mp_code_state_t *current_code_state;
|
|
#endif
|
|
} mp_state_thread_t;
|
|
|
|
// This structure combines the above 3 structures.
|
|
// The order of the entries are important for root pointer scanning in the GC to work.
|
|
typedef struct _mp_state_ctx_t {
|
|
mp_state_thread_t thread;
|
|
mp_state_vm_t vm;
|
|
mp_state_mem_t mem;
|
|
} mp_state_ctx_t;
|
|
|
|
// Declaration of the global state object; it is defined outside this header.
extern mp_state_ctx_t mp_state_ctx;

// Accessors for member x of the vm, mem and (main) thread parts of the
// global mp_state_ctx object. Each expands to an lvalue.
#define MP_STATE_VM(x) (mp_state_ctx.vm.x)
#define MP_STATE_MEM(x) (mp_state_ctx.mem.x)
#define MP_STATE_MAIN_THREAD(x) (mp_state_ctx.thread.x)

// Accessor for member x of the current thread's state. With
// MICROPY_PY_THREAD enabled the state is fetched via mp_thread_get_state();
// otherwise it is the thread part of the global state.
#if MICROPY_PY_THREAD
extern mp_state_thread_t *mp_thread_get_state(void);
#define MP_STATE_THREAD(x) (mp_thread_get_state()->x)
#else
#define MP_STATE_THREAD(x) MP_STATE_MAIN_THREAD(x)
#endif

#endif // MICROPY_INCLUDED_PY_MPSTATE_H