py/map: Add an optional cache of (map+index) to speed up map lookups.

The existing inline bytecode caching optimisation, selected by
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, reserves an extra byte in the
bytecode after certain opcodes, which at runtime stores a map index of the
likely location of this field when looking up the qstr.  This scheme is
incompatible with bytecode-in-ROM, and doesn't work with native generated
code.  It also changes the format of the bytecode stored in .mpy files
relative to when the feature is disabled, making generation of .mpy files
more complex.
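
As an illustration of the scheme being replaced, here is a minimal standalone
toy (hypothetical names and data, not the actual VM code): a byte stored next
to each call site remembers the table slot that satisfied the previous lookup,
and the slow path writes the hint back beside the "opcode", which is exactly
what ROM-resident bytecode cannot support.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 8

typedef struct { const char *key; int value; } entry_t;

static entry_t table[TABLE_SIZE] = { {"foo", 1}, {"bar", 2}, {"baz", 3} };

static int lookup(const char *key, uint8_t *hint) {
    // Fast path: probe the slot remembered by the per-call-site hint byte.
    entry_t *e = &table[*hint % TABLE_SIZE];
    if (e->key != NULL && strcmp(e->key, key) == 0) {
        return e->value;
    }
    // Slow path: linear search, then write the hint back at the call site.
    for (unsigned i = 0; i < TABLE_SIZE; ++i) {
        if (table[i].key != NULL && strcmp(table[i].key, key) == 0) {
            *hint = (uint8_t)i;
            return table[i].value;
        }
    }
    return -1;
}

int main(void) {
    uint8_t hint_byte = 0; // stands in for the byte reserved in the bytecode
    printf("%d\n", lookup("baz", &hint_byte)); // slow path; hint set to 2
    printf("%d\n", lookup("baz", &hint_byte)); // fast path via the hint
    return 0;
}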

This commit provides an alternative optimisation: a global cache of map
offsets that all mp_map_lookup operations consult.  It's less precise than
bytecode caching, but it allows the cache to be independent of and external
to the bytecode that is executing.  It also works for the native emitter,
adding a similar performance boost on top of the gain already provided by
the native emitter.
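
A minimal standalone sketch of the approach (a hypothetical toy; the real
implementation is the py/map.c diff below): one small, direct-mapped, global
cache shared by every map, updated by the slow path and verified on every hit.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHE_SIZE 128

typedef struct { uintptr_t key; int value; } elem_t;
typedef struct { elem_t *table; size_t alloc; } map_t;

// One small direct-mapped cache shared by every map in the system.
static uint8_t lookup_cache[CACHE_SIZE];

static int *map_lookup(map_t *map, uintptr_t key) {
    size_t c = (key >> 2) % CACHE_SIZE;       // >> 2 drops low tag bits
    elem_t *slot = &map->table[lookup_cache[c] % map->alloc];
    if (slot->key == key) {
        return &slot->value;                  // cache hit: no search at all
    }
    for (size_t i = 0; i < map->alloc; ++i) { // slow path: linear search
        if (map->table[i].key == key) {
            lookup_cache[c] = (uint8_t)i;     // remember for next time
            return &map->table[i].value;
        }
    }
    return NULL;
}

int main(void) {
    elem_t t[4] = { {40, 1}, {80, 2}, {120, 3}, {160, 4} };
    map_t m = { t, 4 };
    printf("%d\n", *map_lookup(&m, 120)); // miss; caches position 2
    printf("%d\n", *map_lookup(&m, 120)); // hit via the shared cache
    return 0;
}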

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
Jim Mussared, 2021-08-18 14:52:48 +10:00; committed by Damien George
parent 7b89ad8dbf
commit 11ef8f22fe
3 changed files with 54 additions and 0 deletions

py/map.c

@@ -40,6 +40,27 @@
#define DEBUG_printf(...) (void)0
#endif
#if MICROPY_OPT_MAP_LOOKUP_CACHE
// MP_STATE_VM(map_lookup_cache) provides a cache from an index to the last
// known position of that index in any map.  On a cache hit, this allows
// short-circuiting the full linear search in the case of an ordered map
// (i.e. all builtin modules and objects' locals dicts), and computation of
// the hash (and potentially some linear probing) in the case of a regular
// map. Note the same cache is shared across all maps.
// Gets the index into the cache for this index. Shift down by two to remove
// mp_obj_t tag bits.
#define MAP_CACHE_OFFSET(index) ((((uintptr_t)(index)) >> 2) % MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE)
// Gets the map cache entry for the corresponding index.
#define MAP_CACHE_ENTRY(index) (MP_STATE_VM(map_lookup_cache)[MAP_CACHE_OFFSET(index)])
// Retrieve the mp_map_elem_t at the location suggested by the cache.
#define MAP_CACHE_GET(map, index) (&(map)->table[MAP_CACHE_ENTRY(index) % (map)->alloc])
// Update the cache for this index.
#define MAP_CACHE_SET(index, pos) MAP_CACHE_ENTRY(index) = (pos) & 0xff;
#else
#define MAP_CACHE_SET(index, pos)
#endif
// This table of sizes is used to control the growth of hash tables.
// The first set of sizes are chosen so the allocation fits exactly in a
// 4-word GC block, and it's not so important for these small values to be
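
For reference, hand-expanding MAP_CACHE_GET from the definitions above (with
the default cache size of 128) gives the complete probe expression:

&map->table[MP_STATE_VM(map_lookup_cache)[((uintptr_t)index >> 2) % 128] % map->alloc]

The stored byte is reduced modulo map->alloc at read time, so a stale entry,
or one written by a different map, can never index out of bounds; the worst
case is probing one wrong slot before the normal search runs.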
@@ -136,6 +157,18 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
// If the map is a fixed array then we must only be called for a lookup
assert(!map->is_fixed || lookup_kind == MP_MAP_LOOKUP);
#if MICROPY_OPT_MAP_LOOKUP_CACHE
// Try the cache for lookup or add-if-not-found.
if (lookup_kind != MP_MAP_LOOKUP_REMOVE_IF_FOUND && map->alloc) {
mp_map_elem_t *slot = MAP_CACHE_GET(map, index);
// Note: comparing the key only by identity can give false negatives (equal
// keys that are distinct objects); these are handled by the regular path below.
if (slot->key == index) {
return slot;
}
}
#endif
// Work out if we can compare just pointers
bool compare_only_ptrs = map->all_keys_are_qstrs;
if (compare_only_ptrs) {
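
Two details of the guard in this fast path: MP_MAP_LOOKUP_REMOVE_IF_FOUND is
excluded because removal involves more than returning the slot (the full path
below also erases it), and map->alloc must be non-zero so that the
% (map)->alloc inside MAP_CACHE_GET never divides by zero.  A minimal
hypothetical illustration of why a stale cache entry is harmless:

#include <assert.h>
#include <stdint.h>

typedef struct { uintptr_t key; uintptr_t value; } elem_t;

int main(void) {
    elem_t table[4] = { {10, 100}, {20, 200}, {30, 300}, {0, 0} };
    uint8_t cached_pos = 2;        // stale hint: slot 2 actually holds key 30
    uintptr_t index = 20;          // the key being looked up
    elem_t *slot = &table[cached_pos % 4];
    assert(slot->key != index);    // false negative: detected, not trusted
    // ...the regular search would still find key 20 at slot 1.
    return 0;
}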
@@ -172,6 +205,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
elem->value = value;
}
#endif
MAP_CACHE_SET(index, elem - map->table);
return elem;
}
}
@@ -254,6 +288,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
}
// keep slot->value so that caller can access it if needed
}
MAP_CACHE_SET(index, pos);
return slot;
}
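
Both return paths now record where the key was found: the ordered-map path
stores elem - map->table, and the hashed path stores the final probe position
pos.  MAP_CACHE_SET keeps only the low 8 bits of that position, so for maps
with more than 256 slots the hint can wrap; a small hypothetical demo of the
consequence:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    size_t pos = 300;                        // real slot index in a big map
    uint8_t stored = (uint8_t)(pos & 0xff);  // cache remembers 300 & 0xff == 44
    size_t alloc = 512;
    // The later MAP_CACHE_GET-style probe lands on slot 44, not 300: a miss
    // that costs one extra key compare, never an out-of-bounds access.
    printf("probe slot %zu instead of %zu\n", (size_t)(stored % alloc), pos);
    return 0;
}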

py/mpconfig.h

@@ -533,6 +533,20 @@
#define MICROPY_OPT_LOAD_ATTR_FAST_PATH (0)
#endif
// Use extra RAM to cache map lookups by remembering the likely location of
// the index. Avoids the hash computation on unordered maps, and avoids the
// linear search on ordered (especially in-ROM) maps. Can provide a +10-15%
// performance improvement on benchmarks involving lots of attribute access
// or dictionary lookup.
#ifndef MICROPY_OPT_MAP_LOOKUP_CACHE
#define MICROPY_OPT_MAP_LOOKUP_CACHE (0)
#endif
// How much RAM (in bytes) to use for the map lookup cache.
#ifndef MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE
#define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (128)
#endif
// Whether to use fast versions of bitwise operations (and, or, xor) when the
// arguments are both positive. Increases Thumb2 code size by about 250 bytes.
#ifndef MICROPY_OPT_MPZ_BITWISE
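
A hypothetical port configuration enabling the feature (e.g. in a port's
mpconfigport.h; the size of 256 is illustrative, not a recommendation from
this commit):

#define MICROPY_OPT_MAP_LOOKUP_CACHE      (1)
#define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (256)

Keeping the size a power of two lets the compiler reduce the modulo in
MAP_CACHE_OFFSET to a bit mask.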

py/mpstate.h

@@ -231,6 +231,11 @@ typedef struct _mp_state_vm_t {
// This is a global mutex used to make the VM/runtime thread-safe.
mp_thread_mutex_t gil_mutex;
#endif
#if MICROPY_OPT_MAP_LOOKUP_CACHE
// See mp_map_lookup.
uint8_t map_lookup_cache[MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE];
#endif
} mp_state_vm_t;

// This structure holds state that is specific to a given thread.
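
For context, MP_STATE_VM is defined elsewhere in py/mpstate.h (unchanged by
this commit) as:

#define MP_STATE_VM(x) (mp_state_ctx.vm.x)

so the cache used by the py/map.c macros above is this single array in the
global VM state: with the default MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE of 128 it
costs 128 bytes of RAM, zero-initialised like the rest of mp_state_vm_t.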