py/map: Add an optional cache of (map+index) to speed up map lookups.
The existing inline bytecode caching optimisation, selected by
MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, reserves an extra byte in the
bytecode after certain opcodes, which at runtime stores a map index of the
likely location of the field when looking up its qstr. This scheme is
incompatible with bytecode-in-ROM and doesn't work with native generated
code. It also means the bytecode stored in .mpy files has a different
format to when the feature is disabled, making generation of .mpy files
more complex.

This commit provides an alternative optimisation: a global cache of map
offsets, which all mp_map_lookup operations then use. It's less precise
than bytecode caching, but it allows the cache to be independent of and
external to the bytecode that is executing. It also works with the native
emitter, giving a similar performance boost on top of the gain the native
emitter already provides.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
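For illustration only (this is not code from the commit; the names elem_t,
map_t, cache_offset and map_lookup are invented), the approach can be
sketched as a global byte array indexed by a hash of the key, each entry
remembering the last position at which that key was found in any map:

    #include <stdint.h>
    #include <stddef.h>

    #define CACHE_SIZE 128 // mirrors the default MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE

    typedef struct { uintptr_t key; int value; } elem_t;
    typedef struct { elem_t *table; size_t alloc; } map_t;

    // One global byte array shared by all maps: entry = last known position.
    static uint8_t lookup_cache[CACHE_SIZE];

    static size_t cache_offset(uintptr_t key) {
        return (key >> 2) % CACHE_SIZE; // drop tag bits, fold onto the cache
    }

    static elem_t *map_lookup(map_t *map, uintptr_t key) {
        // Fast path: probe the slot the cache suggests; a stale or colliding
        // entry simply fails the key check and falls through.
        elem_t *slot = &map->table[lookup_cache[cache_offset(key)] % map->alloc];
        if (slot->key == key) {
            return slot;
        }
        // Slow path: full search, then remember where the key was found.
        for (size_t i = 0; i < map->alloc; ++i) {
            if (map->table[i].key == key) {
                lookup_cache[cache_offset(key)] = (uint8_t)(i & 0xff);
                return &map->table[i];
            }
        }
        return NULL;
    }

    int main(void) {
        elem_t table[4] = {{0x1000, 1}, {0x2000, 2}, {0x3000, 3}, {0x4000, 4}};
        map_t map = {table, 4};
        map_lookup(&map, 0x3000);                // miss: fills the cache
        return map_lookup(&map, 0x3000) ? 0 : 1; // hit via the cached position
    }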
parent 7b89ad8dbf
commit 11ef8f22fe

py/map.c | 35
py/map.c
@@ -40,6 +40,27 @@
 #define DEBUG_printf(...) (void)0
 #endif
 
+#if MICROPY_OPT_MAP_LOOKUP_CACHE
+// MP_STATE_VM(map_lookup_cache) provides a cache of index to the last known
+// position of that index in any map. On a cache hit, this short-circuits
+// the full linear search in the case of an ordered map (i.e. all builtin
+// modules and objects' locals dicts), and the hash computation (and
+// potentially some linear probing) in the case of a regular map. Note the
+// same cache is shared across all maps.
+
+// Gets the index into the cache for this index. Shift down by two to remove
+// mp_obj_t tag bits.
+#define MAP_CACHE_OFFSET(index) ((((uintptr_t)(index)) >> 2) % MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE)
+// Gets the map cache entry for the corresponding index.
+#define MAP_CACHE_ENTRY(index) (MP_STATE_VM(map_lookup_cache)[MAP_CACHE_OFFSET(index)])
+// Retrieve the map element (mp_map_elem_t) at the location suggested by the cache.
+#define MAP_CACHE_GET(map, index) (&(map)->table[MAP_CACHE_ENTRY(index) % (map)->alloc])
+// Update the cache for this index.
+#define MAP_CACHE_SET(index, pos) MAP_CACHE_ENTRY(index) = (pos) & 0xff;
+#else
+#define MAP_CACHE_SET(index, pos)
+#endif
+
 // This table of sizes is used to control the growth of hash tables.
 // The first set of sizes are chosen so the allocation fits exactly in a
 // 4-word GC block, and it's not so important for these small values to be
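The cache entries are single bytes, so MAP_CACHE_SET truncates the position
with & 0xff and MAP_CACHE_GET reduces it modulo map->alloc. A tiny worked
example of the arithmetic (standalone, not from the commit):

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    int main(void) {
        // Small map: positions below 256 round-trip exactly.
        size_t alloc = 12;
        size_t pos = 9;
        uint8_t entry = (uint8_t)(pos & 0xff); // what MAP_CACHE_SET stores
        assert(entry % alloc == 9);            // what MAP_CACHE_GET recomputes
        // Large map (alloc > 256): the stored byte wraps, so the hint can be
        // wrong -- but never unsafe, because the caller still checks the key.
        alloc = 260;
        pos = 257;
        entry = (uint8_t)(pos & 0xff);         // 257 & 0xff == 1
        assert(entry % alloc == 1);            // hints at slot 1, not 257
        return 0;
    }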
@@ -136,6 +157,18 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
     // If the map is a fixed array then we must only be called for a lookup
     assert(!map->is_fixed || lookup_kind == MP_MAP_LOOKUP);
 
+    #if MICROPY_OPT_MAP_LOOKUP_CACHE
+    // Try the cache for lookup or add-if-not-found.
+    if (lookup_kind != MP_MAP_LOOKUP_REMOVE_IF_FOUND && map->alloc) {
+        mp_map_elem_t *slot = MAP_CACHE_GET(map, index);
+        // Note: just comparing the key pointer will have false negatives
+        // (keys that are equal by value but not identical), but these are
+        // handled by the regular path below.
+        if (slot->key == index) {
+            return slot;
+        }
+    }
+    #endif
+
     // Work out if we can compare just pointers
     bool compare_only_ptrs = map->all_keys_are_qstrs;
     if (compare_only_ptrs) {
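A minimal model (assumed names, not the commit's code) of the probe's
safety property: a wrong or stale hint can only cost a cache miss, never
return the wrong element, because the slot is returned only when its key
matches:

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uintptr_t key; int value; } elem_t;

    // Mirrors the fast path above: return the hinted slot only on a key match.
    static elem_t *probe(elem_t *table, size_t alloc, uint8_t hint, uintptr_t key) {
        elem_t *slot = &table[hint % alloc];
        return slot->key == key ? slot : NULL; // NULL => fall back to full lookup
    }

    int main(void) {
        elem_t table[3] = {{0x10, 1}, {0x20, 2}, {0x30, 3}};
        assert(probe(table, 3, 1, 0x20) != NULL); // correct hint: fast-path hit
        assert(probe(table, 3, 0, 0x20) == NULL); // stale hint: miss, not a wrong result
        return 0;
    }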
@@ -172,6 +205,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
                 elem->value = value;
             }
             #endif
+            MAP_CACHE_SET(index, elem - map->table);
             return elem;
         }
     }
@@ -254,6 +288,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
                 }
                 // keep slot->value so that caller can access it if needed
             }
+            MAP_CACHE_SET(index, pos);
             return slot;
         }
py/mpconfig.h
@@ -533,6 +533,20 @@
 #define MICROPY_OPT_LOAD_ATTR_FAST_PATH (0)
 #endif
 
+// Use extra RAM to cache map lookups by remembering the likely location of
+// the index. Avoids the hash computation on unordered maps, and avoids the
+// linear search on ordered (especially in-ROM) maps. Can provide a +10-15%
+// performance improvement on benchmarks involving lots of attribute access
+// or dictionary lookup.
+#ifndef MICROPY_OPT_MAP_LOOKUP_CACHE
+#define MICROPY_OPT_MAP_LOOKUP_CACHE (0)
+#endif
+
+// How much RAM (in bytes) to use for the map lookup cache.
+#ifndef MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE
+#define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (128)
+#endif
+
 // Whether to use fast versions of bitwise operations (and, or, xor) when the
 // arguments are both positive. Increases Thumb2 code size by about 250 bytes.
 #ifndef MICROPY_OPT_MPZ_BITWISE
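With the option disabled by default, a port opts in via its own config. A
hypothetical example for a port's mpconfigport.h (the macro names come from
this commit; the 256-byte size is just an illustration):

    // Enable the map lookup cache and double the default cache size.
    #define MICROPY_OPT_MAP_LOOKUP_CACHE (1)
    #define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (256)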
|
py/mpstate.h
@@ -231,6 +231,11 @@ typedef struct _mp_state_vm_t {
     // This is a global mutex used to make the VM/runtime thread-safe.
     mp_thread_mutex_t gil_mutex;
     #endif
+
+    #if MICROPY_OPT_MAP_LOOKUP_CACHE
+    // See mp_map_lookup.
+    uint8_t map_lookup_cache[MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE];
+    #endif
 } mp_state_vm_t;
 
 // This structure holds state that is specific to a given thread.