py: Improve allocation policy of qstr data.
Prior to this patch, every interned string lived in its own malloc'd chunk. On average this wastes N/2 bytes per interned string, where N is the number of bytes in one quantum of the memory allocator (16 bytes on 32-bit archs).

With this patch, interned strings are concatenated into the same malloc'd chunk when possible. Such chunks are enlarged in place when possible, and shrunk to fit when a new chunk is needed.

RAM savings with this patch vary widely, but it should always be an improvement (unless only 3 or 4 strings are interned). The new version typically uses about 70% of the previous memory for the qstr data, and can reduce the total memory footprint of a running script by around 10%. It costs about 120 bytes of code size on Thumb2 archs (depending on how many calls to gc_realloc are made).
commit ade9a05236
parent c48740e20b
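For orientation, the sketch below is a minimal standalone model of the chunk policy described in the commit message; it is not MicroPython source. It uses plain malloc() in place of m_new_maybe()/m_renew_maybe(), omits the grow-in-place attempt (which is exactly what the new allow_move flag on gc_realloc enables), and the names intern_alloc, chunk, chunk_alloc and chunk_used are hypothetical stand-ins for the new qstr_last_chunk, qstr_last_alloc and qstr_last_used state.

/* Illustrative sketch only: assumed names, plain C library calls. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK_INIT 128              /* mirrors MICROPY_ALLOC_QSTR_CHUNK_INIT */

static unsigned char *chunk;        /* current chunk, cf. qstr_last_chunk */
static size_t chunk_alloc;          /* bytes allocated, cf. qstr_last_alloc */
static size_t chunk_used;           /* bytes handed out, cf. qstr_last_used */

/* Reserve n_bytes of interned-string storage, packing requests back to back. */
static unsigned char *intern_alloc(size_t n_bytes) {
    if (chunk == NULL || chunk_used + n_bytes > chunk_alloc) {
        /* The real code first tries to grow the current chunk in place with
         * m_renew_maybe(..., allow_move=false) and shrinks it to fit if that
         * fails; standard realloc() cannot refuse to move a block, so this
         * sketch simply starts a new chunk. */
        size_t al = n_bytes > CHUNK_INIT ? n_bytes : CHUNK_INIT;
        unsigned char *p = malloc(al);
        if (p == NULL) {
            return NULL;
        }
        chunk = p;
        chunk_alloc = al;
        chunk_used = 0;
    }
    unsigned char *ret = chunk + chunk_used;
    chunk_used += n_bytes;
    return ret;
}

int main(void) {
    /* Intern a few strings; they land back to back in one 128-byte chunk
     * instead of each occupying its own heap block. */
    const char *words[] = {"print", "range", "foobar"};
    for (size_t i = 0; i < sizeof(words) / sizeof(words[0]); ++i) {
        size_t len = strlen(words[i]);
        unsigned char *p = intern_alloc(len + 1);   /* +1 for the NUL */
        if (p == NULL) {
            return 1;
        }
        memcpy(p, words[i], len + 1);
        printf("%-7s at chunk offset %zu\n", words[i], (size_t)(p - chunk));
    }
    return 0;
}

In the real qstr_from_strn (see py/qstr.c below), a full chunk is first grown in place via m_renew_maybe(..., false); only if that fails is it shrunk to fit and a fresh chunk allocated, so at most the final chunk carries unused slack.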
py/gc.c (7 changed lines)

@@ -522,7 +522,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) {
 
 #else // Alternative gc_realloc impl
 
-void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
+void *gc_realloc(void *ptr_in, mp_uint_t n_bytes, bool allow_move) {
     if (MP_STATE_MEM(gc_lock_depth) > 0) {
         return NULL;
     }
@@ -624,6 +624,11 @@ void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
         return ptr_in;
     }
 
+    if (!allow_move) {
+        // not allowed to move memory block so return failure
+        return NULL;
+    }
+
     // can't resize inplace; try to find a new contiguous chain
     void *ptr_out = gc_alloc(n_bytes,
 #if MICROPY_ENABLE_FINALISER
py/gc.h (2 changed lines)

@@ -48,7 +48,7 @@ void gc_collect_end(void);
 void *gc_alloc(mp_uint_t n_bytes, bool has_finaliser);
 void gc_free(void *ptr);
 mp_uint_t gc_nbytes(const void *ptr);
-void *gc_realloc(void *ptr, mp_uint_t n_bytes);
+void *gc_realloc(void *ptr, mp_uint_t n_bytes, bool allow_move);
 
 typedef struct _gc_info_t {
     mp_uint_t total;
py/malloc.c (11 changed lines)

@@ -56,7 +56,10 @@
 #define malloc(b) gc_alloc((b), false)
 #define malloc_with_finaliser(b) gc_alloc((b), true)
 #define free gc_free
-#define realloc gc_realloc
+#define realloc(ptr, n) gc_realloc(ptr, n, true)
+#define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv)
 #else
+#define realloc_ext(ptr, n, mv) realloc(ptr, n)
 #endif // MICROPY_ENABLE_GC
 
 void *m_malloc(size_t num_bytes) {
@@ -134,11 +137,11 @@ void *m_realloc(void *ptr, size_t new_num_bytes) {
 }
 
 #if MICROPY_MALLOC_USES_ALLOCATED_SIZE
-void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes) {
+void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move) {
 #else
-void *m_realloc_maybe(void *ptr, size_t new_num_bytes) {
+void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move) {
 #endif
-    void *new_ptr = realloc(ptr, new_num_bytes);
+    void *new_ptr = realloc_ext(ptr, new_num_bytes, allow_move);
 #if MICROPY_MEM_STATS
     // At first thought, "Total bytes allocated" should only grow,
     // after all, it's *total*. But consider for example 2K block
py/misc.h

@@ -63,12 +63,12 @@ typedef unsigned int uint;
 #endif
 #if MICROPY_MALLOC_USES_ALLOCATED_SIZE
 #define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
-#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
+#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num), (allow_move))))
 #define m_del(type, ptr, num) m_free(ptr, sizeof(type) * (num))
 #define m_del_var(obj_type, var_type, var_num, ptr) (m_free(ptr, sizeof(obj_type) + sizeof(var_type) * (var_num)))
 #else
 #define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (new_num))))
-#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num))))
+#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num), (allow_move))))
 #define m_del(type, ptr, num) ((void)(num), m_free(ptr))
 #define m_del_var(obj_type, var_type, var_num, ptr) ((void)(var_num), m_free(ptr))
 #endif
@@ -80,11 +80,11 @@ void *m_malloc_with_finaliser(size_t num_bytes);
 void *m_malloc0(size_t num_bytes);
 #if MICROPY_MALLOC_USES_ALLOCATED_SIZE
 void *m_realloc(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
-void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
+void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move);
 void m_free(void *ptr, size_t num_bytes);
 #else
 void *m_realloc(void *ptr, size_t new_num_bytes);
-void *m_realloc_maybe(void *ptr, size_t new_num_bytes);
+void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move);
 void m_free(void *ptr);
 #endif
 void *m_malloc_fail(size_t num_bytes);
py/mpconfig.h

@@ -75,6 +75,14 @@
 #define MICROPY_ALLOC_GC_STACK_SIZE (64)
 #endif
 
+// Number of bytes to allocate initially when creating new chunks to store
+// interned string data. Smaller numbers lead to more chunks being needed
+// and more wastage at the end of the chunk. Larger numbers lead to wasted
+// space at the end when no more strings need interning.
+#ifndef MICROPY_ALLOC_QSTR_CHUNK_INIT
+#define MICROPY_ALLOC_QSTR_CHUNK_INIT (128)
+#endif
+
 // Initial amount for lexer indentation level
 #ifndef MICROPY_ALLOC_LEXER_INDENT_INIT
 #define MICROPY_ALLOC_LEXER_INDENT_INIT (10)
py/mpstate.h

@@ -131,6 +131,12 @@ typedef struct _mp_state_vm_t {
     // END ROOT POINTER SECTION
     ////////////////////////////////////////////////////////////
 
+    // pointer and sizes to store interned string data
+    // (qstr_last_chunk can be root pointer but is also stored in qstr pool)
+    byte *qstr_last_chunk;
+    mp_uint_t qstr_last_alloc;
+    mp_uint_t qstr_last_used;
+
     // Stack top at the start of program
     // Note: this entry is used to locate the end of the root pointer section.
     char *stack_top;
py/objexcept.c

@@ -444,7 +444,7 @@ void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, mp_uint_t line,
         self->traceback_len = 0;
     } else if (self->traceback_len + 3 > self->traceback_alloc) {
         // be conservative with growing traceback data
-        mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3);
+        mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3, true);
         if (tb_data == NULL) {
             return;
         }
py/parse.c

@@ -135,7 +135,7 @@ STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule,
         return;
     }
     if (parser->rule_stack_top >= parser->rule_stack_alloc) {
-        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
+        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
         if (rs == NULL) {
             memory_error(parser);
             return;
@@ -293,7 +293,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
         return;
     }
     if (parser->result_stack_top >= parser->result_stack_alloc) {
-        mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
+        mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
         if (stack == NULL) {
             memory_error(parser);
             return;
py/qstr.c (41 changed lines)

@@ -92,6 +92,7 @@ STATIC const qstr_pool_t const_pool = {
 
 void qstr_init(void) {
     MP_STATE_VM(last_pool) = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left
+    MP_STATE_VM(qstr_last_chunk) = NULL;
 }
 
 STATIC const byte *find_qstr(qstr q) {
@@ -152,8 +153,46 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
     assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
     qstr q = qstr_find_strn(str, len);
     if (q == 0) {
+        // qstr does not exist in interned pool so need to add it
+
+        // compute number of bytes needed to intern this string
+        mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
+
+        if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
+            // not enough room at end of previously interned string so try to grow
+            byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
+            if (new_p == NULL) {
+                // could not grow existing memory; shrink it to fit previous
+                (void)m_renew(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used));
+                MP_STATE_VM(qstr_last_chunk) = NULL;
+            } else {
+                // could grow existing memory
+                MP_STATE_VM(qstr_last_alloc) += n_bytes;
+            }
+        }
+
+        if (MP_STATE_VM(qstr_last_chunk) == NULL) {
+            // no existing memory for the interned string so allocate a new chunk
+            mp_uint_t al = n_bytes;
+            if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
+                al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
+            }
+            MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al);
+            if (MP_STATE_VM(qstr_last_chunk) == NULL) {
+                // failed to allocate a large chunk so try with exact size
+                MP_STATE_VM(qstr_last_chunk) = m_new(byte, n_bytes);
+                al = n_bytes;
+            }
+            MP_STATE_VM(qstr_last_alloc) = al;
+            MP_STATE_VM(qstr_last_used) = 0;
+        }
+
+        // allocate memory from the chunk for this new interned string's data
+        byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
+        MP_STATE_VM(qstr_last_used) += n_bytes;
+
+        // store the interned strings' data
         mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
-        byte *q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
         Q_SET_LENGTH(q_ptr, len);