py: Improve allocation policy of qstr data.

Prior to this patch, every interned string lived in its own malloc'd
chunk.  On average this wastes N/2 bytes per interned string, where N
is the number of bytes in a quantum of the memory allocator (16 bytes
on 32-bit archs).
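
To make that waste concrete (illustrative arithmetic only): with the qstr data
layout used in py/qstr.c below (2 hash bytes, MICROPY_QSTR_BYTES_IN_LEN length
bytes, the string itself and a terminating null), a 5-character string needs
9 bytes, but a 16-byte-quantum allocator hands back 16, wasting 7.  A minimal
sketch of that calculation, assuming the default of one length byte:

    // Illustrative only: bytes wasted per interned string under the old
    // one-malloc-per-qstr scheme, for an allocation quantum of 16 bytes.
    #include <stddef.h>
    #define QUANTUM 16              // allocation quantum on 32-bit archs
    #define QSTR_BYTES_IN_LEN 1     // assumed default of MICROPY_QSTR_BYTES_IN_LEN
    static size_t qstr_waste(size_t len) {
        size_t need = 2 + QSTR_BYTES_IN_LEN + len + 1;                 // hash + len + data + '\0'
        size_t got = (need + QUANTUM - 1) & ~(size_t)(QUANTUM - 1);    // rounded-up malloc size
        return got - need;          // len == 5: need 9, get 16, waste 7 (~N/2 on average)
    }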

With this patch, interned strings are concatenated into the same
malloc'd chunk when possible.  Such chunks are enlarged in place when
possible, and shrunk to fit when a new chunk is needed.
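
A minimal self-contained model of that policy is sketched below (a sketch only:
qstr_chunk_t, reserve() and chunk_init are invented names, and the in-place
growth and shrink-to-fit steps are omitted; the real implementation is in the
py/qstr.c hunk further down):

    #include <stdlib.h>
    #include <stddef.h>

    typedef struct { unsigned char *buf; size_t alloc; size_t used; } qstr_chunk_t;

    // Reserve n bytes of qstr data, packing strings back to back in one chunk.
    static unsigned char *reserve(qstr_chunk_t *c, size_t n, size_t chunk_init) {
        if (c->buf != NULL && c->used + n > c->alloc) {
            // chunk is full: the real code first tries to grow it in place
            // (gc_realloc with allow_move=false) and shrinks it to fit if that
            // fails; here we simply fall through to starting a new chunk
            c->buf = NULL;
        }
        if (c->buf == NULL) {
            c->alloc = n > chunk_init ? n : chunk_init;
            c->buf = malloc(c->alloc);
            if (c->buf == NULL) {
                return NULL;
            }
            c->used = 0;
        }
        unsigned char *p = c->buf + c->used;   // carve space off the end of the chunk
        c->used += n;
        return p;
    }

Abandoning a full chunk is not a leak: the strings already interned in it keep
using that memory; only its unused tail (which the real code shrinks away)
would otherwise be wasted.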

RAM savings with this patch vary widely, but there should always be an
improvement (unless only 3 or 4 strings are interned).  The new scheme
typically uses about 70% of the previous memory for the qstr data, and
can save around 10% of the total memory footprint of a running script.

Costs about 120 bytes of code size on Thumb2 archs (the exact cost
depends on how many calls to gc_realloc are made).
Author: Damien George
Date:   2015-06-13 21:53:22 +01:00
Commit: ade9a05236 (parent c48740e20b)
9 changed files with 75 additions and 14 deletions

py/gc.c

@@ -522,7 +522,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) {
#else // Alternative gc_realloc impl
-void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
+void *gc_realloc(void *ptr_in, mp_uint_t n_bytes, bool allow_move) {
    if (MP_STATE_MEM(gc_lock_depth) > 0) {
        return NULL;
    }

@@ -624,6 +624,11 @@ void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
        return ptr_in;
    }
+    if (!allow_move) {
+        // not allowed to move memory block so return failure
+        return NULL;
+    }
    // can't resize inplace; try to find a new contiguous chain
    void *ptr_out = gc_alloc(n_bytes,
#if MICROPY_ENABLE_FINALISER
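
For readers of the new gc_realloc API: with allow_move=false the call either
resizes the block in place (returning the original pointer) or returns NULL;
it never relocates the data, so pointers into the block stay valid.  A hedged
caller-side sketch (grow_without_moving() is a hypothetical helper, not part
of this commit, which instead reaches gc_realloc through the m_renew_maybe
macro shown in py/misc.h below):

    #include <string.h>
    #include "py/mpconfig.h"
    #include "py/misc.h"   // byte typedef
    #include "py/gc.h"

    static byte *grow_without_moving(byte *buf, mp_uint_t old_n, mp_uint_t new_n) {
        byte *p = gc_realloc(buf, new_n, false);  // false: fail instead of moving
        if (p != NULL) {
            return p;                             // extended in place; buf still valid
        }
        // in-place growth failed: fall back to a fresh block and copy
        byte *fresh = gc_alloc(new_n, false);     // false: no finaliser
        if (fresh != NULL) {
            memcpy(fresh, buf, old_n);
            gc_free(buf);
        }
        return fresh;
    }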

py/gc.h

@@ -48,7 +48,7 @@ void gc_collect_end(void);
void *gc_alloc(mp_uint_t n_bytes, bool has_finaliser);
void gc_free(void *ptr);
mp_uint_t gc_nbytes(const void *ptr);
-void *gc_realloc(void *ptr, mp_uint_t n_bytes);
+void *gc_realloc(void *ptr, mp_uint_t n_bytes, bool allow_move);

typedef struct _gc_info_t {
    mp_uint_t total;

py/malloc.c

@@ -56,7 +56,10 @@
#define malloc(b) gc_alloc((b), false)
#define malloc_with_finaliser(b) gc_alloc((b), true)
#define free gc_free
-#define realloc gc_realloc
+#define realloc(ptr, n) gc_realloc(ptr, n, true)
+#define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv)
+#else
+#define realloc_ext(ptr, n, mv) realloc(ptr, n)
#endif // MICROPY_ENABLE_GC

void *m_malloc(size_t num_bytes) {

@@ -134,11 +137,11 @@ void *m_realloc(void *ptr, size_t new_num_bytes) {
}

#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
-void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes) {
+void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move) {
#else
-void *m_realloc_maybe(void *ptr, size_t new_num_bytes) {
+void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move) {
#endif
-    void *new_ptr = realloc(ptr, new_num_bytes);
+    void *new_ptr = realloc_ext(ptr, new_num_bytes, allow_move);
#if MICROPY_MEM_STATS
    // At first thought, "Total bytes allocated" should only grow,
    // after all, it's *total*. But consider for example 2K block

py/misc.h

@@ -63,12 +63,12 @@ typedef unsigned int uint;
#endif
#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
#define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
-#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
+#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num), (allow_move))))
#define m_del(type, ptr, num) m_free(ptr, sizeof(type) * (num))
#define m_del_var(obj_type, var_type, var_num, ptr) (m_free(ptr, sizeof(obj_type) + sizeof(var_type) * (var_num)))
#else
#define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (new_num))))
-#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num))))
+#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num), (allow_move))))
#define m_del(type, ptr, num) ((void)(num), m_free(ptr))
#define m_del_var(obj_type, var_type, var_num, ptr) ((void)(var_num), m_free(ptr))
#endif

@@ -80,11 +80,11 @@ void *m_malloc_with_finaliser(size_t num_bytes);
void *m_malloc0(size_t num_bytes);
#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
void *m_realloc(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
-void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
+void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move);
void m_free(void *ptr, size_t num_bytes);
#else
void *m_realloc(void *ptr, size_t new_num_bytes);
-void *m_realloc_maybe(void *ptr, size_t new_num_bytes);
+void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move);
void m_free(void *ptr);
#endif
void *m_malloc_fail(size_t num_bytes);
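
This extended m_renew_maybe is what qstr_from_strn in py/qstr.c below relies
on: with allow_move set to false and the GC heap as the allocator it can only
return the original pointer or NULL, while with MICROPY_ENABLE_GC disabled
realloc_ext falls back to the C library realloc, which is always free to move
the block.  A small usage sketch (grow_chunk() and its locals are invented for
illustration):

    #include "py/mpconfig.h"
    #include "py/misc.h"

    static void grow_chunk(void) {
        mp_uint_t alloc = 128;
        byte *chunk = m_new(byte, alloc);
        byte *p = m_renew_maybe(byte, chunk, alloc, alloc + 32, false);
        if (p == NULL) {
            // could not grow in place; chunk is untouched and still usable
        } else {
            // grown in place (on the GC heap, p == chunk when allow_move is false)
            alloc += 32;
        }
    }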

py/mpconfig.h

@@ -75,6 +75,14 @@
#define MICROPY_ALLOC_GC_STACK_SIZE (64)
#endif
+// Number of bytes to allocate initially when creating new chunks to store
+// interned string data. Smaller numbers lead to more chunks being needed
+// and more wastage at the end of the chunk. Larger numbers lead to wasted
+// space at the end when no more strings need interning.
+#ifndef MICROPY_ALLOC_QSTR_CHUNK_INIT
+#define MICROPY_ALLOC_QSTR_CHUNK_INIT (128)
+#endif
// Initial amount for lexer indentation level
#ifndef MICROPY_ALLOC_LEXER_INDENT_INIT
#define MICROPY_ALLOC_LEXER_INDENT_INIT (10)
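
Ports can tune the new option in the usual way from their mpconfigport.h.  A
hypothetical override, purely to show the mechanism (the value is illustrative,
not from this commit):

    // In a port's mpconfigport.h: a smaller initial chunk leaves less unused
    // tail once interning stops, at the cost of more chunks overall.
    #define MICROPY_ALLOC_QSTR_CHUNK_INIT (64)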

py/mpstate.h

@@ -131,6 +131,12 @@ typedef struct _mp_state_vm_t {
    // END ROOT POINTER SECTION
    ////////////////////////////////////////////////////////////
+    // pointer and sizes to store interned string data
+    // (qstr_last_chunk can be root pointer but is also stored in qstr pool)
+    byte *qstr_last_chunk;
+    mp_uint_t qstr_last_alloc;
+    mp_uint_t qstr_last_used;
    // Stack top at the start of program
    // Note: this entry is used to locate the end of the root pointer section.
    char *stack_top;

py/objexcept.c

@@ -444,7 +444,7 @@ void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, mp_uint_t line,
        self->traceback_len = 0;
    } else if (self->traceback_len + 3 > self->traceback_alloc) {
        // be conservative with growing traceback data
-        mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3);
+        mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3, true);
        if (tb_data == NULL) {
            return;
        }

py/parse.c

@@ -135,7 +135,7 @@ STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule,
        return;
    }
    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
-        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
+        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
        if (rs == NULL) {
            memory_error(parser);
            return;

@@ -293,7 +293,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
        return;
    }
    if (parser->result_stack_top >= parser->result_stack_alloc) {
-        mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
+        mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
        if (stack == NULL) {
            memory_error(parser);
            return;

py/qstr.c

@@ -92,6 +92,7 @@ STATIC const qstr_pool_t const_pool = {
void qstr_init(void) {
    MP_STATE_VM(last_pool) = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left
+    MP_STATE_VM(qstr_last_chunk) = NULL;
}

STATIC const byte *find_qstr(qstr q) {

@@ -152,8 +153,46 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
    assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
    qstr q = qstr_find_strn(str, len);
    if (q == 0) {
+        // qstr does not exist in interned pool so need to add it
+
+        // compute number of bytes needed to intern this string
+        mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
+
+        if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
+            // not enough room at end of previously interned string so try to grow
+            byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
+            if (new_p == NULL) {
+                // could not grow existing memory; shrink it to fit previous
+                (void)m_renew(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used));
+                MP_STATE_VM(qstr_last_chunk) = NULL;
+            } else {
+                // could grow existing memory
+                MP_STATE_VM(qstr_last_alloc) += n_bytes;
+            }
+        }
+
+        if (MP_STATE_VM(qstr_last_chunk) == NULL) {
+            // no existing memory for the interned string so allocate a new chunk
+            mp_uint_t al = n_bytes;
+            if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
+                al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
+            }
+            MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al);
+            if (MP_STATE_VM(qstr_last_chunk) == NULL) {
+                // failed to allocate a large chunk so try with exact size
+                MP_STATE_VM(qstr_last_chunk) = m_new(byte, n_bytes);
+                al = n_bytes;
+            }
+            MP_STATE_VM(qstr_last_alloc) = al;
+            MP_STATE_VM(qstr_last_used) = 0;
+        }
+
+        // allocate memory from the chunk for this new interned string's data
+        byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
+        MP_STATE_VM(qstr_last_used) += n_bytes;
+
+        // store the interned strings' data
        mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
-        byte *q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
        q_ptr[0] = hash;
        q_ptr[1] = hash >> 8;
        Q_SET_LENGTH(q_ptr, len);