py: Create str/bytes objects in the parser, not the compiler.
Previous to this patch any non-interned str/bytes objects would create a special parse node that held a copy of the str/bytes data. Then in the compiler this data would be turned into a str/bytes object. This actually led to 2 copies of the data, one in the parse node and one in the object. The parse node's copy of the data would be freed at the end of the compile stage but nevertheless it meant that the peak memory usage of the parse/compile stage was higher than it needed to be (by an amount equal to the number of bytes in all the non-interned str/bytes objects). This patch changes the behaviour so that str/bytes objects are created directly in the parser and the object stored in a const-object parse node (which already exists for bignum, float and complex const objects). This reduces peak RAM usage of the parse/compile stage, simplifies the parser and compiler, and reduces code size by about 170 bytes on Thumb2 archs, and by about 300 bytes on Xtensa archs.
This commit is contained in:
parent
f62503dc47
commit
5255255fb9
39
py/compile.c
39
py/compile.c
|
@ -47,8 +47,6 @@ typedef enum {
|
|||
#include "py/grammar.h"
|
||||
#undef DEF_RULE
|
||||
#undef DEF_RULE_NC
|
||||
PN_string, // special node for non-interned string
|
||||
PN_bytes, // special node for non-interned bytes
|
||||
PN_const_object, // special node for a constant, generic Python object
|
||||
// define rules without a compile function
|
||||
#define DEF_RULE(rule, comp, kind, ...)
|
||||
|
@ -1880,8 +1878,6 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
|
|||
} else {
|
||||
// for non-REPL, evaluate then discard the expression
|
||||
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
|
||||
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)
|
||||
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)
|
||||
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) {
|
||||
// do nothing with a lonely constant
|
||||
} else {
|
||||
|
@ -2600,31 +2596,17 @@ STATIC void compile_atom_expr_await(compiler_t *comp, mp_parse_node_struct_t *pn
|
|||
}
|
||||
#endif
|
||||
|
||||
STATIC void compile_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
|
||||
// only create and load the actual str object on the last pass
|
||||
if (comp->pass != MP_PASS_EMIT) {
|
||||
EMIT_ARG(load_const_obj, mp_const_none);
|
||||
} else {
|
||||
EMIT_ARG(load_const_obj, mp_obj_new_str((const char*)pns->nodes[0], pns->nodes[1], false));
|
||||
}
|
||||
}
|
||||
|
||||
STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) {
|
||||
// only create and load the actual bytes object on the last pass
|
||||
if (comp->pass != MP_PASS_EMIT) {
|
||||
EMIT_ARG(load_const_obj, mp_const_none);
|
||||
} else {
|
||||
EMIT_ARG(load_const_obj, mp_obj_new_bytes((const byte*)pns->nodes[0], pns->nodes[1]));
|
||||
}
|
||||
STATIC mp_obj_t get_const_object(mp_parse_node_struct_t *pns) {
|
||||
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
|
||||
// nodes are 32-bit pointers, but need to extract 64-bit object
|
||||
return (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
|
||||
#else
|
||||
return (mp_obj_t)pns->nodes[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
|
||||
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
|
||||
// nodes are 32-bit pointers, but need to extract 64-bit object
|
||||
EMIT_ARG(load_const_obj, (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
|
||||
#else
|
||||
EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]);
|
||||
#endif
|
||||
EMIT_ARG(load_const_obj, get_const_object(pns));
|
||||
}
|
||||
|
||||
typedef void (*compile_function_t)(compiler_t*, mp_parse_node_struct_t*);
|
||||
|
@ -2637,8 +2619,6 @@ STATIC const compile_function_t compile_function[] = {
|
|||
#undef c
|
||||
#undef DEF_RULE
|
||||
#undef DEF_RULE_NC
|
||||
compile_string,
|
||||
compile_bytes,
|
||||
compile_const_object,
|
||||
};
|
||||
|
||||
|
@ -2891,7 +2871,8 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
|
|||
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
|
||||
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0])
|
||||
&& MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING)
|
||||
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
|
||||
|| (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)
|
||||
&& MP_OBJ_IS_STR(get_const_object((mp_parse_node_struct_t*)pns->nodes[0])))) {
|
||||
// compile the doc string
|
||||
compile_node(comp, pns->nodes[0]);
|
||||
// store the doc string
|
||||
|
|
|
@ -42,8 +42,6 @@ typedef enum {
|
|||
#include "py/grammar.h"
|
||||
#undef DEF_RULE
|
||||
#undef DEF_RULE_NC
|
||||
PN_string, // special node for non-interned string
|
||||
PN_bytes, // special node for non-interned bytes
|
||||
PN_const_object, // special node for a constant, generic Python object
|
||||
// define rules without a compile function
|
||||
#define DEF_RULE(rule, comp, kind, ...)
|
||||
|
|
47
py/parse.c
47
py/parse.c
|
@ -38,6 +38,7 @@
|
|||
#include "py/runtime0.h"
|
||||
#include "py/runtime.h"
|
||||
#include "py/objint.h"
|
||||
#include "py/objstr.h"
|
||||
#include "py/builtin.h"
|
||||
|
||||
#if MICROPY_ENABLE_COMPILER
|
||||
|
@ -75,8 +76,6 @@ enum {
|
|||
#include "py/grammar.h"
|
||||
#undef DEF_RULE
|
||||
#undef DEF_RULE_NC
|
||||
RULE_string, // special node for non-interned string
|
||||
RULE_bytes, // special node for non-interned bytes
|
||||
RULE_const_object, // special node for a constant, generic Python object
|
||||
|
||||
// define rules without a compile function
|
||||
|
@ -123,8 +122,6 @@ STATIC const rule_t *const rules[] = {
|
|||
#include "py/grammar.h"
|
||||
#undef DEF_RULE
|
||||
#undef DEF_RULE_NC
|
||||
NULL, // RULE_string
|
||||
NULL, // RULE_bytes
|
||||
NULL, // RULE_const_object
|
||||
|
||||
// define rules without a compile function
|
||||
|
@ -326,11 +323,7 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
|
|||
} else {
|
||||
// node must be a mp_parse_node_struct_t
|
||||
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
|
||||
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
|
||||
printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
|
||||
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
|
||||
printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
|
||||
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
|
||||
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
|
||||
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
|
||||
printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
|
||||
#else
|
||||
|
@ -392,21 +385,6 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
|
|||
parser->result_stack[parser->result_stack_top++] = pn;
|
||||
}
|
||||
|
||||
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
|
||||
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
|
||||
if (pn == NULL) {
|
||||
parser->parse_error = PARSE_ERROR_MEMORY;
|
||||
return MP_PARSE_NODE_NULL;
|
||||
}
|
||||
pn->source_line = src_line;
|
||||
pn->kind_num_nodes = rule_kind | (2 << 8);
|
||||
char *p = m_new(char, len);
|
||||
memcpy(p, str, len);
|
||||
pn->nodes[0] = (uintptr_t)p;
|
||||
pn->nodes[1] = len;
|
||||
return (mp_parse_node_t)pn;
|
||||
}
|
||||
|
||||
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
|
||||
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
|
||||
if (pn == NULL) {
|
||||
|
@ -473,8 +451,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
|
|||
// qstr exists, make a leaf node
|
||||
pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
|
||||
} else {
|
||||
// not interned, make a node holding a pointer to the string/bytes data
|
||||
pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
|
||||
// not interned, make a node holding a pointer to the string/bytes object
|
||||
mp_obj_t o = mp_obj_new_str_of_type(
|
||||
lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes,
|
||||
(const byte*)lex->vstr.buf, lex->vstr.len);
|
||||
pn = make_node_const_object(parser, lex->tok_line, o);
|
||||
}
|
||||
} else {
|
||||
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
|
||||
|
@ -934,15 +915,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
|||
// this code discards lonely statements, such as doc strings
|
||||
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
|
||||
mp_parse_node_t p = peek_result(&parser, 1);
|
||||
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
|
||||
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p))
|
||||
|| MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
|
||||
pop_result(&parser); // MP_PARSE_NODE_NULL
|
||||
mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
|
||||
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
|
||||
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
|
||||
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
|
||||
m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
|
||||
}
|
||||
}
|
||||
pop_result(&parser); // const expression (leaf or RULE_const_object)
|
||||
// Pushing the "pass" rule here will overwrite any RULE_const_object
|
||||
// entry that was on the result stack, allowing the GC to reclaim
|
||||
// the memory from the const object when needed.
|
||||
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -15,13 +15,13 @@
|
|||
str(str)
|
||||
[ 8] rule(5) (n=2)
|
||||
id(c)
|
||||
[ 8] literal str(a very long str that will not be interned)
|
||||
[ 8] literal \.\+
|
||||
[ 9] rule(5) (n=2)
|
||||
id(d)
|
||||
bytes(bytes)
|
||||
[ 10] rule(5) (n=2)
|
||||
id(e)
|
||||
[ 10] literal bytes(a very long bytes that will not be interned)
|
||||
[ 10] literal \.\+
|
||||
[ 11] rule(5) (n=2)
|
||||
id(f)
|
||||
[ 11] literal \.\+
|
||||
|
|
Loading…
Reference in New Issue