parse: Refactor parse node encoding to support full range of small ints.
Based on suggestion by @dpgeorge at https://github.com/micropython/micropython/pull/313
This commit is contained in:
parent
bbf0e2fe12
commit
56e5ef203b
37
py/compile.c
37
py/compile.c
@ -86,8 +86,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
|
||||
switch (MP_PARSE_NODE_STRUCT_KIND(pns)) {
|
||||
case PN_shift_expr:
|
||||
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
|
||||
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
|
||||
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
|
||||
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
|
||||
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
|
||||
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_DBL_LESS)) {
|
||||
#if MICROPY_EMIT_CPYTHON
|
||||
// can overflow; enabled only to compare with CPython
|
||||
@ -105,8 +105,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
|
||||
case PN_arith_expr:
|
||||
// overflow checking here relies on SMALL_INT being strictly smaller than machine_int_t
|
||||
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
|
||||
machine_int_t arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
|
||||
machine_int_t arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
|
||||
machine_int_t arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
|
||||
machine_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
|
||||
machine_int_t res;
|
||||
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_PLUS)) {
|
||||
res = arg0 + arg1;
|
||||
@ -125,8 +125,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
|
||||
|
||||
case PN_term:
|
||||
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
|
||||
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
|
||||
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
|
||||
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
|
||||
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
|
||||
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_STAR)) {
|
||||
#if MICROPY_EMIT_CPYTHON
|
||||
// can overflow; enabled only to compare with CPython
|
||||
@ -149,7 +149,7 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
|
||||
|
||||
case PN_factor_2:
|
||||
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) {
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pns->nodes[1]);
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[1]);
|
||||
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_PLUS)) {
|
||||
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg);
|
||||
} else if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_MINUS)) {
|
||||
@ -169,10 +169,10 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
|
||||
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_NULL(pns->nodes[1]) && !MP_PARSE_NODE_IS_NULL(pns->nodes[2])) {
|
||||
mp_parse_node_struct_t* pns2 = (mp_parse_node_struct_t*)pns->nodes[2];
|
||||
if (MP_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) {
|
||||
int power = MP_PARSE_NODE_LEAF_ARG(pns2->nodes[0]);
|
||||
int power = MP_PARSE_NODE_LEAF_SMALL_INT(pns2->nodes[0]);
|
||||
if (power >= 0) {
|
||||
int ans = 1;
|
||||
int base = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
|
||||
int base = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
|
||||
for (; power > 0; power--) {
|
||||
ans *= base;
|
||||
}
|
||||
@ -320,10 +320,14 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {
|
||||
|
||||
STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) {
|
||||
assert(MP_PARSE_NODE_IS_LEAF(pn));
|
||||
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
|
||||
vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn));
|
||||
return;
|
||||
}
|
||||
|
||||
int arg = MP_PARSE_NODE_LEAF_ARG(pn);
|
||||
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
|
||||
case MP_PARSE_NODE_ID: assert(0);
|
||||
case MP_PARSE_NODE_SMALL_INT: vstr_printf(vstr, "%d", arg); break;
|
||||
case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break;
|
||||
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
|
||||
case MP_PARSE_NODE_STRING: cpython_c_print_quoted_str(vstr, arg, false); break;
|
||||
@ -421,11 +425,11 @@ void compile_generic_tuple(compiler_t *comp, mp_parse_node_struct_t *pns) {
|
||||
|
||||
STATIC bool node_is_const_false(mp_parse_node_t pn) {
|
||||
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_FALSE);
|
||||
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
|
||||
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 0);
|
||||
}
|
||||
|
||||
STATIC bool node_is_const_true(mp_parse_node_t pn) {
|
||||
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
|
||||
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 1);
|
||||
}
|
||||
|
||||
#if MICROPY_EMIT_CPYTHON
|
||||
@ -1464,7 +1468,8 @@ void compile_for_stmt_optimised_range(compiler_t *comp, mp_parse_node_t pn_var,
|
||||
// compile: if var <cond> end: goto top
|
||||
compile_node(comp, pn_var);
|
||||
compile_node(comp, pn_end);
|
||||
if (MP_PARSE_NODE_LEAF_ARG(pn_step) >= 0) {
|
||||
assert(MP_PARSE_NODE_IS_SMALL_INT(pn_step));
|
||||
if (MP_PARSE_NODE_LEAF_SMALL_INT(pn_step) >= 0) {
|
||||
EMIT_ARG(binary_op, RT_BINARY_OP_LESS);
|
||||
} else {
|
||||
EMIT_ARG(binary_op, RT_BINARY_OP_MORE);
|
||||
@ -2514,11 +2519,13 @@ STATIC compile_function_t compile_function[] = {
|
||||
void compile_node(compiler_t *comp, mp_parse_node_t pn) {
|
||||
if (MP_PARSE_NODE_IS_NULL(pn)) {
|
||||
// pass
|
||||
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
|
||||
EMIT_ARG(load_const_small_int, arg);
|
||||
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
|
||||
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
|
||||
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
|
||||
case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break;
|
||||
case MP_PARSE_NODE_SMALL_INT: EMIT_ARG(load_const_small_int, arg); break;
|
||||
case MP_PARSE_NODE_INTEGER: EMIT_ARG(load_const_int, arg); break;
|
||||
case MP_PARSE_NODE_DECIMAL: EMIT_ARG(load_const_dec, arg); break;
|
||||
case MP_PARSE_NODE_STRING: EMIT_ARG(load_const_str, arg, false); break;
|
||||
|
@ -110,7 +110,7 @@ STATIC int get_arg_i(qstr op, mp_parse_node_t *pn_args, int wanted_arg_num, int
|
||||
printf("SyntaxError: '%s' expects an integer in position %d\n", qstr_str(op), wanted_arg_num);
|
||||
return 0;
|
||||
}
|
||||
int i = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]);
|
||||
int i = MP_PARSE_NODE_LEAF_SMALL_INT(pn_args[wanted_arg_num]);
|
||||
if ((i & (~fit_mask)) != 0) {
|
||||
printf("SyntaxError: '%s' integer 0x%x does not fit in mask 0x%x\n", qstr_str(op), i, fit_mask);
|
||||
return 0;
|
||||
|
11
py/parse.c
11
py/parse.c
@ -125,7 +125,10 @@ STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *s
|
||||
}
|
||||
|
||||
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
|
||||
return (mp_parse_node_t)(kind | (arg << 4));
|
||||
if (kind == MP_PARSE_NODE_SMALL_INT) {
|
||||
return (mp_parse_node_t)(kind | (arg << 1));
|
||||
}
|
||||
return (mp_parse_node_t)(kind | (arg << 5));
|
||||
}
|
||||
|
||||
//int num_parse_nodes_allocated = 0;
|
||||
@ -171,11 +174,13 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
|
||||
}
|
||||
if (MP_PARSE_NODE_IS_NULL(pn)) {
|
||||
printf("NULL\n");
|
||||
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
|
||||
printf("int(" INT_FMT ")\n", arg);
|
||||
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
|
||||
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
|
||||
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
|
||||
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
|
||||
case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
|
||||
case MP_PARSE_NODE_SMALL_INT: printf("int(" INT_FMT ")\n", arg); break;
|
||||
case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
|
||||
case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
|
||||
case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
|
||||
|
56
py/parse.h
56
py/parse.h
@ -2,29 +2,30 @@ struct _mp_lexer_t;
|
||||
|
||||
// a mp_parse_node_t is:
|
||||
// - 0000...0000: no node
|
||||
// - xxxx...0001: an identifier; bits 4 and above are the qstr
|
||||
// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
|
||||
// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
|
||||
// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
|
||||
// - xxxx...1001: a string; bits 4 and above are the qstr holding the value
|
||||
// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value
|
||||
// - xxxx...1101: a token; bits 4 and above are mp_token_kind_t
|
||||
// - xxxx...xxx0: pointer to mp_parse_node_struct_t
|
||||
// - xxxx...xxx1: a small integer; bits 1 and above are the signed value, 2's complement
|
||||
// - xxxx...xx00: pointer to mp_parse_node_struct_t
|
||||
// - xx...x00010: an identifier; bits 5 and above are the qstr
|
||||
// - xx...x00110: an integer; bits 5 and above are the qstr holding the value
|
||||
// - xx...x01010: a decimal; bits 5 and above are the qstr holding the value
|
||||
// - xx...x01110: a string; bits 5 and above are the qstr holding the value
|
||||
// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value
|
||||
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t
|
||||
|
||||
// makes sure the top 5 bits of x are all cleared (positive number) or all set (negative number)
|
||||
// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)
|
||||
// makes sure the top 2 bits of x are all cleared (positive number) or all set (negative number)
|
||||
// these macros can probably go somewhere else because they are used more than just in the parser
|
||||
#define MP_UINT_HIGH_5_BITS (~((~((machine_uint_t)0)) >> 5))
|
||||
#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2))
|
||||
// parser's small ints are different from VM small int
|
||||
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == MP_UINT_HIGH_5_BITS))
|
||||
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS))
|
||||
|
||||
#define MP_PARSE_NODE_NULL (0)
|
||||
#define MP_PARSE_NODE_ID (0x1)
|
||||
#define MP_PARSE_NODE_SMALL_INT (0x3)
|
||||
#define MP_PARSE_NODE_INTEGER (0x5)
|
||||
#define MP_PARSE_NODE_DECIMAL (0x7)
|
||||
#define MP_PARSE_NODE_STRING (0x9)
|
||||
#define MP_PARSE_NODE_BYTES (0xb)
|
||||
#define MP_PARSE_NODE_TOKEN (0xd)
|
||||
#define MP_PARSE_NODE_SMALL_INT (0x1)
|
||||
#define MP_PARSE_NODE_ID (0x02)
|
||||
#define MP_PARSE_NODE_INTEGER (0x06)
|
||||
#define MP_PARSE_NODE_DECIMAL (0x0a)
|
||||
#define MP_PARSE_NODE_STRING (0x0e)
|
||||
#define MP_PARSE_NODE_BYTES (0x12)
|
||||
#define MP_PARSE_NODE_TOKEN (0x16)
|
||||
|
||||
typedef machine_uint_t mp_parse_node_t; // must be pointer size
|
||||
|
||||
@ -38,18 +39,19 @@ typedef struct _mp_parse_node_struct_t {
|
||||
// some of these evaluate their argument more than once
|
||||
|
||||
#define MP_PARSE_NODE_IS_NULL(pn) ((pn) == MP_PARSE_NODE_NULL)
|
||||
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
|
||||
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0)
|
||||
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))
|
||||
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 3)
|
||||
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0)
|
||||
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))
|
||||
|
||||
#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == MP_PARSE_NODE_ID)
|
||||
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == MP_PARSE_NODE_SMALL_INT)
|
||||
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == MP_PARSE_NODE_TOKEN)
|
||||
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | (k << 4)))
|
||||
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0x1) == MP_PARSE_NODE_SMALL_INT)
|
||||
#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0x1f) == MP_PARSE_NODE_ID)
|
||||
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0x1f) == MP_PARSE_NODE_TOKEN)
|
||||
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | ((k) << 5)))
|
||||
|
||||
#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
|
||||
#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0x1f)
|
||||
// TODO should probably have int and uint versions of this macro
|
||||
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)
|
||||
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_uint_t)(pn)) >> 5)
|
||||
#define MP_PARSE_NODE_LEAF_SMALL_INT(pn) (((machine_int_t)(pn)) >> 1)
|
||||
#define MP_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
|
||||
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
|
||||
|
||||
|
@ -1,5 +1,29 @@
|
||||
# This tests small int range for 32-bit machine
|
||||
|
||||
# Small ints are variable-length encoded in MicroPython, so first
|
||||
# test that encoding works as expected.
|
||||
|
||||
print(0)
|
||||
print(1)
|
||||
print(-1)
|
||||
# The value is split into 7-bit "subwords" and, taking into account that all
|
||||
# ints in Python are signed, there are 6 bits of magnitude. So, around 2^6
|
||||
# there's a "turning point".
|
||||
print(63)
|
||||
print(64)
|
||||
print(65)
|
||||
print(-63)
|
||||
print(-64)
|
||||
print(-65)
|
||||
# Maximum values of small ints on 32-bit platform
|
||||
print(1073741823)
|
||||
# Per Python semantics, an integer literal has no sign (i.e. it is positive)
|
||||
# and '-' is a unary minus operation applied to it. That's why -1073741824
|
||||
# (the minimum two's-complement negative value) is not allowed.
|
||||
print(-1073741823)
|
||||
|
||||
# Operations tests
|
||||
|
||||
a = 0x3fffff
|
||||
print(a)
|
||||
a *= 0x10
|
||||
|
Loading…
Reference in New Issue
Block a user