Merge pull request #314 from pfalcon/parse-node-refactor

parse: Refactor parse node encoding to support full range of small ints.
This commit is contained in:
Damien George 2014-02-22 17:07:01 +00:00
commit b25ef4db3b
5 changed files with 84 additions and 46 deletions

View File

@ -86,8 +86,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
switch (MP_PARSE_NODE_STRUCT_KIND(pns)) {
case PN_shift_expr:
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_DBL_LESS)) {
#if MICROPY_EMIT_CPYTHON
// can overflow; enabled only to compare with CPython
@ -105,8 +105,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
case PN_arith_expr:
// overflow checking here relies on SMALL_INT being strictly smaller than machine_int_t
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
machine_int_t arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
machine_int_t arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
machine_int_t arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
machine_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
machine_int_t res;
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_PLUS)) {
res = arg0 + arg1;
@ -125,8 +125,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
case PN_term:
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_STAR)) {
#if MICROPY_EMIT_CPYTHON
// can overflow; enabled only to compare with CPython
@ -149,7 +149,7 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
case PN_factor_2:
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pns->nodes[1]);
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[1]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_PLUS)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg);
} else if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_MINUS)) {
@ -169,10 +169,10 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_NULL(pns->nodes[1]) && !MP_PARSE_NODE_IS_NULL(pns->nodes[2])) {
mp_parse_node_struct_t* pns2 = (mp_parse_node_struct_t*)pns->nodes[2];
if (MP_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) {
int power = MP_PARSE_NODE_LEAF_ARG(pns2->nodes[0]);
int power = MP_PARSE_NODE_LEAF_SMALL_INT(pns2->nodes[0]);
if (power >= 0) {
int ans = 1;
int base = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int base = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
for (; power > 0; power--) {
ans *= base;
}
@ -320,10 +320,14 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {
STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) {
assert(MP_PARSE_NODE_IS_LEAF(pn));
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn));
return;
}
int arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: assert(0);
case MP_PARSE_NODE_SMALL_INT: vstr_printf(vstr, "%d", arg); break;
case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING: cpython_c_print_quoted_str(vstr, arg, false); break;
@ -421,11 +425,11 @@ void compile_generic_tuple(compiler_t *comp, mp_parse_node_struct_t *pns) {
// Returns true only when pn is the `False` keyword token.
// The small-int alternatives below are commented out (marked "untested"), so a
// small-int leaf is never reported as constant-false by this function.
STATIC bool node_is_const_false(mp_parse_node_t pn) {
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_FALSE);
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 0);
}
// Returns true when pn is the `True` keyword token, or when pn is a small-int
// leaf whose extracted value compares equal to 1.
// NOTE(review): two return statements appear back to back; they differ only in
// the accessor used to read the small-int value (MP_PARSE_NODE_LEAF_ARG vs
// MP_PARSE_NODE_LEAF_SMALL_INT). As written, only the first one is reachable.
STATIC bool node_is_const_true(mp_parse_node_t pn) {
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 1);
}
#if MICROPY_EMIT_CPYTHON
@ -1464,7 +1468,8 @@ void compile_for_stmt_optimised_range(compiler_t *comp, mp_parse_node_t pn_var,
// compile: if var <cond> end: goto top
compile_node(comp, pn_var);
compile_node(comp, pn_end);
if (MP_PARSE_NODE_LEAF_ARG(pn_step) >= 0) {
assert(MP_PARSE_NODE_IS_SMALL_INT(pn_step));
if (MP_PARSE_NODE_LEAF_SMALL_INT(pn_step) >= 0) {
EMIT_ARG(binary_op, RT_BINARY_OP_LESS);
} else {
EMIT_ARG(binary_op, RT_BINARY_OP_MORE);
@ -2514,11 +2519,13 @@ STATIC compile_function_t compile_function[] = {
void compile_node(compiler_t *comp, mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_NULL(pn)) {
// pass
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
EMIT_ARG(load_const_small_int, arg);
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break;
case MP_PARSE_NODE_SMALL_INT: EMIT_ARG(load_const_small_int, arg); break;
case MP_PARSE_NODE_INTEGER: EMIT_ARG(load_const_int, arg); break;
case MP_PARSE_NODE_DECIMAL: EMIT_ARG(load_const_dec, arg); break;
case MP_PARSE_NODE_STRING: EMIT_ARG(load_const_str, arg, false); break;

View File

@ -110,7 +110,7 @@ STATIC int get_arg_i(qstr op, mp_parse_node_t *pn_args, int wanted_arg_num, int
printf("SyntaxError: '%s' expects an integer in position %d\n", qstr_str(op), wanted_arg_num);
return 0;
}
int i = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]);
int i = MP_PARSE_NODE_LEAF_SMALL_INT(pn_args[wanted_arg_num]);
if ((i & (~fit_mask)) != 0) {
printf("SyntaxError: '%s' integer 0x%x does not fit in mask 0x%x\n", qstr_str(op), i, fit_mask);
return 0;

View File

@ -125,7 +125,10 @@ STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *s
}
// Builds a leaf parse node by OR-ing the kind tag into the low bits of the
// left-shifted argument and casting the result to mp_parse_node_t.
// NOTE(review): the unconditional return (shift by 4) comes first, so as written
// the kind-specific returns after it are unreachable. Those later returns shift
// by 1 for MP_PARSE_NODE_SMALL_INT and by 5 for every other kind.
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
return (mp_parse_node_t)(kind | (arg << 4));
if (kind == MP_PARSE_NODE_SMALL_INT) {
// small ints keep their value in bits 1 and above
return (mp_parse_node_t)(kind | (arg << 1));
}
// all other leaf kinds keep their argument in bits 5 and above
return (mp_parse_node_t)(kind | (arg << 5));
}
//int num_parse_nodes_allocated = 0;
@ -171,11 +174,13 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
}
if (MP_PARSE_NODE_IS_NULL(pn)) {
printf("NULL\n");
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
printf("int(" INT_FMT ")\n", arg);
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_SMALL_INT: printf("int(" INT_FMT ")\n", arg); break;
case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;

View File

@ -2,29 +2,30 @@ struct _mp_lexer_t;
// a mp_parse_node_t is:
// - 0000...0000: no node
// - xxxx...0001: an identifier; bits 4 and above are the qstr
// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
// - xxxx...1001: a string; bits 4 and above are the qstr holding the value
// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value
// - xxxx...1101: a token; bits 4 and above are mp_token_kind_t
// - xxxx...xxx0: pointer to mp_parse_node_struct_t
// - xxxx...xxx1: a small integer; bits 1 and above are the signed value, 2's complement
// - xxxx...xx00: pointer to mp_parse_node_struct_t
// - xx...x00010: an identifier; bits 5 and above are the qstr
// - xx...x00110: an integer; bits 5 and above are the qstr holding the value
// - xx...x01010: a decimal; bits 5 and above are the qstr holding the value
// - xx...x01110: a string; bits 5 and above are the qstr holding the value
// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t
// makes sure the top 5 bits of x are all cleared (positive number) or all set (negative number)
// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)
// makes sure the top 2 bits of x are all cleared (positive number) or all set (negative number)
// these macros can probably go somewhere else because they are used more than just in the parser
#define MP_UINT_HIGH_5_BITS (~((~((machine_uint_t)0)) >> 5))
#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2))
// parser's small ints are different from VM small int
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == MP_UINT_HIGH_5_BITS))
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS))
#define MP_PARSE_NODE_NULL (0)
#define MP_PARSE_NODE_ID (0x1)
#define MP_PARSE_NODE_SMALL_INT (0x3)
#define MP_PARSE_NODE_INTEGER (0x5)
#define MP_PARSE_NODE_DECIMAL (0x7)
#define MP_PARSE_NODE_STRING (0x9)
#define MP_PARSE_NODE_BYTES (0xb)
#define MP_PARSE_NODE_TOKEN (0xd)
#define MP_PARSE_NODE_SMALL_INT (0x1)
#define MP_PARSE_NODE_ID (0x02)
#define MP_PARSE_NODE_INTEGER (0x06)
#define MP_PARSE_NODE_DECIMAL (0x0a)
#define MP_PARSE_NODE_STRING (0x0e)
#define MP_PARSE_NODE_BYTES (0x12)
#define MP_PARSE_NODE_TOKEN (0x16)
typedef machine_uint_t mp_parse_node_t; // must be pointer size
@ -38,18 +39,19 @@ typedef struct _mp_parse_node_struct_t {
// some of these evaluate their argument more than once
#define MP_PARSE_NODE_IS_NULL(pn) ((pn) == MP_PARSE_NODE_NULL)
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0)
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 3)
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0)
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))
#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == MP_PARSE_NODE_ID)
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == MP_PARSE_NODE_SMALL_INT)
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == MP_PARSE_NODE_TOKEN)
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | (k << 4)))
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0x1) == MP_PARSE_NODE_SMALL_INT)
#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0x1f) == MP_PARSE_NODE_ID)
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0x1f) == MP_PARSE_NODE_TOKEN)
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | ((k) << 5)))
#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0x1f)
// TODO should probably have int and uint versions of this macro
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_uint_t)(pn)) >> 5)
#define MP_PARSE_NODE_LEAF_SMALL_INT(pn) (((machine_int_t)(pn)) >> 1)
#define MP_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)

View File

@ -1,5 +1,29 @@
# This tests small int range for 32-bit machine
# Small ints are variable-length encoded in MicroPython, so first
# test that encoding works as expected.
print(0)
print(1)
print(-1)
# The value is split into 7-bit "subwords"; since all ints in Python are
# signed, there are 6 bits of magnitude. So there is a "turning point"
# around 2^6.
print(63)
print(64)
print(65)
print(-63)
print(-64)
print(-65)
# Maximum values of small ints on 32-bit platform
print(1073741823)
# Per Python semantics, a lexical integer has no sign (i.e. it is positive)
# and '-' is a unary minus operation applied to it. That's why -1073741824
# (the minimum two's-complement negative value) is not allowed.
print(-1073741823)
# Operations tests
a = 0x3fffff
print(a)
a *= 0x10