py/runtime: Allow multiple *args in a function call.

This is a partial implementation of PEP 448 to allow unpacking multiple
star args in a function or method call.

This is implemented by changing the emitted bytecodes so that both
positional args and star args are stored as positional args.  A bitmap is
added to indicate if an argument at a given position is a positional
argument or a star arg.

In the generated code, this new bitmap takes the place of the old star arg.
Because it is stored as a small int, only the first N arguments can be star
args, where N is the number of bits in a small int.

The runtime is modified to interpret this new bytecode format while still
trying to perform as few memory reallocations as possible.

Signed-off-by: David Lechner <david@pybricks.com>
This commit is contained in:
David Lechner 2020-03-24 23:54:45 -05:00 committed by Damien George
parent 1e99d29f36
commit 783b1a868f
10 changed files with 151 additions and 69 deletions

View File

@ -8,7 +8,7 @@ Below is a list of finalised/accepted PEPs for Python 3.5 grouped into their imp
+----------------------------------------------------------------------------------------------------------+---------------+
| **Extensions to the syntax:** | **Status** |
+--------------------------------------------------------+-------------------------------------------------+---------------+
| `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations | |
| `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations | Partial |
+--------------------------------------------------------+-------------------------------------------------+---------------+
| `PEP 465 <https://www.python.org/dev/peps/pep-0465/>`_ | a new matrix multiplication operator | Completed |
+--------------------------------------------------------+-------------------------------------------------+---------------+

View File

@ -37,6 +37,7 @@
#include "py/asmbase.h"
#include "py/nativeglue.h"
#include "py/persistentcode.h"
#include "py/smallint.h"
#if MICROPY_ENABLE_COMPILER
@ -2397,17 +2398,30 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
int n_positional = n_positional_extra;
uint n_keyword = 0;
uint star_flags = 0;
mp_parse_node_struct_t *star_args_node = NULL;
mp_uint_t star_args = 0;
for (size_t i = 0; i < n_args; i++) {
if (MP_PARSE_NODE_IS_STRUCT(args[i])) {
mp_parse_node_struct_t *pns_arg = (mp_parse_node_struct_t *)args[i];
if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_star) {
if (star_flags & MP_EMIT_STAR_FLAG_SINGLE) {
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("can't have multiple *x"));
if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("* arg after **"));
return;
}
#if MICROPY_DYNAMIC_COMPILER
if (i > mp_dynamic_compiler.small_int_bits)
#else
if (i > MP_SMALL_INT_BITS)
#endif
{
// If there are not enough bits in a small int to fit the flag, then we consider
// it a syntax error. It should be unlikely to have this many args in practice.
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("too many args"));
return;
}
star_flags |= MP_EMIT_STAR_FLAG_SINGLE;
star_args_node = pns_arg;
star_args |= 1 << i;
compile_node(comp, pns_arg->nodes[0]);
n_positional++;
} else if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_dbl_star) {
star_flags |= MP_EMIT_STAR_FLAG_DOUBLE;
// double-star args are stored as kw arg with key of None
@ -2438,12 +2452,12 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
}
} else {
normal_argument:
if (star_flags) {
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after */**"));
if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after **"));
return;
}
if (n_keyword > 0) {
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after keyword arg"));
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after keyword arg"));
return;
}
compile_node(comp, args[i]);
@ -2451,14 +2465,9 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
}
}
// compile the star/double-star arguments if we had them
// if we had one but not the other then we load "null" as a place holder
if (star_flags != 0) {
if (star_args_node == NULL) {
EMIT(load_null);
} else {
compile_node(comp, star_args_node->nodes[0]);
}
// one extra object that contains the star_args map
EMIT_ARG(load_const_small_int, star_args);
}
// emit the function/method call

View File

@ -701,9 +701,9 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
}
uint n_args = n_args_n_kw & 0xff;
uint n_kw = (n_args_n_kw >> 8) & 0xff;
mp_obj_t pos_seq = args[n_args + 2 * n_kw]; // may be MP_OBJ_NULL
mp_uint_t star_args = mp_obj_get_int_truncated(args[n_args + 2 * n_kw]);
DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, seq=%p)\n", fun, self, n_args, n_kw, args, pos_seq);
DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, map=%u)\n", fun, self, n_args, n_kw, args, star_args);
// We need to create the following array of objects:
// args[0 .. n_args] unpacked(pos_seq) args[n_args .. n_args + 2 * n_kw] unpacked(kw_dict)
@ -714,6 +714,20 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
uint args2_alloc;
uint args2_len = 0;
// Try to get a hint for unpacked * args length
uint list_len = 0;
if (star_args != 0) {
for (uint i = 0; i < n_args; i++) {
if (star_args & (1 << i)) {
mp_obj_t len = mp_obj_len_maybe(args[i]);
if (len != MP_OBJ_NULL) {
list_len += mp_obj_get_int(len);
}
}
}
}
// Try to get a hint for the size of the kw_dict
uint kw_dict_len = 0;
@ -727,8 +741,8 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
// Extract the pos_seq sequence to the new args array.
// Note that it can be arbitrary iterator.
if (pos_seq == MP_OBJ_NULL) {
// no sequence
if (star_args == 0) {
// no star args to unpack
// allocate memory for the new array of args
args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len);
@ -742,60 +756,69 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
// copy the fixed pos args
mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
args2_len += n_args;
} else {
// at least one star arg to unpack
} else if (mp_obj_is_type(pos_seq, &mp_type_tuple) || mp_obj_is_type(pos_seq, &mp_type_list)) {
// allocate memory for the new array of args
args2_alloc = 1 + n_args + list_len + 2 * (n_kw + kw_dict_len);
args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
// copy the self
if (self != MP_OBJ_NULL) {
args2[args2_len++] = self;
}
for (uint i = 0; i < n_args; i++) {
mp_obj_t arg = args[i];
if (star_args & (1 << i)) {
// star arg
if (mp_obj_is_type(arg, &mp_type_tuple) || mp_obj_is_type(arg, &mp_type_list)) {
// optimise the case of a tuple and list
// get the items
size_t len;
mp_obj_t *items;
mp_obj_get_array(pos_seq, &len, &items);
// allocate memory for the new array of args
args2_alloc = 1 + n_args + len + 2 * (n_kw + kw_dict_len);
args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
// copy the self
if (self != MP_OBJ_NULL) {
args2[args2_len++] = self;
}
// copy the fixed and variable position args
mp_seq_cat(args2 + args2_len, args, n_args, items, len, mp_obj_t);
args2_len += n_args + len;
mp_obj_get_array(arg, &len, &items);
// copy the items
assert(args2_len + len <= args2_alloc);
mp_seq_copy(args2 + args2_len, items, len, mp_obj_t);
args2_len += len;
} else {
// generic iterator
// allocate memory for the new array of args
args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len) + 3;
args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
// copy the self
if (self != MP_OBJ_NULL) {
args2[args2_len++] = self;
}
// copy the fixed position args
mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
args2_len += n_args;
// extract the variable position args from the iterator
mp_obj_iter_buf_t iter_buf;
mp_obj_t iterable = mp_getiter(pos_seq, &iter_buf);
mp_obj_t iterable = mp_getiter(arg, &iter_buf);
mp_obj_t item;
while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
if (args2_len >= args2_alloc) {
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), args2_alloc * 2 * sizeof(mp_obj_t));
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
args2_alloc * 2 * sizeof(mp_obj_t));
args2_alloc *= 2;
}
args2[args2_len++] = item;
}
}
} else {
// normal argument
assert(args2_len < args2_alloc);
args2[args2_len++] = arg;
}
}
}
// The size of the args2 array now is the number of positional args.
uint pos_args_len = args2_len;
// ensure there is still enough room for kw args
if (args2_len + 2 * (n_kw + kw_dict_len) > args2_alloc) {
uint new_alloc = args2_len + 2 * (n_kw + kw_dict_len);
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
new_alloc * sizeof(mp_obj_t));
args2_alloc = new_alloc;
}
// Copy the kw args.
for (uint i = 0; i < n_kw; i++) {
mp_obj_t kw_key = args[n_args + i * 2];

View File

@ -949,7 +949,7 @@ unwind_jump:;
// unum & 0xff == n_positional
// (unum >> 8) & 0xff == n_keyword
// We have following stack layout here:
// fun arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
// fun arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1;
#if MICROPY_STACKLESS
if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {
@ -1034,7 +1034,7 @@ unwind_jump:;
// unum & 0xff == n_positional
// (unum >> 8) & 0xff == n_keyword
// We have following stack layout here:
// fun self arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
// fun self arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 2;
#if MICROPY_STACKLESS
if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {

View File

@ -3,10 +3,16 @@
def foo(a, b, c):
print(a, b, c)
foo(*(), 1, 2, 3)
foo(*(1,), 2, 3)
foo(*(1, 2), 3)
foo(*(1, 2, 3))
foo(1, *(2, 3))
foo(1, 2, *(3,))
foo(1, 2, 3, *())
foo(*(1,), 2, *(3,))
foo(*(1, 2), *(3,))
foo(*(1,), *(2, 3))
# Another sequence type
foo(1, 2, *[100])
@ -29,10 +35,16 @@ class A:
print(a, b, c)
a = A()
a.foo(*(), 1, 2, 3)
a.foo(*(1,), 2, 3)
a.foo(*(1, 2), 3)
a.foo(*(1, 2, 3))
a.foo(1, *(2, 3))
a.foo(1, 2, *(3,))
a.foo(1, 2, 3, *())
a.foo(*(1,), 2, *(3,))
a.foo(*(1, 2), *(3,))
a.foo(*(1,), *(2, 3))
# Another sequence type
a.foo(1, 2, *[100])

View File

@ -6,6 +6,11 @@ def f(a, b, c, d):
f(*(1, 2), **{'c':3, 'd':4})
f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})
try:
eval("f(**{'a': 1}, *(2, 3, 4))")
except SyntaxError:
print("SyntaxError")
# test calling a method with *tuple and **dict
class A:
@ -15,3 +20,8 @@ class A:
a = A()
a.f(*(1, 2), **{'c':3, 'd':4})
a.f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})
try:
eval("a.f(**{'a': 1}, *(2, 3, 4))")
except SyntaxError:
print("SyntaxError")

View File

@ -23,3 +23,16 @@ def f4(*vargs, **kwargs):
f4(*(1, 2))
f4(kw_arg=3)
f4(*(1, 2), kw_arg=3)
# test evaluation order of arguments
def f5(*vargs, **kwargs):
print(vargs, kwargs)
def print_ret(x):
print(x)
return x
f5(*print_ret(["a", "b"]), kw_arg=print_ret(None))

View File

@ -6,26 +6,23 @@ except NameError:
print("SKIP")
raise SystemExit
# from basics/fun_kwvarargs.py
# test evaluation order of arguments (in 3.4 it's backwards, 3.5 it's fixed)
def f4(*vargs, **kwargs):
print(vargs, kwargs)
def print_ret(x):
print(x)
return x
f4(*print_ret(['a', 'b']), kw_arg=print_ret(None))
# test evaluation order of dictionary key/value pair (in 3.4 it's backwards)
{print_ret(1):print_ret(2)}
# from basics/syntaxerror.py
def test_syntax(code):
try:
exec(code)
except SyntaxError:
print("SyntaxError")
test_syntax("f(*a, *b)") # can't have multiple * (in 3.5 we can)
test_syntax("f(*a, b)") # can't have positional after *
test_syntax("f(**a, b)") # can't have positional after **
test_syntax("() = []") # can't assign to empty tuple (in 3.6 we can)
test_syntax("del ()") # can't delete empty tuple (in 3.6 we can)

View File

@ -1,13 +1,8 @@
None
['a', 'b']
('a', 'b') {'kw_arg': None}
2
1
SyntaxError
SyntaxError
SyntaxError
SyntaxError
SyntaxError
3.4
3 4
IndexError('foo',)

View File

@ -0,0 +1,23 @@
"""
categories: Syntax
description: Argument unpacking does not work if the argument being unpacked is the nth or greater argument, where n is the number of bits in an MP_SMALL_INT.
cause: The implementation uses an MP_SMALL_INT to flag args that need to be unpacked.
workaround: Use fewer arguments.
"""
def example(*args):
print(len(args))
MORE = ["a", "b", "c"]
# fmt: off
example(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
*MORE,
)
# fmt: on