From 54d97251fe2dd4939652a186bf703885e654b4d1 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sat, 12 Sep 2020 13:57:31 -0500 Subject: [PATCH] modstruct: Improve compliance with python3 While checking whether we can enable -Wimplicit-fallthrough, I encountered a diagnostic in mp_binary_set_val_array_from_int which led to discovering the following bug: ``` >>> struct.pack("xb", 3) b'\x03\x03' ``` That is, the next value (3) was used as the value of a padding byte, while standard Python always fills "x" bytes with zeros. I initially thought this had to do with the unintentional fallthrough, but it doesn't. Instead, this code would relate to an array.array with a typecode of padding ('x'), which is ALSO not desktop Python compliant: ``` >>> array.array('x', (1, 2, 3)) array('x', [1, 0, 0]) ``` Possibly this is dead code that used to be shared between struct-setting and array-setting, but it no longer is. I also discovered that the argument list length for struct.pack and struct.pack_into were not checked, and that the length of binary data passed to array.array was not checked to be a multiple of the element size. I have corrected all of these to conform more closely to standard Python and revised some tests where necessary. Some tests for micropython-specific behavior that does not conform to standard Python and is not present in CircuitPython was deleted outright. --- py/binary.c | 9 +++--- py/modstruct.c | 17 ++++++----- py/objarray.c | 7 ++++- tests/basics/struct1.py | 25 ++++++++++++++++ tests/basics/struct_micropython.py | 41 -------------------------- tests/basics/struct_micropython.py.exp | 2 -- tests/misc/non_compliant.py | 6 ---- tests/misc/non_compliant.py.exp | 2 -- 8 files changed, 46 insertions(+), 63 deletions(-) delete mode 100644 tests/basics/struct_micropython.py delete mode 100644 tests/basics/struct_micropython.py.exp diff --git a/py/binary.c b/py/binary.c index cd0f1aa4df..b85edba625 100644 --- a/py/binary.c +++ b/py/binary.c @@ -126,7 +126,6 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, mp_uint_t index) { break; case BYTEARRAY_TYPECODE: case 'B': - case 'x': // value will be discarded val = ((unsigned char*)p)[index]; break; case 'h': @@ -330,7 +329,11 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte ** } } - mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); + if (val_type == 'x') { + memset(p, 0, 1); + } else { + mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); + } } void mp_binary_set_val_array(char typecode, void *p, mp_uint_t index, mp_obj_t val_in) { @@ -379,8 +382,6 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, mp_uint_t index, m case 'B': ((unsigned char*)p)[index] = val; break; - case 'x': - ((unsigned char*)p)[index] = 0; case 'h': ((short*)p)[index] = val; break; diff --git a/py/modstruct.c b/py/modstruct.c index fe766a4deb..7675de275d 100644 --- a/py/modstruct.c +++ b/py/modstruct.c @@ -183,16 +183,21 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_from_obj, 2, 3, struct_unpack_ // This function assumes there is enough room in p to store all the values STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, const mp_obj_t *args) { + size_t size; + size_t count = calc_size_items(mp_obj_str_get_str(fmt_in), &size); + if (count != n_args) { +#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE + mp_raise_ValueError(NULL); +#else + mp_raise_ValueError_varg(translate("pack expected %d items for packing (got %d)"), count, n_args); +#endif + } const char *fmt = mp_obj_str_get_str(fmt_in); char fmt_type = get_fmt_type(&fmt); size_t i; for (i = 0; i < n_args;) { mp_uint_t cnt = 1; - if (*fmt == '\0') { - // more arguments given than used by format string; CPython raises struct.error here - break; - } if (unichar_isdigit(*fmt)) { cnt = get_fmt_num(&fmt); } @@ -208,8 +213,7 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c memset(p + to_copy, 0, cnt - to_copy); p += cnt; } else { - // If we run out of args then we just finish; CPython would raise struct.error - while (cnt-- && i < n_args) { + while (cnt--) { mp_binary_set_val(fmt_type, *fmt, args[i], &p); // Pad bytes don't have a corresponding argument. if (*fmt != 'x') { @@ -222,7 +226,6 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c } STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) { - // TODO: "The arguments must match the values required by the format exactly." mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0])); vstr_t vstr; vstr_init_len(&vstr, size); diff --git a/py/objarray.c b/py/objarray.c index 5d83f06977..e0b4cbd55f 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -97,6 +97,9 @@ STATIC void array_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t #if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY STATIC mp_obj_array_t *array_new(char typecode, size_t n) { + if (typecode == 'x') { + mp_raise_ValueError(translate("bad typecode")); + } int typecode_size = mp_binary_get_size('@', typecode, NULL); mp_obj_array_t *o = m_new_obj(mp_obj_array_t); #if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY @@ -126,8 +129,10 @@ STATIC mp_obj_t array_construct(char typecode, mp_obj_t initializer) { || (MICROPY_PY_BUILTINS_BYTEARRAY && MP_OBJ_IS_TYPE(initializer, &mp_type_bytearray))))) && mp_get_buffer(initializer, &bufinfo, MP_BUFFER_READ)) { // construct array from raw bytes - // we round-down the len to make it a multiple of sz (CPython raises error) size_t sz = mp_binary_get_size('@', typecode, NULL); + if (bufinfo.len % sz) { + mp_raise_ValueError(translate("bytes length not a multiple of item size")); + } size_t len = bufinfo.len / sz; mp_obj_array_t *o = array_new(typecode, len); memcpy(o->items, bufinfo.buf, len * sz); diff --git a/tests/basics/struct1.py b/tests/basics/struct1.py index db34342a17..107006cc3f 100644 --- a/tests/basics/struct1.py +++ b/tests/basics/struct1.py @@ -39,6 +39,28 @@ print(v == (10, 100, 200, 300)) # network byte order print(struct.pack('!i', 123)) +# too short / too long arguments +buf = bytearray(b'>>>123<<<') +try: + struct.pack_into('bb', buf, 0, 3) +except: + print('struct.error') + +try: + struct.pack_into('bb', buf, 0, 3, 1, 4) +except: + print('struct.error') + +try: + struct.pack('bb', 3) +except: + print('struct.error') + +try: + struct.pack('bb', 3, 1, 4) +except: + print('struct.error') + # check that we get an error if the buffer is too small try: struct.unpack('I', b'\x00\x00\x00') @@ -96,3 +118,6 @@ try: print(struct.unpack_from('