modstruct: Improve compliance with python3

While checking whether we can enable -Wimplicit-fallthrough, I encountered
a diagnostic in mp_binary_set_val_array_from_int which led to discovering
the following bug:
```
>>> struct.pack("xb", 3)
b'\x03\x03'
```
That is, the next value (3) was used as the value of a padding byte, while
standard Python always fills "x" bytes with zeros.  I initially thought
this had to do with the unintentional fallthrough, but it doesn't.
Instead, this code would relate to an array.array with a typecode of
padding ('x'), which is ALSO not desktop Python compliant:
```
>>> array.array('x', (1, 2, 3))
array('x', [1, 0, 0])
```
Possibly this is dead code that used to be shared between struct-setting
and array-setting, but it no longer is.

I also discovered that the argument list length for struct.pack
and struct.pack_into were not checked, and that the length of binary data
passed to array.array was not checked to be a multiple of the element
size.

I have corrected all of these to conform more closely to standard Python
and revised some tests where necessary.  Some tests for micropython-specific
behavior that does not conform to standard Python and is not present
in CircuitPython was deleted outright.
This commit is contained in:
Jeff Epler 2020-09-12 13:57:31 -05:00
parent b24d3b886a
commit 54d97251fe
8 changed files with 46 additions and 63 deletions

View File

@ -126,7 +126,6 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, mp_uint_t index) {
break; break;
case BYTEARRAY_TYPECODE: case BYTEARRAY_TYPECODE:
case 'B': case 'B':
case 'x': // value will be discarded
val = ((unsigned char*)p)[index]; val = ((unsigned char*)p)[index];
break; break;
case 'h': case 'h':
@ -330,7 +329,11 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte **
} }
} }
mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); if (val_type == 'x') {
memset(p, 0, 1);
} else {
mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val);
}
} }
void mp_binary_set_val_array(char typecode, void *p, mp_uint_t index, mp_obj_t val_in) { void mp_binary_set_val_array(char typecode, void *p, mp_uint_t index, mp_obj_t val_in) {
@ -379,8 +382,6 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, mp_uint_t index, m
case 'B': case 'B':
((unsigned char*)p)[index] = val; ((unsigned char*)p)[index] = val;
break; break;
case 'x':
((unsigned char*)p)[index] = 0;
case 'h': case 'h':
((short*)p)[index] = val; ((short*)p)[index] = val;
break; break;

View File

@ -183,16 +183,21 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_from_obj, 2, 3, struct_unpack_
// This function assumes there is enough room in p to store all the values // This function assumes there is enough room in p to store all the values
STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, const mp_obj_t *args) { STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, const mp_obj_t *args) {
size_t size;
size_t count = calc_size_items(mp_obj_str_get_str(fmt_in), &size);
if (count != n_args) {
#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE
mp_raise_ValueError(NULL);
#else
mp_raise_ValueError_varg(translate("pack expected %d items for packing (got %d)"), count, n_args);
#endif
}
const char *fmt = mp_obj_str_get_str(fmt_in); const char *fmt = mp_obj_str_get_str(fmt_in);
char fmt_type = get_fmt_type(&fmt); char fmt_type = get_fmt_type(&fmt);
size_t i; size_t i;
for (i = 0; i < n_args;) { for (i = 0; i < n_args;) {
mp_uint_t cnt = 1; mp_uint_t cnt = 1;
if (*fmt == '\0') {
// more arguments given than used by format string; CPython raises struct.error here
break;
}
if (unichar_isdigit(*fmt)) { if (unichar_isdigit(*fmt)) {
cnt = get_fmt_num(&fmt); cnt = get_fmt_num(&fmt);
} }
@ -208,8 +213,7 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c
memset(p + to_copy, 0, cnt - to_copy); memset(p + to_copy, 0, cnt - to_copy);
p += cnt; p += cnt;
} else { } else {
// If we run out of args then we just finish; CPython would raise struct.error while (cnt--) {
while (cnt-- && i < n_args) {
mp_binary_set_val(fmt_type, *fmt, args[i], &p); mp_binary_set_val(fmt_type, *fmt, args[i], &p);
// Pad bytes don't have a corresponding argument. // Pad bytes don't have a corresponding argument.
if (*fmt != 'x') { if (*fmt != 'x') {
@ -222,7 +226,6 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c
} }
STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) { STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) {
// TODO: "The arguments must match the values required by the format exactly."
mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0])); mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0]));
vstr_t vstr; vstr_t vstr;
vstr_init_len(&vstr, size); vstr_init_len(&vstr, size);

View File

@ -97,6 +97,9 @@ STATIC void array_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
#if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY #if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY
STATIC mp_obj_array_t *array_new(char typecode, size_t n) { STATIC mp_obj_array_t *array_new(char typecode, size_t n) {
if (typecode == 'x') {
mp_raise_ValueError(translate("bad typecode"));
}
int typecode_size = mp_binary_get_size('@', typecode, NULL); int typecode_size = mp_binary_get_size('@', typecode, NULL);
mp_obj_array_t *o = m_new_obj(mp_obj_array_t); mp_obj_array_t *o = m_new_obj(mp_obj_array_t);
#if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY #if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY
@ -126,8 +129,10 @@ STATIC mp_obj_t array_construct(char typecode, mp_obj_t initializer) {
|| (MICROPY_PY_BUILTINS_BYTEARRAY && MP_OBJ_IS_TYPE(initializer, &mp_type_bytearray))))) || (MICROPY_PY_BUILTINS_BYTEARRAY && MP_OBJ_IS_TYPE(initializer, &mp_type_bytearray)))))
&& mp_get_buffer(initializer, &bufinfo, MP_BUFFER_READ)) { && mp_get_buffer(initializer, &bufinfo, MP_BUFFER_READ)) {
// construct array from raw bytes // construct array from raw bytes
// we round-down the len to make it a multiple of sz (CPython raises error)
size_t sz = mp_binary_get_size('@', typecode, NULL); size_t sz = mp_binary_get_size('@', typecode, NULL);
if (bufinfo.len % sz) {
mp_raise_ValueError(translate("bytes length not a multiple of item size"));
}
size_t len = bufinfo.len / sz; size_t len = bufinfo.len / sz;
mp_obj_array_t *o = array_new(typecode, len); mp_obj_array_t *o = array_new(typecode, len);
memcpy(o->items, bufinfo.buf, len * sz); memcpy(o->items, bufinfo.buf, len * sz);

View File

@ -39,6 +39,28 @@ print(v == (10, 100, 200, 300))
# network byte order # network byte order
print(struct.pack('!i', 123)) print(struct.pack('!i', 123))
# too short / too long arguments
buf = bytearray(b'>>>123<<<')
try:
struct.pack_into('bb', buf, 0, 3)
except:
print('struct.error')
try:
struct.pack_into('bb', buf, 0, 3, 1, 4)
except:
print('struct.error')
try:
struct.pack('bb', 3)
except:
print('struct.error')
try:
struct.pack('bb', 3, 1, 4)
except:
print('struct.error')
# check that we get an error if the buffer is too small # check that we get an error if the buffer is too small
try: try:
struct.unpack('I', b'\x00\x00\x00') struct.unpack('I', b'\x00\x00\x00')
@ -96,3 +118,6 @@ try:
print(struct.unpack_from('<b', buf, -11)) print(struct.unpack_from('<b', buf, -11))
except: except:
print('struct.error') print('struct.error')
# check padding bytes
print(struct.pack("xb", 3))

View File

@ -1,41 +0,0 @@
# test MicroPython-specific features of struct
try:
import ustruct as struct
except:
try:
import struct
except ImportError:
print("SKIP")
raise SystemExit
try:
struct.pack('O', None)
except ValueError:
print("SKIP")
raise SystemExit
class A():
pass
# pack and unpack objects
o = A()
s = struct.pack("<O", o)
o2 = struct.unpack("<O", s)
print(o is o2[0])
# pack can accept less arguments than required for the format spec
print(struct.pack('<2I', 1))
# pack and unpack pointer to a string
# This requires uctypes to get the address of the string and instead of
# putting this in a dedicated test that can be skipped we simply pass
# if the import fails.
try:
import uctypes
o = uctypes.addressof('abc')
s = struct.pack("<S", o)
o2 = struct.unpack("<S", s)
assert o2[0] == 'abc'
except ImportError:
pass

View File

@ -1,2 +0,0 @@
True
b'\x01\x00\x00\x00\x00\x00\x00\x00'

View File

@ -105,12 +105,6 @@ try:
except NotImplementedError: except NotImplementedError:
print('NotImplementedError') print('NotImplementedError')
# struct pack with too many args, not checked by uPy
print(struct.pack('bb', 1, 2, 3))
# struct pack with too few args, not checked by uPy
print(struct.pack('bb', 1))
# array slice assignment with unsupported RHS # array slice assignment with unsupported RHS
try: try:
bytearray(4)[0:1] = [1, 2] bytearray(4)[0:1] = [1, 2]

View File

@ -14,8 +14,6 @@ NotImplementedError
NotImplementedError NotImplementedError
NotImplementedError NotImplementedError
NotImplementedError NotImplementedError
b'\x01\x02'
b'\x01\x00'
NotImplementedError NotImplementedError
AttributeError AttributeError
TypeError TypeError