From 7a09af73ec7f4a11a1404dff6276e09072d06983 Mon Sep 17 00:00:00 2001 From: Dan Halbert Date: Sun, 20 Jan 2019 15:10:09 -0500 Subject: [PATCH] Improve struct compatibility with CPython --- py/binary.c | 7 +- py/modstruct.c | 16 ++++- shared-bindings/struct/__init__.c | 79 +++++++++++++-------- shared-bindings/struct/__init__.h | 2 +- shared-module/struct/__init__.c | 113 ++++++++++++++++++------------ 5 files changed, 134 insertions(+), 83 deletions(-) diff --git a/py/binary.c b/py/binary.c index ca851c9369..9c3a49e8f9 100644 --- a/py/binary.c +++ b/py/binary.c @@ -49,7 +49,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) { switch (struct_type) { case '<': case '>': switch (val_type) { - case 'b': case 'B': + case 'b': case 'B': case 'x': size = 1; break; case 'h': case 'H': size = 2; break; @@ -79,7 +79,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) { // particular (or any) ABI. switch (val_type) { case BYTEARRAY_TYPECODE: - case 'b': case 'B': + case 'b': case 'B': case 'x': align = size = 1; break; case 'h': case 'H': align = alignof(short); @@ -126,6 +126,7 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, mp_uint_t index) { break; case BYTEARRAY_TYPECODE: case 'B': + case 'x': // value will be discarded val = ((unsigned char*)p)[index]; break; case 'h': @@ -364,6 +365,8 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, mp_uint_t index, m case 'B': ((unsigned char*)p)[index] = val; break; + case 'x': + ((unsigned char*)p)[index] = 0; case 'h': ((short*)p)[index] = val; break; diff --git a/py/modstruct.c b/py/modstruct.c index 3f1b2f8b8c..a238d3935a 100644 --- a/py/modstruct.c +++ b/py/modstruct.c @@ -97,7 +97,10 @@ STATIC size_t calc_size_items(const char *fmt, size_t *total_sz) { total_cnt += 1; size += cnt; } else { - total_cnt += cnt; + // Pad bytes are skipped and don't get included in the item count. + if (*fmt != 'x') { + total_cnt += cnt; + } mp_uint_t align; size_t sz = mp_binary_get_size(fmt_type, *fmt, &align); while (cnt--) { @@ -166,7 +169,10 @@ STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) { } else { while (cnt--) { item = mp_binary_get_val(fmt_type, *fmt, &p); - res->items[i++] = item; + // Pad bytes ('x') are just skipped. + if (*fmt != 'x') { + res->items[i++] = item; + } } } fmt++; @@ -204,7 +210,11 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c } else { // If we run out of args then we just finish; CPython would raise struct.error while (cnt-- && i < n_args) { - mp_binary_set_val(fmt_type, *fmt, args[i++], &p); + mp_binary_set_val(fmt_type, *fmt, args[i], &p); + // Pad bytes don't have a corresponding argument. + if (*fmt != 'x') { + i++; + } } } fmt++; diff --git a/shared-bindings/struct/__init__.c b/shared-bindings/struct/__init__.c index 0935977859..9240a15bb1 100644 --- a/shared-bindings/struct/__init__.c +++ b/shared-bindings/struct/__init__.c @@ -51,7 +51,7 @@ //| //| Supported size/byte order prefixes: *@*, *<*, *>*, *!*. //| -//| Supported format codes: *b*, *B*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*, +//| Supported format codes: *b*, *B*, *x*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*, //| *s*, *P*, *f*, *d* (the latter 2 depending on the floating-point support). //| @@ -74,7 +74,6 @@ MP_DEFINE_CONST_FUN_OBJ_1(struct_calcsize_obj, struct_calcsize); //| STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) { - // TODO: "The arguments must match the values required by the format exactly." mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0])); vstr_t vstr; vstr_init_len(&vstr, size); @@ -115,49 +114,67 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_pack_into_obj, 3, MP_OBJ_FUN_ARGS_MAX //| .. function:: unpack(fmt, data) //| //| Unpack from the data according to the format string fmt. The return value -//| is a tuple of the unpacked values. +//| is a tuple of the unpacked values. The buffer size must match the size +//| required by the format. //| -//| .. function:: unpack_from(fmt, data, offset) -//| -//| Unpack from the data starting at offset according to the format string fmt. -//| offset may be negative to count from the end of buffer. The return value is -//| a tuple of the unpacked values. -//| - -STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) { - // unpack requires that the buffer be exactly the right size. - // unpack_from requires that the buffer be "big enough". - // Since we implement unpack and unpack_from using the same function - // we relax the "exact" requirement, and only implement "big enough". +STATIC mp_obj_t struct_unpack(size_t n_args, const mp_obj_t *args) { mp_buffer_info_t bufinfo; mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ); byte *p = bufinfo.buf; byte *end_p = &p[bufinfo.len]; - if (n_args > 2) { - mp_int_t offset = mp_obj_get_int(args[2]); - // offset arg provided - if (offset < 0) { - // negative offsets are relative to the end of the buffer - offset = bufinfo.len + offset; - if (offset < 0) { - mp_raise_RuntimeError(translate("buffer too small")); - } - } - p += offset; - } - - return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p)); + // true means check the size must be exactly right. + return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p, true)); } -MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_from_obj, 2, 3, struct_unpack_from); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_obj, 2, 3, struct_unpack); + +//| .. function:: unpack_from(fmt, data, offset=0) +//| +//| Unpack from the data starting at offset according to the format string fmt. +//| offset may be negative to count from the end of buffer. The return value is +//| a tuple of the unpacked values. The buffer size must be at least as big +//| as the size required by the form. +//| + +STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + enum { ARG_format, ARG_buffer, ARG_offset }; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_format, MP_ARG_REQUIRED | MP_ARG_OBJ }, + { MP_QSTR_buffer, MP_ARG_REQUIRED | MP_ARG_OBJ }, + { MP_QSTR_offset, MP_ARG_INT, {.u_int = 0} }, + }; + + mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; + mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + + mp_buffer_info_t bufinfo; + mp_get_buffer_raise(args[ARG_buffer].u_obj, &bufinfo, MP_BUFFER_READ); + byte *p = bufinfo.buf; + byte *end_p = &p[bufinfo.len]; + + mp_int_t offset = args[ARG_offset].u_int; + if (offset < 0) { + // negative offsets are relative to the end of the buffer + offset = bufinfo.len + offset; + if (offset < 0) { + mp_raise_RuntimeError(translate("buffer too small")); + } + } + p += offset; + + // false means the size doesn't have to be exact. struct.unpack_from() only requires + // that be buffer be big enough. + return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[ARG_format].u_obj, p, end_p, false)); +} +MP_DEFINE_CONST_FUN_OBJ_KW(struct_unpack_from_obj, 0, struct_unpack_from); STATIC const mp_rom_map_elem_t mp_module_struct_globals_table[] = { { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_struct) }, { MP_ROM_QSTR(MP_QSTR_calcsize), MP_ROM_PTR(&struct_calcsize_obj) }, { MP_ROM_QSTR(MP_QSTR_pack), MP_ROM_PTR(&struct_pack_obj) }, { MP_ROM_QSTR(MP_QSTR_pack_into), MP_ROM_PTR(&struct_pack_into_obj) }, - { MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_from_obj) }, + { MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_obj) }, { MP_ROM_QSTR(MP_QSTR_unpack_from), MP_ROM_PTR(&struct_unpack_from_obj) }, }; diff --git a/shared-bindings/struct/__init__.h b/shared-bindings/struct/__init__.h index c4e867aaaf..a7e72b11c7 100644 --- a/shared-bindings/struct/__init__.h +++ b/shared-bindings/struct/__init__.h @@ -29,6 +29,6 @@ void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args); mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in); -mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p); +mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size); #endif // MICROPY_INCLUDED_SHARED_BINDINGS_RANDOM___INIT___H diff --git a/shared-module/struct/__init__.c b/shared-module/struct/__init__.c index 78c04f07ba..28e7c0c3f9 100644 --- a/shared-module/struct/__init__.c +++ b/shared-module/struct/__init__.c @@ -71,45 +71,6 @@ mp_uint_t get_fmt_num(const char **p) { return val; } -void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) { - const char *fmt = mp_obj_str_get_str(fmt_in); - char fmt_type = get_fmt_type(&fmt); - - size_t i; - for (i = 0; i < n_args;) { - mp_uint_t sz = 1; - if (*fmt == '\0') { - // more arguments given than used by format string; CPython raises struct.error here - mp_raise_RuntimeError(translate("too many arguments provided with the given format")); - } - struct_validate_format(*fmt); - - if (unichar_isdigit(*fmt)) { - sz = get_fmt_num(&fmt); - } - if (p + sz > end_p) { - mp_raise_RuntimeError(translate("buffer too small")); - } - - if (*fmt == 's') { - mp_buffer_info_t bufinfo; - mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ); - mp_uint_t to_copy = sz; - if (bufinfo.len < to_copy) { - to_copy = bufinfo.len; - } - memcpy(p, bufinfo.buf, to_copy); - memset(p + to_copy, 0, sz - to_copy); - p += sz; - } else { - while (sz--) { - mp_binary_set_val(fmt_type, *fmt, args[i++], &p); - } - } - fmt++; - } -} - mp_uint_t calcsize_items(const char *fmt) { mp_uint_t cnt = 0; while (*fmt) { @@ -120,7 +81,10 @@ mp_uint_t calcsize_items(const char *fmt) { num = 1; } } - cnt += num; + // Pad bytes are skipped and don't get included in the item count. + if (*fmt != 'x') { + cnt += num; + } fmt++; } return cnt; @@ -155,14 +119,71 @@ mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in) { return size; } +void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) { + const char *fmt = mp_obj_str_get_str(fmt_in); + char fmt_type = get_fmt_type(&fmt); + const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in); -mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p) { + if (p + total_sz != end_p) { + mp_raise_msg_varg(&mp_type_RuntimeError, translate("unpack requires a buffer of %d bytes"), total_sz); + } + + size_t i; + for (i = 0; i < n_args;) { + mp_uint_t sz = 1; + if (*fmt == '\0') { + // more arguments given than used by format string; CPython raises struct.error here + mp_raise_RuntimeError(translate("too many arguments provided with the given format")); + } + struct_validate_format(*fmt); + + if (unichar_isdigit(*fmt)) { + sz = get_fmt_num(&fmt); + } + + if (*fmt == 's') { + mp_buffer_info_t bufinfo; + mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ); + mp_uint_t to_copy = sz; + if (bufinfo.len < to_copy) { + to_copy = bufinfo.len; + } + memcpy(p, bufinfo.buf, to_copy); + memset(p + to_copy, 0, sz - to_copy); + p += sz; + } else { + while (sz--) { + mp_binary_set_val(fmt_type, *fmt, args[i], &p); + // Pad bytes don't have a corresponding argument. + if (*fmt != 'x') { + i++; + } + } + } + fmt++; + } +} + +mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size) { const char *fmt = mp_obj_str_get_str(fmt_in); char fmt_type = get_fmt_type(&fmt); - mp_uint_t num_items = calcsize_items(fmt); + const mp_uint_t num_items = calcsize_items(fmt); + const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in); mp_obj_tuple_t *res = MP_OBJ_TO_PTR(mp_obj_new_tuple(num_items, NULL)); + // If exact_size, make sure the buffer is exactly the right size. + // Otherwise just make sure it's big enough. + if (exact_size) { + if (p + total_sz != end_p) { + mp_raise_RuntimeError(translate("buffer size must match format")); + } + } else { + if (p + total_sz > end_p) { + mp_raise_RuntimeError(translate("buffer too small")); + } + } + for (uint i = 0; i < num_items;) { mp_uint_t sz = 1; @@ -171,9 +192,6 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt if (unichar_isdigit(*fmt)) { sz = get_fmt_num(&fmt); } - if (p + sz > end_p) { - mp_raise_RuntimeError(translate("buffer too small")); - } mp_obj_t item; if (*fmt == 's') { item = mp_obj_new_bytes(p, sz); @@ -182,7 +200,10 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt } else { while (sz--) { item = mp_binary_get_val(fmt_type, *fmt, &p); - res->items[i++] = item; + // Pad bytes are not stored. + if (*fmt != 'x') { + res->items[i++] = item; + } } } fmt++;