Improve struct compatibility with CPython

This commit is contained in:
Dan Halbert 2019-01-20 15:10:09 -05:00
parent d2a7cd6c5a
commit 7a09af73ec
5 changed files with 134 additions and 83 deletions

View File

@ -49,7 +49,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) {
switch (struct_type) {
case '<': case '>':
switch (val_type) {
case 'b': case 'B':
case 'b': case 'B': case 'x':
size = 1; break;
case 'h': case 'H':
size = 2; break;
@ -79,7 +79,7 @@ size_t mp_binary_get_size(char struct_type, char val_type, mp_uint_t *palign) {
// particular (or any) ABI.
switch (val_type) {
case BYTEARRAY_TYPECODE:
case 'b': case 'B':
case 'b': case 'B': case 'x':
align = size = 1; break;
case 'h': case 'H':
align = alignof(short);
@ -126,6 +126,7 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, mp_uint_t index) {
break;
case BYTEARRAY_TYPECODE:
case 'B':
case 'x': // value will be discarded
val = ((unsigned char*)p)[index];
break;
case 'h':
@ -364,6 +365,8 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, mp_uint_t index, m
case 'B':
((unsigned char*)p)[index] = val;
break;
case 'x':
((unsigned char*)p)[index] = 0;
case 'h':
((short*)p)[index] = val;
break;

View File

@ -97,7 +97,10 @@ STATIC size_t calc_size_items(const char *fmt, size_t *total_sz) {
total_cnt += 1;
size += cnt;
} else {
total_cnt += cnt;
// Pad bytes are skipped and don't get included in the item count.
if (*fmt != 'x') {
total_cnt += cnt;
}
mp_uint_t align;
size_t sz = mp_binary_get_size(fmt_type, *fmt, &align);
while (cnt--) {
@ -166,7 +169,10 @@ STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
} else {
while (cnt--) {
item = mp_binary_get_val(fmt_type, *fmt, &p);
res->items[i++] = item;
// Pad bytes ('x') are just skipped.
if (*fmt != 'x') {
res->items[i++] = item;
}
}
}
fmt++;
@ -204,7 +210,11 @@ STATIC void struct_pack_into_internal(mp_obj_t fmt_in, byte *p, size_t n_args, c
} else {
// If we run out of args then we just finish; CPython would raise struct.error
while (cnt-- && i < n_args) {
mp_binary_set_val(fmt_type, *fmt, args[i++], &p);
mp_binary_set_val(fmt_type, *fmt, args[i], &p);
// Pad bytes don't have a corresponding argument.
if (*fmt != 'x') {
i++;
}
}
}
fmt++;

View File

@ -51,7 +51,7 @@
//|
//| Supported size/byte order prefixes: *@*, *<*, *>*, *!*.
//|
//| Supported format codes: *b*, *B*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*,
//| Supported format codes: *b*, *B*, *x*, *h*, *H*, *i*, *I*, *l*, *L*, *q*, *Q*,
//| *s*, *P*, *f*, *d* (the latter 2 depending on the floating-point support).
//|
@ -74,7 +74,6 @@ MP_DEFINE_CONST_FUN_OBJ_1(struct_calcsize_obj, struct_calcsize);
//|
STATIC mp_obj_t struct_pack(size_t n_args, const mp_obj_t *args) {
// TODO: "The arguments must match the values required by the format exactly."
mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0]));
vstr_t vstr;
vstr_init_len(&vstr, size);
@ -115,49 +114,67 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_pack_into_obj, 3, MP_OBJ_FUN_ARGS_MAX
//| .. function:: unpack(fmt, data)
//|
//| Unpack from the data according to the format string fmt. The return value
//| is a tuple of the unpacked values.
//| is a tuple of the unpacked values. The buffer size must match the size
//| required by the format.
//|
//| .. function:: unpack_from(fmt, data, offset)
//|
//| Unpack from the data starting at offset according to the format string fmt.
//| offset may be negative to count from the end of buffer. The return value is
//| a tuple of the unpacked values.
//|
STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
// unpack requires that the buffer be exactly the right size.
// unpack_from requires that the buffer be "big enough".
// Since we implement unpack and unpack_from using the same function
// we relax the "exact" requirement, and only implement "big enough".
STATIC mp_obj_t struct_unpack(size_t n_args, const mp_obj_t *args) {
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
byte *p = bufinfo.buf;
byte *end_p = &p[bufinfo.len];
if (n_args > 2) {
mp_int_t offset = mp_obj_get_int(args[2]);
// offset arg provided
if (offset < 0) {
// negative offsets are relative to the end of the buffer
offset = bufinfo.len + offset;
if (offset < 0) {
mp_raise_RuntimeError(translate("buffer too small"));
}
}
p += offset;
}
return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p));
// true means the buffer size must match the size required by the format exactly.
return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[0] , p, end_p, true));
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_from_obj, 2, 3, struct_unpack_from);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_unpack_obj, 2, 3, struct_unpack);
//| .. function:: unpack_from(fmt, data, offset=0)
//|
//| Unpack from the data starting at offset according to the format string fmt.
//| offset may be negative to count from the end of buffer. The return value is
//| a tuple of the unpacked values. The buffer size must be at least as big
//| as the size required by the format.
//|
// Implements struct.unpack_from(format, buffer, offset=0).
//
// format and buffer are required; offset defaults to 0. Arguments are parsed
// from both positional and keyword arguments by mp_arg_parse_all().
// A negative offset is taken relative to the end of the buffer.
// Raises RuntimeError("buffer too small") if the resolved offset lies outside
// the buffer. Returns the tuple produced by shared_modules_struct_unpack_from().
STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
    enum { ARG_format, ARG_buffer, ARG_offset };
    static const mp_arg_t allowed_args[] = {
        { MP_QSTR_format, MP_ARG_REQUIRED | MP_ARG_OBJ },
        { MP_QSTR_buffer, MP_ARG_REQUIRED | MP_ARG_OBJ },
        { MP_QSTR_offset, MP_ARG_INT, {.u_int = 0} },
    };

    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);

    mp_buffer_info_t bufinfo;
    mp_get_buffer_raise(args[ARG_buffer].u_obj, &bufinfo, MP_BUFFER_READ);
    byte *p = bufinfo.buf;
    byte *end_p = &p[bufinfo.len];

    mp_int_t offset = args[ARG_offset].u_int;
    if (offset < 0) {
        // Negative offsets are relative to the end of the buffer.
        offset = bufinfo.len + offset;
    }
    // Reject offsets outside the buffer before advancing p, so that we never
    // form a pointer before the start or beyond one-past-the-end of the buffer.
    if (offset < 0 || (size_t)offset > bufinfo.len) {
        mp_raise_RuntimeError(translate("buffer too small"));
    }
    p += offset;

    // false means the size doesn't have to be exact. struct.unpack_from() only
    // requires that the buffer be big enough.
    return MP_OBJ_FROM_PTR(shared_modules_struct_unpack_from(args[ARG_format].u_obj, p, end_p, false));
}
MP_DEFINE_CONST_FUN_OBJ_KW(struct_unpack_from_obj, 0, struct_unpack_from);
STATIC const mp_rom_map_elem_t mp_module_struct_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_struct) },
{ MP_ROM_QSTR(MP_QSTR_calcsize), MP_ROM_PTR(&struct_calcsize_obj) },
{ MP_ROM_QSTR(MP_QSTR_pack), MP_ROM_PTR(&struct_pack_obj) },
{ MP_ROM_QSTR(MP_QSTR_pack_into), MP_ROM_PTR(&struct_pack_into_obj) },
{ MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_from_obj) },
{ MP_ROM_QSTR(MP_QSTR_unpack), MP_ROM_PTR(&struct_unpack_obj) },
{ MP_ROM_QSTR(MP_QSTR_unpack_from), MP_ROM_PTR(&struct_unpack_from_obj) },
};

View File

@ -29,6 +29,6 @@
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args);
mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in);
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p);
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size);
#endif // MICROPY_INCLUDED_SHARED_BINDINGS_RANDOM___INIT___H

View File

@ -71,45 +71,6 @@ mp_uint_t get_fmt_num(const char **p) {
return val;
}
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) {
const char *fmt = mp_obj_str_get_str(fmt_in);
char fmt_type = get_fmt_type(&fmt);
size_t i;
for (i = 0; i < n_args;) {
mp_uint_t sz = 1;
if (*fmt == '\0') {
// more arguments given than used by format string; CPython raises struct.error here
mp_raise_RuntimeError(translate("too many arguments provided with the given format"));
}
struct_validate_format(*fmt);
if (unichar_isdigit(*fmt)) {
sz = get_fmt_num(&fmt);
}
if (p + sz > end_p) {
mp_raise_RuntimeError(translate("buffer too small"));
}
if (*fmt == 's') {
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ);
mp_uint_t to_copy = sz;
if (bufinfo.len < to_copy) {
to_copy = bufinfo.len;
}
memcpy(p, bufinfo.buf, to_copy);
memset(p + to_copy, 0, sz - to_copy);
p += sz;
} else {
while (sz--) {
mp_binary_set_val(fmt_type, *fmt, args[i++], &p);
}
}
fmt++;
}
}
mp_uint_t calcsize_items(const char *fmt) {
mp_uint_t cnt = 0;
while (*fmt) {
@ -120,7 +81,10 @@ mp_uint_t calcsize_items(const char *fmt) {
num = 1;
}
}
cnt += num;
// Pad bytes are skipped and don't get included in the item count.
if (*fmt != 'x') {
cnt += num;
}
fmt++;
}
return cnt;
@ -155,14 +119,71 @@ mp_uint_t shared_modules_struct_calcsize(mp_obj_t fmt_in) {
return size;
}
void shared_modules_struct_pack_into(mp_obj_t fmt_in, byte *p, byte* end_p, size_t n_args, const mp_obj_t *args) {
const char *fmt = mp_obj_str_get_str(fmt_in);
char fmt_type = get_fmt_type(&fmt);
const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in);
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p) {
// Packing only needs the destination to hold total_sz bytes starting at p.
// Requiring an exact match would reject packing into a larger buffer.
if (p + total_sz > end_p) {
    mp_raise_RuntimeError(translate("buffer too small"));
}
size_t i;
for (i = 0; i < n_args;) {
mp_uint_t sz = 1;
if (*fmt == '\0') {
// more arguments given than used by format string; CPython raises struct.error here
mp_raise_RuntimeError(translate("too many arguments provided with the given format"));
}
struct_validate_format(*fmt);
if (unichar_isdigit(*fmt)) {
sz = get_fmt_num(&fmt);
}
if (*fmt == 's') {
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(args[i++], &bufinfo, MP_BUFFER_READ);
mp_uint_t to_copy = sz;
if (bufinfo.len < to_copy) {
to_copy = bufinfo.len;
}
memcpy(p, bufinfo.buf, to_copy);
memset(p + to_copy, 0, sz - to_copy);
p += sz;
} else {
while (sz--) {
mp_binary_set_val(fmt_type, *fmt, args[i], &p);
// Pad bytes don't have a corresponding argument.
if (*fmt != 'x') {
i++;
}
}
}
fmt++;
}
}
mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byte *end_p, bool exact_size) {
const char *fmt = mp_obj_str_get_str(fmt_in);
char fmt_type = get_fmt_type(&fmt);
mp_uint_t num_items = calcsize_items(fmt);
const mp_uint_t num_items = calcsize_items(fmt);
const mp_uint_t total_sz = shared_modules_struct_calcsize(fmt_in);
mp_obj_tuple_t *res = MP_OBJ_TO_PTR(mp_obj_new_tuple(num_items, NULL));
// If exact_size, make sure the buffer is exactly the right size.
// Otherwise just make sure it's big enough.
if (exact_size) {
if (p + total_sz != end_p) {
mp_raise_RuntimeError(translate("buffer size must match format"));
}
} else {
if (p + total_sz > end_p) {
mp_raise_RuntimeError(translate("buffer too small"));
}
}
for (uint i = 0; i < num_items;) {
mp_uint_t sz = 1;
@ -171,9 +192,6 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt
if (unichar_isdigit(*fmt)) {
sz = get_fmt_num(&fmt);
}
if (p + sz > end_p) {
mp_raise_RuntimeError(translate("buffer too small"));
}
mp_obj_t item;
if (*fmt == 's') {
item = mp_obj_new_bytes(p, sz);
@ -182,7 +200,10 @@ mp_obj_tuple_t * shared_modules_struct_unpack_from(mp_obj_t fmt_in, byte *p, byt
} else {
while (sz--) {
item = mp_binary_get_val(fmt_type, *fmt, &p);
res->items[i++] = item;
// Pad bytes are not stored.
if (*fmt != 'x') {
res->items[i++] = item;
}
}
}
fmt++;