py/objstr: Consolidate methods for str/bytes/bytearray/array.

This commit adds the bytes methods to bytearray, matching CPython.  The
existing implementations of these methods for str/bytes are reused for
bytearray with minor updates to match CPython return types.

For details on the CPython behaviour see
https://docs.python.org/3/library/stdtypes.html#bytes-and-bytearray-operations

The work to merge locals tables for str/bytes/bytearray/array was done by
@jimmo.  Because of this merging of locals the change in code size for this
commit is mostly negative:

       bare-arm:    +0 +0.000%
    minimal x86:   +29 +0.018%
       unix x64:  -792 -0.128% standard[incl -448(data)]
    unix nanbox:  -436 -0.078% nanbox[incl -448(data)]
          stm32:   -40 -0.010% PYBV10
         cc3200:   -32 -0.017%
        esp8266:   -28 -0.004% GENERIC
          esp32:   -72 -0.005% GENERIC[incl -200(data)]
         mimxrt:   -40 -0.011% TEENSY40
     renesas-ra:   -40 -0.006% RA6M2_EK
            nrf:   -16 -0.009% pca10040
            rp2:   -64 -0.013% PICO
           samd:  +148 +0.105% ADAFRUIT_ITSYBITSY_M4_EXPRESS
This commit is contained in:
Andrew Leech 2022-08-10 14:13:17 +10:00 committed by Damien George
parent 82b3500724
commit f7f56d4285
11 changed files with 225 additions and 109 deletions

View File

@ -394,19 +394,21 @@ typedef struct _mp_rom_obj_t { mp_const_obj_t o; } mp_rom_obj_t;
.table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \ .table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \
} }
#define MP_DEFINE_CONST_DICT(dict_name, table_name) \ #define MP_DEFINE_CONST_DICT_WITH_SIZE(dict_name, table_name, n) \
const mp_obj_dict_t dict_name = { \ const mp_obj_dict_t dict_name = { \
.base = {&mp_type_dict}, \ .base = {&mp_type_dict}, \
.map = { \ .map = { \
.all_keys_are_qstrs = 1, \ .all_keys_are_qstrs = 1, \
.is_fixed = 1, \ .is_fixed = 1, \
.is_ordered = 1, \ .is_ordered = 1, \
.used = MP_ARRAY_SIZE(table_name), \ .used = n, \
.alloc = MP_ARRAY_SIZE(table_name), \ .alloc = n, \
.table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \ .table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \
}, \ }, \
} }
#define MP_DEFINE_CONST_DICT(dict_name, table_name) MP_DEFINE_CONST_DICT_WITH_SIZE(dict_name, table_name, MP_ARRAY_SIZE(table_name))
// These macros are used to declare and define constant staticmethod and classmethod objects // These macros are used to declare and define constant staticmethod and classmethod objects
// You can put "static" in front of the definitions to make them local // You can put "static" in front of the definitions to make them local
@ -789,7 +791,7 @@ mp_obj_t mp_obj_new_str(const char *data, size_t len);
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len); mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len);
mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr); mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr);
mp_obj_t mp_obj_new_bytes(const byte *data, size_t len); mp_obj_t mp_obj_new_bytes(const byte *data, size_t len);
mp_obj_t mp_obj_new_bytearray(size_t n, void *items); mp_obj_t mp_obj_new_bytearray(size_t n, const void *items);
mp_obj_t mp_obj_new_bytearray_by_ref(size_t n, void *items); mp_obj_t mp_obj_new_bytearray_by_ref(size_t n, void *items);
#if MICROPY_PY_BUILTINS_FLOAT #if MICROPY_PY_BUILTINS_FLOAT
mp_obj_t mp_obj_new_int_from_float(mp_float_t val); mp_obj_t mp_obj_new_int_from_float(mp_float_t val);

View File

@ -381,7 +381,7 @@ STATIC mp_obj_t array_append(mp_obj_t self_in, mp_obj_t arg) {
self->free--; self->free--;
return mp_const_none; // return None, as per CPython return mp_const_none; // return None, as per CPython
} }
STATIC MP_DEFINE_CONST_FUN_OBJ_2(array_append_obj, array_append); MP_DEFINE_CONST_FUN_OBJ_2(mp_obj_array_append_obj, array_append);
STATIC mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in) { STATIC mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in) {
// self is not a memoryview, so we don't need to use (& TYPECODE_MASK) // self is not a memoryview, so we don't need to use (& TYPECODE_MASK)
@ -413,7 +413,7 @@ STATIC mp_obj_t array_extend(mp_obj_t self_in, mp_obj_t arg_in) {
return mp_const_none; return mp_const_none;
} }
STATIC MP_DEFINE_CONST_FUN_OBJ_2(array_extend_obj, array_extend); MP_DEFINE_CONST_FUN_OBJ_2(mp_obj_array_extend_obj, array_extend);
#endif #endif
STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value) { STATIC mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value) {
@ -564,18 +564,6 @@ STATIC mp_int_t array_get_buffer(mp_obj_t o_in, mp_buffer_info_t *bufinfo, mp_ui
return 0; return 0;
} }
#if MICROPY_PY_BUILTINS_BYTEARRAY || MICROPY_PY_ARRAY
// Method table for the array-like types: the single locals dict built from it
// is installed as .locals_dict on both mp_type_array and mp_type_bytearray.
STATIC const mp_rom_map_elem_t array_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_append), MP_ROM_PTR(&array_append_obj) },
{ MP_ROM_QSTR(MP_QSTR_extend), MP_ROM_PTR(&array_extend_obj) },
// decode() is only provided when CPython compatibility is enabled.
#if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR(MP_QSTR_decode), MP_ROM_PTR(&bytes_decode_obj) },
#endif
};
STATIC MP_DEFINE_CONST_DICT(array_locals_dict, array_locals_dict_table);
#endif
#if MICROPY_PY_ARRAY #if MICROPY_PY_ARRAY
const mp_obj_type_t mp_type_array = { const mp_obj_type_t mp_type_array = {
{ &mp_type_type }, { &mp_type_type },
@ -587,7 +575,7 @@ const mp_obj_type_t mp_type_array = {
.binary_op = array_binary_op, .binary_op = array_binary_op,
.subscr = array_subscr, .subscr = array_subscr,
.buffer_p = { .get_buffer = array_get_buffer }, .buffer_p = { .get_buffer = array_get_buffer },
.locals_dict = (mp_obj_dict_t *)&array_locals_dict, .locals_dict = (mp_obj_dict_t *)&mp_obj_array_locals_dict,
}; };
#endif #endif
@ -603,7 +591,7 @@ const mp_obj_type_t mp_type_bytearray = {
.binary_op = array_binary_op, .binary_op = array_binary_op,
.subscr = array_subscr, .subscr = array_subscr,
.buffer_p = { .get_buffer = array_get_buffer }, .buffer_p = { .get_buffer = array_get_buffer },
.locals_dict = (mp_obj_dict_t *)&array_locals_dict, .locals_dict = (mp_obj_dict_t *)&mp_obj_bytearray_locals_dict,
}; };
#endif #endif
@ -631,7 +619,7 @@ size_t mp_obj_array_len(mp_obj_t self_in) {
*/ */
#if MICROPY_PY_BUILTINS_BYTEARRAY #if MICROPY_PY_BUILTINS_BYTEARRAY
mp_obj_t mp_obj_new_bytearray(size_t n, void *items) { mp_obj_t mp_obj_new_bytearray(size_t n, const void *items) {
mp_obj_array_t *o = array_new(BYTEARRAY_TYPECODE, n); mp_obj_array_t *o = array_new(BYTEARRAY_TYPECODE, n);
memcpy(o->items, items, n); memcpy(o->items, items, n);
return MP_OBJ_FROM_PTR(o); return MP_OBJ_FROM_PTR(o);

View File

@ -59,4 +59,9 @@ static inline void mp_obj_memoryview_init(mp_obj_array_t *self, size_t typecode,
} }
#endif #endif
#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
// Two-argument function objects wrapping array_append() and array_extend().
// They are declared in this header so that a method table defined in another
// translation unit can reference them.
MP_DECLARE_CONST_FUN_OBJ_2(mp_obj_array_append_obj);
MP_DECLARE_CONST_FUN_OBJ_2(mp_obj_array_extend_obj);
#endif
#endif // MICROPY_INCLUDED_PY_OBJARRAY_H #endif // MICROPY_INCLUDED_PY_OBJARRAY_H

View File

@ -41,6 +41,26 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in); STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
// Check that `arg` may be used as an argument to a string method invoked on an
// object of type `self_type`.
//
// The argument type normally has to be identical to the type of the object the
// method was called on, e.g. str.find(str) or bytes.startswith(bytes).  When
// bytearray support is enabled, bytes and bytearray are interchangeable: both
// sides are normalised to bytes before the comparison.
//
// On a mismatch control is handed to bad_implicit_conversion(arg), which is
// declared NORETURN.
STATIC void str_check_arg_type(const mp_obj_type_t *self_type, const mp_obj_t arg) {
    const mp_obj_type_t *expected = self_type;
    const mp_obj_type_t *actual = mp_obj_get_type(arg);

    #if MICROPY_PY_BUILTINS_BYTEARRAY
    // Collapse bytearray onto bytes on both sides so either is accepted for the other.
    if (expected == &mp_type_bytearray) {
        expected = &mp_type_bytes;
    }
    if (actual == &mp_type_bytearray) {
        actual = &mp_type_bytes;
    }
    #endif

    if (actual == expected) {
        return;
    }
    bad_implicit_conversion(arg);
}
/******************************************************************************/ /******************************************************************************/
/* str */ /* str */
@ -452,6 +472,7 @@ STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
mp_check_self(mp_obj_is_str_or_bytes(self_in)); mp_check_self(mp_obj_is_str_or_bytes(self_in));
const mp_obj_type_t *self_type = mp_obj_get_type(self_in); const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
const mp_obj_type_t *ret_type = self_type;
// get separation string // get separation string
GET_STR_DATA_LEN(self_in, sep_str, sep_len); GET_STR_DATA_LEN(self_in, sep_str, sep_len);
@ -469,8 +490,19 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
// count required length // count required length
size_t required_len = 0; size_t required_len = 0;
#if MICROPY_PY_BUILTINS_BYTEARRAY
if (self_type == &mp_type_bytearray) {
self_type = &mp_type_bytes;
}
#endif
for (size_t i = 0; i < seq_len; i++) { for (size_t i = 0; i < seq_len; i++) {
if (mp_obj_get_type(seq_items[i]) != self_type) { const mp_obj_type_t *seq_type = mp_obj_get_type(seq_items[i]);
#if MICROPY_PY_BUILTINS_BYTEARRAY
if (seq_type == &mp_type_bytearray) {
seq_type = &mp_type_bytes;
}
#endif
if (seq_type != self_type) {
mp_raise_TypeError( mp_raise_TypeError(
MP_ERROR_TEXT("join expects a list of str/bytes objects consistent with self object")); MP_ERROR_TEXT("join expects a list of str/bytes objects consistent with self object"));
} }
@ -496,7 +528,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
} }
// return joined string // return joined string
return mp_obj_new_str_from_vstr(self_type, &vstr); return mp_obj_new_str_from_vstr(ret_type, &vstr);
} }
MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join); MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
@ -545,9 +577,7 @@ mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
} else { } else {
// sep given // sep given
if (mp_obj_get_type(sep) != self_type) { str_check_arg_type(self_type, sep);
bad_implicit_conversion(sep);
}
size_t sep_len; size_t sep_len;
const char *sep_str = mp_obj_str_get_data(sep, &sep_len); const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
@ -699,9 +729,7 @@ STATIC mp_obj_t str_finder(size_t n_args, const mp_obj_t *args, int direction, b
mp_check_self(mp_obj_is_str_or_bytes(args[0])); mp_check_self(mp_obj_is_str_or_bytes(args[0]));
// check argument type // check argument type
if (mp_obj_get_type(args[1]) != self_type) { str_check_arg_type(self_type, args[1]);
bad_implicit_conversion(args[1]);
}
GET_STR_DATA_LEN(args[0], haystack, haystack_len); GET_STR_DATA_LEN(args[0], haystack, haystack_len);
GET_STR_DATA_LEN(args[1], needle, needle_len); GET_STR_DATA_LEN(args[1], needle, needle_len);
@ -805,9 +833,7 @@ STATIC mp_obj_t str_uni_strip(int type, size_t n_args, const mp_obj_t *args) {
chars_to_del = whitespace; chars_to_del = whitespace;
chars_to_del_len = sizeof(whitespace) - 1; chars_to_del_len = sizeof(whitespace) - 1;
} else { } else {
if (mp_obj_get_type(args[1]) != self_type) { str_check_arg_type(self_type, args[1]);
bad_implicit_conversion(args[1]);
}
GET_STR_DATA_LEN(args[1], s, l); GET_STR_DATA_LEN(args[1], s, l);
chars_to_del = s; chars_to_del = s;
chars_to_del_len = l; chars_to_del_len = l;
@ -1633,13 +1659,8 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) {
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]); const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
if (mp_obj_get_type(args[1]) != self_type) { str_check_arg_type(self_type, args[1]);
bad_implicit_conversion(args[1]); str_check_arg_type(self_type, args[2]);
}
if (mp_obj_get_type(args[2]) != self_type) {
bad_implicit_conversion(args[2]);
}
// extract string data // extract string data
@ -1726,9 +1747,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
mp_check_self(mp_obj_is_str_or_bytes(args[0])); mp_check_self(mp_obj_is_str_or_bytes(args[0]));
// check argument type // check argument type
if (mp_obj_get_type(args[1]) != self_type) { str_check_arg_type(self_type, args[1]);
bad_implicit_conversion(args[1]);
}
GET_STR_DATA_LEN(args[0], haystack, haystack_len); GET_STR_DATA_LEN(args[0], haystack, haystack_len);
GET_STR_DATA_LEN(args[1], needle, needle_len); GET_STR_DATA_LEN(args[1], needle, needle_len);
@ -1767,9 +1786,7 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) { STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
mp_check_self(mp_obj_is_str_or_bytes(self_in)); mp_check_self(mp_obj_is_str_or_bytes(self_in));
const mp_obj_type_t *self_type = mp_obj_get_type(self_in); const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
if (self_type != mp_obj_get_type(arg)) { str_check_arg_type(self_type, arg);
bad_implicit_conversion(arg);
}
GET_STR_DATA_LEN(self_in, str, str_len); GET_STR_DATA_LEN(self_in, str, str_len);
GET_STR_DATA_LEN(arg, sep, sep_len); GET_STR_DATA_LEN(arg, sep, sep_len);
@ -1795,6 +1812,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
result[2] = self_in; result[2] = self_in;
} }
#if MICROPY_PY_BUILTINS_BYTEARRAY
if (mp_obj_get_type(arg) != self_type) {
arg = mp_obj_new_str_of_type(self_type, sep, sep_len);
}
#endif
const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction); const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
if (position_ptr != NULL) { if (position_ptr != NULL) {
size_t position = position_ptr - str; size_t position = position_ptr - str;
@ -1940,17 +1963,15 @@ mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_u
} }
} }
STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = { // This locals table is used for the following types: str, bytes, bytearray, array.array.
// Each type takes a different section (start to end offset) of this table.
STATIC const mp_rom_map_elem_t array_bytearray_str_bytes_locals_table[] = {
#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
{ MP_ROM_QSTR(MP_QSTR_append), MP_ROM_PTR(&mp_obj_array_append_obj) },
{ MP_ROM_QSTR(MP_QSTR_extend), MP_ROM_PTR(&mp_obj_array_extend_obj) },
#endif
#if MICROPY_CPYTHON_COMPAT #if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR(MP_QSTR_decode), MP_ROM_PTR(&bytes_decode_obj) }, { MP_ROM_QSTR(MP_QSTR_decode), MP_ROM_PTR(&bytes_decode_obj) },
#if !MICROPY_PY_BUILTINS_STR_UNICODE
// If we have separate unicode type, then here we have methods only
// for bytes type, and it should not have encode() methods. Otherwise,
// we have non-compliant-but-practical bytestring type, which shares
// method table with bytes, so they both have encode() and decode()
// methods (which should do type checking at runtime).
{ MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
#endif
#endif #endif
{ MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) }, { MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) },
{ MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) }, { MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) },
@ -1986,9 +2007,46 @@ STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) }, { MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) },
{ MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) }, { MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) },
{ MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) }, { MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) },
#if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
#endif
}; };
STATIC MP_DEFINE_CONST_DICT(str8_locals_dict, str8_locals_dict_table); #if MICROPY_CPYTHON_COMPAT
#define TABLE_ENTRIES_COMPAT 1
#else
#define TABLE_ENTRIES_COMPAT 0
#endif
#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
#define TABLE_ENTRIES_ARRAY 2
#else
#define TABLE_ENTRIES_ARRAY 0
#endif
// Each locals dict below is a contiguous slice (start pointer + entry count) of
// array_bytearray_str_bytes_locals_table.  The table begins with the
// TABLE_ENTRIES_ARRAY entries (append, extend), followed by the
// TABLE_ENTRIES_COMPAT entry (decode), and ends with a second
// TABLE_ENTRIES_COMPAT entry (encode).

// str: skip the leading append/extend/decode entries and run to the end of the
// table, so encode (when present) is included.
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_str_locals_dict,
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT,
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - (TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT));

// bytes: without the compat entries the slice would be identical to the str one,
// so the str dict is simply aliased.  Otherwise the slice starts one entry
// earlier (at decode) with the same length, which drops the trailing encode.
#if TABLE_ENTRIES_COMPAT == 0
#define mp_obj_bytes_locals_dict mp_obj_str_locals_dict
#else
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_bytes_locals_dict,
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY,
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - (TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT));
#endif

// bytearray: starts at the beginning of the table (append/extend included) and
// stops before the trailing encode entry.
#if MICROPY_PY_BUILTINS_BYTEARRAY
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_bytearray_locals_dict,
array_bytearray_str_bytes_locals_table,
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - TABLE_ENTRIES_COMPAT);
#endif

// array.array: only the leading append/extend entries.
#if MICROPY_PY_ARRAY
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_array_locals_dict,
array_bytearray_str_bytes_locals_table,
TABLE_ENTRIES_ARRAY);
#endif
#if !MICROPY_PY_BUILTINS_STR_UNICODE #if !MICROPY_PY_BUILTINS_STR_UNICODE
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
@ -2002,9 +2060,9 @@ const mp_obj_type_t mp_type_str = {
.subscr = bytes_subscr, .subscr = bytes_subscr,
.getiter = mp_obj_new_str_iterator, .getiter = mp_obj_new_str_iterator,
.buffer_p = { .get_buffer = mp_obj_str_get_buffer }, .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
.locals_dict = (mp_obj_dict_t *)&str8_locals_dict, .locals_dict = (mp_obj_dict_t *)&mp_obj_str_locals_dict,
}; };
#endif #endif // !MICROPY_PY_BUILTINS_STR_UNICODE
// Reuses most of methods from str // Reuses most of methods from str
const mp_obj_type_t mp_type_bytes = { const mp_obj_type_t mp_type_bytes = {
@ -2016,7 +2074,7 @@ const mp_obj_type_t mp_type_bytes = {
.subscr = bytes_subscr, .subscr = bytes_subscr,
.getiter = mp_obj_new_bytes_iterator, .getiter = mp_obj_new_bytes_iterator,
.buffer_p = { .get_buffer = mp_obj_str_get_buffer }, .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
.locals_dict = (mp_obj_dict_t *)&str8_locals_dict, .locals_dict = (mp_obj_dict_t *)&mp_obj_bytes_locals_dict,
}; };
// The zero-length bytes object, with data that includes a null-terminating byte // The zero-length bytes object, with data that includes a null-terminating byte
@ -2044,6 +2102,10 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len) { mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len) {
if (type == &mp_type_str) { if (type == &mp_type_str) {
return mp_obj_new_str((const char *)data, len); return mp_obj_new_str((const char *)data, len);
#if MICROPY_PY_BUILTINS_BYTEARRAY
} else if (type == &mp_type_bytearray) {
return mp_obj_new_bytearray(len, data);
#endif
} else { } else {
return mp_obj_new_bytes(data, len); return mp_obj_new_bytes(data, len);
} }
@ -2068,18 +2130,24 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) {
} }
} }
// make a new str/bytes object byte *data;
mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type);
o->len = vstr->len;
o->hash = qstr_compute_hash((byte *)vstr->buf, vstr->len);
if (vstr->len + 1 == vstr->alloc) { if (vstr->len + 1 == vstr->alloc) {
o->data = (byte *)vstr->buf; data = (byte *)vstr->buf;
} else { } else {
o->data = (byte *)m_renew(char, vstr->buf, vstr->alloc, vstr->len + 1); data = (byte *)m_renew(char, vstr->buf, vstr->alloc, vstr->len + 1);
} }
((byte *)o->data)[o->len] = '\0'; // add null byte data[vstr->len] = '\0'; // add null byte
vstr->buf = NULL; vstr->buf = NULL;
vstr->alloc = 0; vstr->alloc = 0;
#if MICROPY_PY_BUILTINS_BYTEARRAY
if (type == &mp_type_bytearray) {
return mp_obj_new_bytearray_by_ref(vstr->len, data);
}
#endif
mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type);
o->len = vstr->len;
o->hash = qstr_compute_hash(data, vstr->len);
o->data = data;
return MP_OBJ_FROM_PTR(o); return MP_OBJ_FROM_PTR(o);
} }
@ -2179,6 +2247,7 @@ const byte *mp_obj_str_get_data_no_check(mp_obj_t self_in, size_t *len) {
if (mp_obj_is_qstr(self_in)) { if (mp_obj_is_qstr(self_in)) {
return qstr_data(MP_OBJ_QSTR_VALUE(self_in), len); return qstr_data(MP_OBJ_QSTR_VALUE(self_in), len);
} else { } else {
MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE;
*len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->len; *len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->len;
return ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->data; return ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->data;
} }

View File

@ -27,6 +27,7 @@
#define MICROPY_INCLUDED_PY_OBJSTR_H #define MICROPY_INCLUDED_PY_OBJSTR_H
#include "py/obj.h" #include "py/obj.h"
#include "py/objarray.h"
typedef struct _mp_obj_str_t { typedef struct _mp_obj_str_t {
mp_obj_base_t base; mp_obj_base_t base;
@ -36,6 +37,13 @@ typedef struct _mp_obj_str_t {
const byte *data; const byte *data;
} mp_obj_str_t; } mp_obj_str_t;
// This static assert is used to ensure that mp_obj_str_t and mp_obj_array_t are compatible,
// meaning that their len and data/items entries are at the same offsets in the struct.
// This allows the same code to be used for str/bytes and bytearray: accessors that read
// ->len and ->data through an mp_obj_str_t pointer place this assert immediately before
// doing so.
#define MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE \
MP_STATIC_ASSERT(offsetof(mp_obj_str_t, len) == offsetof(mp_obj_array_t, len) \
&& offsetof(mp_obj_str_t, data) == offsetof(mp_obj_array_t, items))
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte *)str} #define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte *)str}
// use this macro to extract the string hash // use this macro to extract the string hash
@ -70,6 +78,7 @@ const byte *mp_obj_str_get_data_no_check(mp_obj_t self_in, size_t *len);
if (mp_obj_is_qstr(str_obj_in)) { \ if (mp_obj_is_qstr(str_obj_in)) { \
str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \ str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
} else { \ } else { \
MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE; \
str_len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->len; \ str_len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->len; \
str_data = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->data; \ str_data = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->data; \
} }
@ -118,4 +127,14 @@ MP_DECLARE_CONST_FUN_OBJ_1(str_isupper_obj);
MP_DECLARE_CONST_FUN_OBJ_1(str_islower_obj); MP_DECLARE_CONST_FUN_OBJ_1(str_islower_obj);
MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj); MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj);
// Locals (method) dictionaries for the string-like types.  They are exported so
// that type objects defined in other source files can use them as .locals_dict.
extern const mp_obj_dict_t mp_obj_str_locals_dict;
// Only available when the bytearray type is enabled.
#if MICROPY_PY_BUILTINS_BYTEARRAY
extern const mp_obj_dict_t mp_obj_bytearray_locals_dict;
#endif
// Only available when the array module type is enabled.
#if MICROPY_PY_ARRAY
extern const mp_obj_dict_t mp_obj_array_locals_dict;
#endif
#endif // MICROPY_INCLUDED_PY_OBJSTR_H #endif // MICROPY_INCLUDED_PY_OBJSTR_H

View File

@ -116,7 +116,11 @@ const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, s
mp_obj_t index, bool is_slice) { mp_obj_t index, bool is_slice) {
// All str functions also handle bytes objects, and they call str_index_to_ptr(), // All str functions also handle bytes objects, and they call str_index_to_ptr(),
// so it must handle bytes. // so it must handle bytes.
if (type == &mp_type_bytes) { if (type == &mp_type_bytes
#if MICROPY_PY_BUILTINS_BYTEARRAY
|| type == &mp_type_bytearray
#endif
) {
// Taken from objstr.c:str_index_to_ptr() // Taken from objstr.c:str_index_to_ptr()
size_t index_val = mp_get_index(type, self_len, index, is_slice); size_t index_val = mp_get_index(type, self_len, index, is_slice);
return self_data + index_val; return self_data + index_val;
@ -225,48 +229,6 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
} }
} }
// Method table for the str type defined in this file: the dict built from it
// (struni_locals_dict) is installed as mp_type_str.locals_dict.  Every entry
// maps a method-name qstr to the shared str_*_obj function object; optional
// methods are gated by their configuration macro.
STATIC const mp_rom_map_elem_t struni_locals_dict_table[] = {
// encode() is only provided when CPython compatibility is enabled.
#if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
#endif
{ MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) },
{ MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) },
{ MP_ROM_QSTR(MP_QSTR_index), MP_ROM_PTR(&str_index_obj) },
{ MP_ROM_QSTR(MP_QSTR_rindex), MP_ROM_PTR(&str_rindex_obj) },
{ MP_ROM_QSTR(MP_QSTR_join), MP_ROM_PTR(&str_join_obj) },
{ MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&str_split_obj) },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
{ MP_ROM_QSTR(MP_QSTR_splitlines), MP_ROM_PTR(&str_splitlines_obj) },
#endif
{ MP_ROM_QSTR(MP_QSTR_rsplit), MP_ROM_PTR(&str_rsplit_obj) },
{ MP_ROM_QSTR(MP_QSTR_startswith), MP_ROM_PTR(&str_startswith_obj) },
{ MP_ROM_QSTR(MP_QSTR_endswith), MP_ROM_PTR(&str_endswith_obj) },
{ MP_ROM_QSTR(MP_QSTR_strip), MP_ROM_PTR(&str_strip_obj) },
{ MP_ROM_QSTR(MP_QSTR_lstrip), MP_ROM_PTR(&str_lstrip_obj) },
{ MP_ROM_QSTR(MP_QSTR_rstrip), MP_ROM_PTR(&str_rstrip_obj) },
{ MP_ROM_QSTR(MP_QSTR_format), MP_ROM_PTR(&str_format_obj) },
{ MP_ROM_QSTR(MP_QSTR_replace), MP_ROM_PTR(&str_replace_obj) },
#if MICROPY_PY_BUILTINS_STR_COUNT
{ MP_ROM_QSTR(MP_QSTR_count), MP_ROM_PTR(&str_count_obj) },
#endif
#if MICROPY_PY_BUILTINS_STR_PARTITION
{ MP_ROM_QSTR(MP_QSTR_partition), MP_ROM_PTR(&str_partition_obj) },
{ MP_ROM_QSTR(MP_QSTR_rpartition), MP_ROM_PTR(&str_rpartition_obj) },
#endif
#if MICROPY_PY_BUILTINS_STR_CENTER
{ MP_ROM_QSTR(MP_QSTR_center), MP_ROM_PTR(&str_center_obj) },
#endif
{ MP_ROM_QSTR(MP_QSTR_lower), MP_ROM_PTR(&str_lower_obj) },
{ MP_ROM_QSTR(MP_QSTR_upper), MP_ROM_PTR(&str_upper_obj) },
{ MP_ROM_QSTR(MP_QSTR_isspace), MP_ROM_PTR(&str_isspace_obj) },
{ MP_ROM_QSTR(MP_QSTR_isalpha), MP_ROM_PTR(&str_isalpha_obj) },
{ MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) },
{ MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) },
{ MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) },
};
STATIC MP_DEFINE_CONST_DICT(struni_locals_dict, struni_locals_dict_table);
const mp_obj_type_t mp_type_str = { const mp_obj_type_t mp_type_str = {
{ &mp_type_type }, { &mp_type_type },
.name = MP_QSTR_str, .name = MP_QSTR_str,
@ -277,7 +239,7 @@ const mp_obj_type_t mp_type_str = {
.subscr = str_subscr, .subscr = str_subscr,
.getiter = mp_obj_new_str_iterator, .getiter = mp_obj_new_str_iterator,
.buffer_p = { .get_buffer = mp_obj_str_get_buffer }, .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
.locals_dict = (mp_obj_dict_t *)&struni_locals_dict, .locals_dict = (mp_obj_dict_t *)&mp_obj_str_locals_dict,
}; };
/******************************************************************************/ /******************************************************************************/

View File

@ -0,0 +1,35 @@
# test bytearray with its re-use of byte functions

# searching
print(bytearray(b"hello world").find(b"ll"))
print(bytearray(b"hello\x00world").rfind(b"l"))

# stripping, with and without an explicit set of bytes to remove
print(bytearray(b"abc efg ").strip(b"g a"))
print(bytearray(b" spacious ").lstrip())
print(bytearray(b"www.example.com").lstrip(b"cmowz."))
print(bytearray(b" spacious ").rstrip())
print(bytearray(b"mississippi").rstrip(b"ipz"))

# splitting and replacing
print(bytearray(b"abc").split(b"a"))
print(bytearray(b"abcabc").rsplit(b"bc"))
print(bytearray(b"asdfasdf").replace(b"a", b"b"))

# index/rindex on a bytearray containing an embedded NUL byte
# (exercised on bytearray, not str, since that is what this file tests)
print(bytearray(b"00\x0000").index(b"0", 0))
print(bytearray(b"00\x0000").index(b"0", 3))
print(bytearray(b"00\x0000").rindex(b"0", 0))
print(bytearray(b"00\x0000").rindex(b"0", 3))

# prefix/suffix checks
print(bytearray(b"foobar").endswith(b"bar"))
print(bytearray(b"1foo").startswith(b"foo", 1))

# case conversion
print(bytearray(b" T E \x00 S T").lower())
print(bytearray(b" te \x00 st").upper())

# character-class predicates
print(bytearray(b" \t\n\r\v\f").isspace())
print(bytearray(b"this ").isalpha())
print(bytearray(b"0123456789").isdigit())
print(bytearray(b"AB").isupper())
print(bytearray(b"cheese-cake").islower())

# join accepts bytearray and bytes items, and returns a bytearray
print(bytearray(b",").join((bytearray(b"abc"), bytearray(b"def"))))
print(type(bytearray(b",").join((b"a", b"b", b"c"))))

View File

@ -0,0 +1,8 @@
# Skip this test on builds where the center() method is not available.
try:
    bytearray.center
except AttributeError:
    print("SKIP")
    raise SystemExit

# center() on a bytearray must pad the data and return a bytearray.
print(bytearray(b"foo").center(6))
print(type(bytearray(b"foo").center(6)))

View File

@ -0,0 +1,7 @@
# Skip this test on builds where the count() method is not available.
try:
    bytearray.count
except AttributeError:
    print("SKIP")
    raise SystemExit

# count() on a bytearray accepts a bytes needle.
print(bytearray(b"aaaa").count(b"a"))

View File

@ -0,0 +1,8 @@
# Skip this test on builds where the partition() method is not available.
try:
    bytearray.partition
except AttributeError:
    print("SKIP")
    raise SystemExit

# partition()/rpartition() on a bytearray accept a bytes separator.
print(bytearray(b"asdsf").partition(b"s"))
print(bytearray(b"asdsf").rpartition(b"s"))

View File

@ -0,0 +1,13 @@
# Skip this test on builds where the center() method is not available.
# Only AttributeError is caught so that unrelated failures are not hidden.
try:
    bytes.center
except AttributeError:
    print("SKIP")
    raise SystemExit

# Widths at or below the data length leave the data unchanged; larger widths pad it.
print(b"foo".center(0))
print(b"foo".center(1))
print(b"foo".center(3))
print(b"foo".center(4))
print(b"foo".center(5))
print(b"foo".center(6))
print(b"foo".center(20))