py/objstr: Always validate utf-8 for mp_obj_new_str.
All uses of this are either tiny strings or not-known-to-be-safe. Update comments for mp_obj_new_str_copy and mp_obj_new_str_of_type. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
This commit is contained in:
parent
3a910b1565
commit
6c3d8d38bf
8
py/obj.h
8
py/obj.h
|
@ -789,11 +789,11 @@ mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value);
|
|||
mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base);
|
||||
mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
|
||||
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a multi-precision integer object (or raise an overflow exception)
|
||||
mp_obj_t mp_obj_new_str(const char *data, size_t len);
|
||||
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len);
|
||||
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr);
|
||||
mp_obj_t mp_obj_new_str(const char *data, size_t len); // will check utf-8 (raises UnicodeError)
|
||||
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len); // input data must be valid utf-8
|
||||
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr); // will check utf-8 (raises UnicodeError)
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
|
||||
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // only use when vstr is already known to be utf-8 encoded
|
||||
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // input data must be valid utf-8
|
||||
#else
|
||||
#define mp_obj_new_str_from_utf8_vstr mp_obj_new_str_from_vstr
|
||||
#endif
|
||||
|
|
11
py/objstr.c
11
py/objstr.c
|
@ -202,11 +202,7 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
|
|||
} else {
|
||||
mp_buffer_info_t bufinfo;
|
||||
mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
|
||||
if (!utf8_check(bufinfo.buf, bufinfo.len)) {
|
||||
mp_raise_msg(&mp_type_UnicodeError, NULL);
|
||||
}
|
||||
#endif
|
||||
// This will utf-8 check the input.
|
||||
return mp_obj_new_str(bufinfo.buf, bufinfo.len);
|
||||
}
|
||||
}
|
||||
|
@ -2268,6 +2264,11 @@ mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr) {
|
|||
}
|
||||
|
||||
mp_obj_t mp_obj_new_str(const char *data, size_t len) {
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
|
||||
if (!utf8_check((byte *)data, len)) {
|
||||
mp_raise_msg(&mp_type_UnicodeError, NULL);
|
||||
}
|
||||
#endif
|
||||
qstr q = qstr_find_strn(data, len);
|
||||
if (q != MP_QSTRnull) {
|
||||
// qstr with this data already exists
|
||||
|
|
|
@ -88,8 +88,8 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t
|
|||
void mp_str_print_json(const mp_print_t *print, const byte *str_data, size_t str_len);
|
||||
mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs);
|
||||
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args);
|
||||
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len);
|
||||
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len);
|
||||
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, input data must be valid utf-8
|
||||
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, will check utf-8 (raises UnicodeError)
|
||||
|
||||
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
|
||||
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
|
||||
|
|
Loading…
Reference in New Issue