extmod/ure: Support search/match() pos and endpos parameters
The new parameters are enabled by the existing MICROPY_PY_URE_MATCH_SPAN_START_END option rather than a new one, since the functionality is similar to what that option already covers.
This commit is contained in:
parent
20a787adb4
commit
0865c9d381
@ -178,6 +178,35 @@ STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
|
|||||||
size_t len;
|
size_t len;
|
||||||
subj.begin = mp_obj_str_get_data(args[1], &len);
|
subj.begin = mp_obj_str_get_data(args[1], &len);
|
||||||
subj.end = subj.begin + len;
|
subj.end = subj.begin + len;
|
||||||
|
#if MICROPY_PY_URE_MATCH_SPAN_START_END
|
||||||
|
if (n_args > 2) {
|
||||||
|
const mp_obj_type_t *self_type = mp_obj_get_type(args[1]);
|
||||||
|
mp_int_t str_len = MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(args[1]));
|
||||||
|
const byte *begin = (const byte *)subj.begin;
|
||||||
|
|
||||||
|
int pos = mp_obj_get_int(args[2]);
|
||||||
|
if (pos >= str_len) {
|
||||||
|
return mp_const_none;
|
||||||
|
}
|
||||||
|
if (pos < 0) {
|
||||||
|
pos = 0;
|
||||||
|
}
|
||||||
|
const byte *pos_ptr = str_index_to_ptr(self_type, begin, len, MP_OBJ_NEW_SMALL_INT(pos), true);
|
||||||
|
|
||||||
|
const byte *endpos_ptr = (const byte *)subj.end;
|
||||||
|
if (n_args > 3) {
|
||||||
|
int endpos = mp_obj_get_int(args[3]);
|
||||||
|
if (endpos <= pos) {
|
||||||
|
return mp_const_none;
|
||||||
|
}
|
||||||
|
// Will cap to length
|
||||||
|
endpos_ptr = str_index_to_ptr(self_type, begin, len, args[3], true);
|
||||||
|
}
|
||||||
|
|
||||||
|
subj.begin = (const char *)pos_ptr;
|
||||||
|
subj.end = (const char *)endpos_ptr;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
int caps_num = (self->re.sub + 1) * 2;
|
int caps_num = (self->re.sub + 1) * 2;
|
||||||
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num);
|
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num);
|
||||||
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
|
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
|
||||||
|
@ -408,6 +408,15 @@ mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
|
|||||||
|
|
||||||
#if !MICROPY_PY_BUILTINS_STR_UNICODE
|
#if !MICROPY_PY_BUILTINS_STR_UNICODE
|
||||||
// objstrunicode defines own version
|
// objstrunicode defines own version
|
||||||
|
// Convert a byte offset into an index for builds compiled without
// MICROPY_PY_BUILTINS_STR_UNICODE (objstrunicode provides its own version).
// The offset is returned unchanged; type and self_data are not consulted.
// Raises ValueError ("offset out of bounds") when offset is greater than self_len.
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
    size_t offset) {
    // An offset equal to self_len (one past the last byte) is still accepted.
    if (self_len < offset) {
        mp_raise_ValueError(translate("offset out of bounds"));
    }

    return offset;
}
|
||||||
|
|
||||||
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
||||||
mp_obj_t index, bool is_slice) {
|
mp_obj_t index, bool is_slice) {
|
||||||
size_t index_val = mp_get_index(type, self_len, index, is_slice);
|
size_t index_val = mp_get_index(type, self_len, index, is_slice);
|
||||||
|
@ -71,6 +71,8 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, siz
|
|||||||
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
|
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
|
||||||
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
|
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
|
||||||
|
|
||||||
|
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
||||||
|
size_t offset);
|
||||||
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
||||||
mp_obj_t index, bool is_slice);
|
mp_obj_t index, bool is_slice);
|
||||||
const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction);
|
const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction);
|
||||||
|
@ -112,6 +112,26 @@ STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
||||||
|
size_t offset) {
|
||||||
|
if (offset > self_len) {
|
||||||
|
mp_raise_ValueError(translate("offset out of bounds"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == &mp_type_bytes) {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t index_val = 0;
|
||||||
|
const byte *s = self_data;
|
||||||
|
for (size_t i = 0; i < offset; i++, s++) {
|
||||||
|
if (!UTF8_IS_CONT(*s)) {
|
||||||
|
++index_val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return index_val;
|
||||||
|
}
|
||||||
|
|
||||||
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
|
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
|
||||||
// be capped to the first/last character of the string, depending on is_slice.
|
// be capped to the first/last character of the string, depending on is_slice.
|
||||||
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user