extmod/ure: Support search/match() pos and endpos parameters

The existing MICROPY_PY_URE_MATCH_SPAN_START_END option is reused to enable
this functionality, since the two features are similar.
This commit is contained in:
Noralf Trønnes 2019-02-12 22:42:59 +01:00
parent 20a787adb4
commit 0865c9d381
4 changed files with 60 additions and 0 deletions

View File

@ -178,6 +178,35 @@ STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
size_t len; size_t len;
subj.begin = mp_obj_str_get_data(args[1], &len); subj.begin = mp_obj_str_get_data(args[1], &len);
subj.end = subj.begin + len; subj.end = subj.begin + len;
#if MICROPY_PY_URE_MATCH_SPAN_START_END
if (n_args > 2) {
const mp_obj_type_t *self_type = mp_obj_get_type(args[1]);
mp_int_t str_len = MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(args[1]));
const byte *begin = (const byte *)subj.begin;
int pos = mp_obj_get_int(args[2]);
if (pos >= str_len) {
return mp_const_none;
}
if (pos < 0) {
pos = 0;
}
const byte *pos_ptr = str_index_to_ptr(self_type, begin, len, MP_OBJ_NEW_SMALL_INT(pos), true);
const byte *endpos_ptr = (const byte *)subj.end;
if (n_args > 3) {
int endpos = mp_obj_get_int(args[3]);
if (endpos <= pos) {
return mp_const_none;
}
// Will cap to length
endpos_ptr = str_index_to_ptr(self_type, begin, len, args[3], true);
}
subj.begin = (const char *)pos_ptr;
subj.end = (const char *)endpos_ptr;
}
#endif
int caps_num = (self->re.sub + 1) * 2; int caps_num = (self->re.sub + 1) * 2;
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num); mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num);
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char // cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char

View File

@ -408,6 +408,15 @@ mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
#if !MICROPY_PY_BUILTINS_STR_UNICODE #if !MICROPY_PY_BUILTINS_STR_UNICODE
// objstrunicode defines own version // objstrunicode defines own version
// Translate a byte offset into self_data into a string index.
// In this (non-unicode) variant the offset is handed back unchanged; an
// offset greater than self_len is reported with ValueError.
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
    size_t offset) {
    // Neither the object type nor the data itself is consulted here.
    (void)type;
    (void)self_data;

    if (self_len < offset) {
        mp_raise_ValueError(translate("offset out of bounds"));
    }
    return offset;
}
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
mp_obj_t index, bool is_slice) { mp_obj_t index, bool is_slice) {
size_t index_val = mp_get_index(type, self_len, index, is_slice); size_t index_val = mp_get_index(type, self_len, index, is_slice);

View File

@ -71,6 +71,8 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, siz
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags); mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
// Convert a byte offset within self_data (self_len bytes long) into a string
// index. Raises ValueError ("offset out of bounds") when offset > self_len.
// The non-unicode implementation returns the offset unchanged; the unicode
// implementation returns it unchanged for bytes objects and otherwise counts
// the non-continuation UTF-8 bytes that precede the offset.
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
size_t offset);
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
mp_obj_t index, bool is_slice); mp_obj_t index, bool is_slice);
const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction); const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction);

View File

@ -112,6 +112,26 @@ STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
} }
} }
// Translate a byte offset into self_data into a string index.
// An offset greater than self_len is reported with ValueError.  For bytes
// objects the offset is returned as-is; for any other type the result is the
// number of non-continuation UTF-8 bytes located before the offset.
size_t str_offset_to_index(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
    size_t offset) {
    if (self_len < offset) {
        mp_raise_ValueError(translate("offset out of bounds"));
    }

    // bytes objects are indexed per byte, so no conversion is needed.
    if (type == &mp_type_bytes) {
        return offset;
    }

    // Walk the first `offset` bytes; each byte that is not a UTF-8
    // continuation byte adds one to the index.
    size_t char_index = 0;
    size_t remaining = offset;
    const byte *cursor = self_data;
    while (remaining > 0) {
        if (!UTF8_IS_CONT(*cursor)) {
            char_index += 1;
        }
        ++cursor;
        --remaining;
    }
    return char_index;
}
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or // Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
// be capped to the first/last character of the string, depending on is_slice. // be capped to the first/last character of the string, depending on is_slice.
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,