extmod/modure: Add match.span(), start() and end() methods, and tests.
This feature is controlled at compile time by MICROPY_PY_URE_MATCH_SPAN_START_END, disabled by default. Thanks to @dmazzella for the original patch for this feature; see #3770.
This commit is contained in:
parent
1f86460910
commit
1e9b871d29
|
@ -94,11 +94,66 @@ MP_DEFINE_CONST_FUN_OBJ_1(match_groups_obj, match_groups);
|
|||
|
||||
#endif
|
||||
|
||||
#if MICROPY_PY_URE_MATCH_SPAN_START_END
|
||||
|
||||
// Shared worker for match.span(), match.start() and match.end().
//
// args[0] is the match object.  When two arguments are given, args[1] is the
// group index; otherwise index 0 is used.  An index outside
// [0, self->num_matches) raises IndexError carrying args[1].
//
// On return span[0] and span[1] hold integer objects with the start and end
// offsets of the group, computed as pointer differences from the pointer
// returned by mp_obj_str_get_str(self->str).  Both are -1 when there is no
// match for the group.
// NOTE(review): being pointer differences these are byte offsets -- confirm
// that is what callers expect for strings with multi-byte characters.
STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span[2]) {
    mp_obj_match_t *self = MP_OBJ_TO_PTR(args[0]);

    // Resolve and validate the requested group index.
    mp_int_t group = 0;
    if (n_args == 2) {
        group = mp_obj_get_int(args[1]);
        if (!(group >= 0 && group < self->num_matches)) {
            nlr_raise(mp_obj_new_exception_arg1(&mp_type_IndexError, args[1]));
        }
    }

    // caps[] holds a (start, end) pointer pair per group; a NULL start means
    // there is no match for this group.
    const char *group_start = self->caps[group * 2];
    if (group_start == NULL) {
        span[0] = mp_obj_new_int(-1);
        span[1] = mp_obj_new_int(-1);
        return;
    }

    // Convert the capture pointers into offsets from the start of the string.
    const char *base = mp_obj_str_get_str(self->str);
    mp_int_t start_off = group_start - base;
    mp_int_t end_off = self->caps[group * 2 + 1] - base;

    span[0] = mp_obj_new_int(start_off);
    span[1] = mp_obj_new_int(end_off);
}
|
||||
|
||||
STATIC mp_obj_t match_span(size_t n_args, const mp_obj_t *args) {
|
||||
mp_obj_t span[2];
|
||||
match_span_helper(n_args, args, span);
|
||||
return mp_obj_new_tuple(2, span);
|
||||
}
|
||||
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_span_obj, 1, 2, match_span);
|
||||
|
||||
STATIC mp_obj_t match_start(size_t n_args, const mp_obj_t *args) {
|
||||
mp_obj_t span[2];
|
||||
match_span_helper(n_args, args, span);
|
||||
return span[0];
|
||||
}
|
||||
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_start_obj, 1, 2, match_start);
|
||||
|
||||
STATIC mp_obj_t match_end(size_t n_args, const mp_obj_t *args) {
|
||||
mp_obj_t span[2];
|
||||
match_span_helper(n_args, args, span);
|
||||
return span[1];
|
||||
}
|
||||
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_end_obj, 1, 2, match_end);
|
||||
|
||||
#endif
|
||||
|
||||
// Method table for match objects.  group() is always present; the other
// entries are compiled in only when their feature macro is enabled.
STATIC const mp_rom_map_elem_t match_locals_dict_table[] = {
|
||||
{ MP_ROM_QSTR(MP_QSTR_group), MP_ROM_PTR(&match_group_obj) },
|
||||
// groups() requires MICROPY_PY_URE_MATCH_GROUPS
#if MICROPY_PY_URE_MATCH_GROUPS
|
||||
{ MP_ROM_QSTR(MP_QSTR_groups), MP_ROM_PTR(&match_groups_obj) },
|
||||
#endif
|
||||
// span(), start() and end() require MICROPY_PY_URE_MATCH_SPAN_START_END
#if MICROPY_PY_URE_MATCH_SPAN_START_END
|
||||
{ MP_ROM_QSTR(MP_QSTR_span), MP_ROM_PTR(&match_span_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_start), MP_ROM_PTR(&match_start_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_end), MP_ROM_PTR(&match_end_obj) },
|
||||
#endif
|
||||
};
|
||||
|
||||
// Wrap the table above as the constant dict match_locals_dict.
STATIC MP_DEFINE_CONST_DICT(match_locals_dict, match_locals_dict_table);
|
||||
|
|
|
@ -1146,6 +1146,10 @@ typedef double mp_float_t;
|
|||
#define MICROPY_PY_URE_MATCH_GROUPS (0)
|
||||
#endif
|
||||
|
||||
// Whether to provide the span(), start() and end() methods on ure match
// objects.  Defaults to disabled (0) unless already defined.
#ifndef MICROPY_PY_URE_MATCH_SPAN_START_END
|
||||
#define MICROPY_PY_URE_MATCH_SPAN_START_END (0)
|
||||
#endif
|
||||
|
||||
#ifndef MICROPY_PY_UHEAPQ
|
||||
#define MICROPY_PY_UHEAPQ (0)
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# test match.span(), and nested spans
|
||||
|
||||
try:
|
||||
import ure as re
|
||||
except ImportError:
|
||||
try:
|
||||
import re
|
||||
except ImportError:
|
||||
print("SKIP")
|
||||
raise SystemExit
|
||||
|
||||
# Skip the whole test when match objects have no span attribute
# (accessing m.span raises AttributeError in that case).
try:
|
||||
m = re.match(".", "a")
|
||||
m.span
|
||||
except AttributeError:
|
||||
print('SKIP')
|
||||
raise SystemExit
|
||||
|
||||
|
||||
def print_spans(match):
    """Print span, start and end for each group of `match`.

    Groups are visited from index 0 upwards, one output line per group,
    after a '----' separator line.  Iteration stops silently at the first
    index for which the match object raises IndexError.
    """
    print('----')
    group = 0
    while True:
        try:
            print(match.span(group), match.start(group), match.end(group))
        except IndexError:
            # ran past the last group
            break
        group += 1
|
||||
|
||||
# nested spans: one outer group enclosing two inner groups
m = re.match(r'(([0-9]*)([a-z]*)[0-9]*)','1234hello567')
|
||||
print_spans(m)
|
||||
|
||||
# nested spans: a leading group, then an outer group enclosing two inner groups
m = re.match(r'([0-9]*)(([a-z]*)([0-9]*))','1234hello567')
|
||||
print_spans(m)
|
||||
|
||||
# optional span that matches
|
||||
print_spans(re.match(r'(a)?b(c)', 'abc'))
|
||||
|
||||
# optional span that doesn't match
|
||||
print_spans(re.match(r'(a)?b(c)', 'bc'))
|
Loading…
Reference in New Issue