From cbeac094ef88b1f6a12cad7a3135f0530302ad20 Mon Sep 17 00:00:00 2001 From: Damien George Date: Thu, 24 May 2018 13:08:15 +1000 Subject: [PATCH] extmod/modure: Add match.span(), start() and end() methods, and tests. This feature is controlled at compile time by MICROPY_PY_URE_MATCH_SPAN_START_END, disabled by default. Thanks to @dmazzella for the original patch for this feature; see #3770. --- extmod/modure.c | 55 ++++++++++++++++++++++++++++++++++++++++ py/mpconfig.h | 4 +++ tests/extmod/ure_span.py | 40 +++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 tests/extmod/ure_span.py diff --git a/extmod/modure.c b/extmod/modure.c index cce41e5a5f..a3b5378249 100644 --- a/extmod/modure.c +++ b/extmod/modure.c @@ -94,11 +94,66 @@ MP_DEFINE_CONST_FUN_OBJ_1(match_groups_obj, match_groups); #endif +#if MICROPY_PY_URE_MATCH_SPAN_START_END + +STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span[2]) { + mp_obj_match_t *self = MP_OBJ_TO_PTR(args[0]); + + mp_int_t no = 0; + if (n_args == 2) { + no = mp_obj_get_int(args[1]); + if (no < 0 || no >= self->num_matches) { + nlr_raise(mp_obj_new_exception_arg1(&mp_type_IndexError, args[1])); + } + } + + mp_int_t s = -1; + mp_int_t e = -1; + const char *start = self->caps[no * 2]; + if (start != NULL) { + // have a match for this group + const char *begin = mp_obj_str_get_str(self->str); + s = start - begin; + e = self->caps[no * 2 + 1] - begin; + } + + span[0] = mp_obj_new_int(s); + span[1] = mp_obj_new_int(e); +} + +STATIC mp_obj_t match_span(size_t n_args, const mp_obj_t *args) { + mp_obj_t span[2]; + match_span_helper(n_args, args, span); + return mp_obj_new_tuple(2, span); +} +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_span_obj, 1, 2, match_span); + +STATIC mp_obj_t match_start(size_t n_args, const mp_obj_t *args) { + mp_obj_t span[2]; + match_span_helper(n_args, args, span); + return span[0]; +} +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_start_obj, 1, 2, match_start); + +STATIC mp_obj_t match_end(size_t n_args, const mp_obj_t *args) { + mp_obj_t span[2]; + match_span_helper(n_args, args, span); + return span[1]; +} +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_end_obj, 1, 2, match_end); + +#endif + STATIC const mp_rom_map_elem_t match_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_group), MP_ROM_PTR(&match_group_obj) }, #if MICROPY_PY_URE_MATCH_GROUPS { MP_ROM_QSTR(MP_QSTR_groups), MP_ROM_PTR(&match_groups_obj) }, #endif + #if MICROPY_PY_URE_MATCH_SPAN_START_END + { MP_ROM_QSTR(MP_QSTR_span), MP_ROM_PTR(&match_span_obj) }, + { MP_ROM_QSTR(MP_QSTR_start), MP_ROM_PTR(&match_start_obj) }, + { MP_ROM_QSTR(MP_QSTR_end), MP_ROM_PTR(&match_end_obj) }, + #endif }; STATIC MP_DEFINE_CONST_DICT(match_locals_dict, match_locals_dict_table); diff --git a/py/mpconfig.h b/py/mpconfig.h index 8f789e861d..cf757cf427 100755 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1168,6 +1168,10 @@ typedef double mp_float_t; #define MICROPY_PY_URE_MATCH_GROUPS (0) #endif +#ifndef MICROPY_PY_URE_MATCH_SPAN_START_END +#define MICROPY_PY_URE_MATCH_SPAN_START_END (0) +#endif + #ifndef MICROPY_PY_UHEAPQ #define MICROPY_PY_UHEAPQ (0) #endif diff --git a/tests/extmod/ure_span.py b/tests/extmod/ure_span.py new file mode 100644 index 0000000000..50f44399ce --- /dev/null +++ b/tests/extmod/ure_span.py @@ -0,0 +1,40 @@ +# test match.span(), and nested spans + +try: + import ure as re +except ImportError: + try: + import re + except ImportError: + print("SKIP") + raise SystemExit + +try: + m = re.match(".", "a") + m.span +except AttributeError: + print('SKIP') + raise SystemExit + + +def print_spans(match): + print('----') + try: + i = 0 + while True: + print(match.span(i), match.start(i), match.end(i)) + i += 1 + except IndexError: + pass + +m = re.match(r'(([0-9]*)([a-z]*)[0-9]*)','1234hello567') +print_spans(m) + +m = re.match(r'([0-9]*)(([a-z]*)([0-9]*))','1234hello567') +print_spans(m) + +# optional span that matches +print_spans(re.match(r'(a)?b(c)', 'abc')) + +# optional span that doesn't match +print_spans(re.match(r'(a)?b(c)', 'bc'))