extmod/re1.5: Support escaping within RE classes.

Fixes issues #3178 and #5220.

Tests are added, including all the cases mentioned in both issues.
This commit is contained in:
Jim Mussared 2019-10-16 16:56:29 +11:00 committed by Damien George
parent 7a7ee16ccf
commit ebf8332104
3 changed files with 24 additions and 0 deletions

View File

@ -53,6 +53,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
PC++; // Skip # of pair byte
prog->len++;
for (cnt = 0; *re != ']'; re++, cnt++) {
if (*re == '\\') {
++re;
}
if (!*re) return NULL;
EMIT(PC++, *re);
if (re[1] == '-' && re[2] != ']') {

View File

@ -88,3 +88,23 @@ except:
# bytes objects
# Both the pattern and the subject are bytes; a+? is the non-greedy form of a+.
m = re.match(rb'a+?', b'ab'); print(m.group(0))
# Separator line between sections of the printed output.
print("===")
# escaping
# Outside a class, '\.' must match a literal dot only: not an arbitrary
# character ('abc') and not a literal backslash (subject a, backslash, b, c).
m = re.match(r'a\.c', 'a.c'); print(m.group(0) if m else '')
m = re.match(r'a\.b', 'abc'); print(m is None)
m = re.match(r'a\.b', 'a\\bc'); print(m is None)
# Inside a class, a backslash makes the next character a literal member:
# '\-' is a plain '-' rather than a range operator, '\]' does not close the
# class, and '\\' stands for a single backslash.
m = re.match(r'[a\-z]', 'abc'); print(m.group(0))
m = re.match(r'[.\]]*', '.].]a'); print(m.group(0))
m = re.match(r'[.\]+]*', '.]+.]a'); print(m.group(0))
# Real ranges (a-f, 0-9) combined with an escaped '-' in the same class.
m = re.match(r'[a-f0-9x\-yz]*', 'abxcd1-23'); print(m.group(0))
m = re.match(r'[a\\b]*', 'a\\aa\\bb\\bbab'); print(m.group(0))
# With '\-' the class holds only 'a', '-' and 'z': '-' is found, while 'f'
# (which an a-z range would have covered) is not.
m = re.search(r'[a\-z]', '-'); print(m.group(0))
m = re.search(r'[a\-z]', 'f'); print(m is None)
m = re.search(r'[a\]z]', 'a'); print(m.group(0))
# Splitting on classes containing '-': unescaped at the start and at the end
# of the class, then escaped in the last, middle and first position.
print(re.compile(r'[-a]').split('foo-bar'))
print(re.compile(r'[a-]').split('foo-bar'))
print(re.compile(r'[ax\-]').split('foo-bar'))
print(re.compile(r'[a\-x]').split('foo-bar'))
print(re.compile(r'[\-ax]').split('foo-bar'))
print("===")

View File

@ -23,3 +23,4 @@ test_re(r')')
# Character classes that are never closed, alone and inside a group.
# NOTE(review): test_re is defined outside this view; presumably it reports
# how compiling the given pattern fails — confirm against its definition.
test_re(r'[')
test_re(r'([')
test_re(r'([)')
# The backslash escapes the ']', so this class has no terminator either.
test_re(r'[a\]')