From 20a787adb4140002acb8e5ea21536069e184ffaf Mon Sep 17 00:00:00 2001
From: Radomir Dopieralski
Date: Tue, 25 Jul 2017 01:09:47 +0200
Subject: [PATCH] extmod/ure: Handle some escape sequences.

Fix MicroPython #3176

Handle escape sequences inside regular expressions, both at the top
level of a pattern and inside character classes. This adds handling
for \a, \b, \f, \n, \r, \v and \\. Any other escaped character stands
for itself (so "\]" can be used inside a class), except that \d, \s,
\w and their upper-case forms still select a named class at the top
level of a pattern.

Inside a character class an escape is decoded exactly once, so "[\\]"
matches a single backslash, and a pattern that ends right after a
backslash or in the middle of a range makes compilation fail instead
of reading past the end of the pattern string.
---
 extmod/re1.5/compilecode.c | 48 ++++++++++++++++++++++++++++++++++----
 tests/extmod/ure1.py       | 16 +++++++++++++
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c
index a685a508a0..01d3d14988 100644
--- a/extmod/re1.5/compilecode.c
+++ b/extmod/re1.5/compilecode.c
@@ -10,6 +10,29 @@
 #define EMIT(at, byte) (code ? (code[at] = byte) : (at))
 #define PC (prog->bytelen)
 
+// Decode the character that follows a backslash: a, b, f, n, r, v give the matching control character, anything else is returned unchanged.
+static char unescape(char c) {
+    switch (c) {
+    case 'a':
+        return '\a';
+    case 'b':
+        return '\b';
+    case 'f':
+        return '\f';
+    case 'n':
+        return '\n';
+    case 'r':
+        return '\r';
+    case 'v':
+        return '\v';
+    case '\\':
+        return '\\';
+    default:
+        return c;
+    }
+}
+
+
 static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
 {
     char *code = sizecode ? NULL : prog->insts;
@@ -22,13 +45,16 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
         case '\\':
             re++;
             if (!*re) return NULL; // Trailing backslash
+            term = PC;
             if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') {
-                term = PC;
                 EMIT(PC++, NamedClass);
                 EMIT(PC++, *re);
-                prog->len++;
-                break;
+            } else {
+                EMIT(PC++, Char);
+                EMIT(PC++, unescape(*re));
             }
+            prog->len++;
+            break;
         default:
             term = PC;
             EMIT(PC++, Char);
@@ -54,11 +80,23 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
             prog->len++;
             for (cnt = 0; *re != ']'; re++, cnt++) {
                 if (!*re) return NULL;
-                EMIT(PC++, *re);
+                char c = *re;
+                if (c == '\\') {
+                    re++;
+                    if (!*re) return NULL; // Trailing backslash
+                    c = unescape(*re);
+                }
+                EMIT(PC++, c); // Low end of the pair
                 if (re[1] == '-' && re[2] != ']') {
                     re += 2;
+                    c = *re;
+                    if (c == '\\') {
+                        re++;
+                        c = unescape(*re);
+                    }
+                    if (!*re) return NULL; // Pattern ends inside a range
                 }
-                EMIT(PC++, *re);
+                EMIT(PC++, c); // High end; equals the low end for a single char
             }
             EMIT(term + 1, cnt);
             break;
diff --git a/tests/extmod/ure1.py b/tests/extmod/ure1.py
index 54471ed4f9..710720c8b6 100644
--- a/tests/extmod/ure1.py
+++ b/tests/extmod/ure1.py
@@ -28,6 +28,22 @@ try:
 except IndexError:
     print("IndexError")
 
+r = re.compile(r"\n")
+m = r.match("\n")
+print(m.group(0))
+m = r.match("\\")
+print(m)
+r = re.compile(r"[\n-\r]")
+m = r.match("\n")
+print(m.group(0))
+r = re.compile(r"[\]]")
+m = r.match("]")
+print(m.group(0))
+r = re.compile(r"[\\]")
+m = r.match("\\")
+print(m.group(0))
+print("===")
+
 r = re.compile("[a-cu-z]")
 m = r.match("a")
 print(m.group(0))