extmod/ure: Handle some escape sequences.

Fix MicroPython #3176

Handle escape sequences inside regular expressions. This adds
handling for \a, \b, \f, \n, \r, \v and \\.
This commit is contained in:
Radomir Dopieralski 2017-07-25 01:09:47 +02:00 committed by Noralf Trønnes
parent b9dc23c070
commit 20a787adb4
2 changed files with 54 additions and 5 deletions

View File

@ -10,6 +10,29 @@
#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
#define PC (prog->bytelen)
// Translate the character that follows a backslash in a pattern into the
// character it stands for.
//
// c: the character immediately after the backslash.
//
// Returns the matching control character for the letters a, b, f, n, r, t
// and v. Every other character is returned unchanged, so an escaped
// backslash or an escaped ']' yields that character itself.
static char unescape(char c) {
    switch (c) {
        case 'a':
            return '\a';
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'v':
            return '\v';
        default:
            // No special case is needed for '\\': it is returned as-is here.
            // 'x' is deliberately not mapped to a backslash; hex escapes are
            // not implemented, so it is treated like any other character.
            return c;
    }
}
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
{
char *code = sizecode ? NULL : prog->insts;
@ -22,13 +45,16 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
case '\\':
re++;
if (!*re) return NULL; // Trailing backslash
if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') {
term = PC;
if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') {
EMIT(PC++, NamedClass);
EMIT(PC++, *re);
} else {
EMIT(PC++, Char);
EMIT(PC++, unescape(*re));
}
prog->len++;
break;
}
default:
term = PC;
EMIT(PC++, Char);
@ -54,12 +80,22 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
prog->len++;
for (cnt = 0; *re != ']'; re++, cnt++) {
if (!*re) return NULL;
if (*re == '\\') {
re += 1;
EMIT(PC++, unescape(*re));
} else {
EMIT(PC++, *re);
}
if (re[1] == '-' && re[2] != ']') {
re += 2;
}
if (*re == '\\') {
re += 1;
EMIT(PC++, unescape(*re));
} else {
EMIT(PC++, *re);
}
}
EMIT(term + 1, cnt);
break;
}

View File

@ -28,6 +28,19 @@ try:
except IndexError:
print("IndexError")
# Escape sequences: the raw pattern is a backslash followed by "n", which the
# regex compiler should translate to a newline character.
r = re.compile(r"\n")
m = r.match("\n")
print(m.group(0))
# Same pattern against a single backslash character; the match result is
# printed directly (presumably None, since a backslash is not a newline).
m = r.match("\\")
print(m)
# Escapes used as the end points of a range inside a character class
# (newline through carriage return); the subject is the lower bound.
r = re.compile(r"[\n-\r]")
m = r.match("\n")
print(m.group(0))
# An escaped "]" inside a character class must be taken as a member of the
# class rather than as the end of the class.
r = re.compile(r"[\]]")
m = r.match("]")
print(m.group(0))
# Separator line in the printed output.
print("===")
# Character class made of two ranges; "a" falls in the first one.
r = re.compile("[a-cu-z]")
m = r.match("a")
print(m.group(0))