extmod/re1.5: Check and report byte overflow errors in _compilecode.

The generated regex code is limited in the range of jumps and counts, and
this commit checks all cases which can overflow given the right kind of
input regex, and returns an error in such a case.

This change assumes that the results that overflow an int8_t do not
overflow a platform int.

Closes: #7078

Signed-off-by: Jeff Epler <jepler@gmail.com>

# Conflicts:
#	extmod/re1.5/compilecode.c
This commit is contained in:
Jeff Epler 2021-04-06 19:10:01 -05:00
parent 82479b6a2c
commit 2fb5eb3b11
3 changed files with 61 additions and 12 deletions

View File

@ -8,9 +8,9 @@
((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
#define REL(at, to) (to - at - 2)
#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
#define PC (prog->bytelen)
static char unescape(char c) {
switch (c) {
case 'a':
@ -33,9 +33,17 @@ static char unescape(char c) {
}
static void _emit_checked(int at, char *code, int val, bool *err) {
*err |= val != (int8_t)val;
if (code) {
code[at] = val;
}
}
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
{
char *code = sizecode ? NULL : prog->insts;
bool err = false;
int start = PC;
int term = PC;
int alt_label = 0;
@ -96,7 +104,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
EMIT(PC++, *re);
}
}
EMIT(term + 1, cnt);
EMIT_CHECKED(term + 1, cnt);
break;
}
case '(': {
@ -107,7 +115,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (capture) {
sub = ++prog->sub;
EMIT(PC++, Save);
EMIT(PC++, 2 * sub);
EMIT_CHECKED(PC++, 2 * sub);
prog->len++;
} else {
re += 2;
@ -118,7 +126,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (capture) {
EMIT(PC++, Save);
EMIT(PC++, 2 * sub + 1);
EMIT_CHECKED(PC++, 2 * sub + 1);
prog->len++;
}
@ -133,7 +141,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(term, Split);
}
EMIT(term + 1, REL(term, PC));
EMIT_CHECKED(term + 1, REL(term, PC));
prog->len++;
term = PC;
break;
@ -141,7 +149,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (PC == term) return NULL; // nothing to repeat
INSERT_CODE(term, 2, PC);
EMIT(PC, Jmp);
EMIT(PC + 1, REL(PC, term));
EMIT_CHECKED(PC + 1, REL(PC, term));
PC += 2;
if (re[1] == '?') {
EMIT(term, RSplit);
@ -149,7 +157,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(term, Split);
}
EMIT(term + 1, REL(term, PC));
EMIT_CHECKED(term + 1, REL(term, PC));
prog->len += 2;
term = PC;
break;
@ -161,20 +169,20 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(PC, RSplit);
}
EMIT(PC + 1, REL(PC, term));
EMIT_CHECKED(PC + 1, REL(PC, term));
PC += 2;
prog->len++;
term = PC;
break;
case '|':
if (alt_label) {
EMIT(alt_label, REL(alt_label, PC) + 1);
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
}
INSERT_CODE(start, 2, PC);
EMIT(PC++, Jmp);
alt_label = PC++;
EMIT(start, Split);
EMIT(start + 1, REL(start, PC));
EMIT_CHECKED(start + 1, REL(start, PC));
prog->len += 2;
term = PC;
break;
@ -192,9 +200,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
}
if (alt_label) {
EMIT(alt_label, REL(alt_label, PC) + 1);
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
}
return re;
return err ? NULL : re;
}
int re1_5_sizecode(const char *re)

34
tests/extmod/ure_limit.py Normal file
View File

@ -0,0 +1,34 @@
# Test overflow in ure.compile output code.
try:
import ure as re
except ImportError:
print("SKIP")
raise SystemExit
def test_re(r):
try:
re.compile(r)
except:
print("Error")
# too many chars in []
test_re("[" + "a" * 256 + "]")
# too many groups
test_re("(a)" * 256)
# jump too big for ?
test_re("(" + "a" * 62 + ")?")
# jump too big for *
test_re("(" + "a" * 60 + ".)*")
test_re("(" + "a" * 60 + "..)*")
# jump too big for +
test_re("(" + "a" * 62 + ")+")
# jump too big for |
test_re("b" * 63 + "|a")

View File

@ -0,0 +1,7 @@
Error
Error
Error
Error
Error
Error
Error