diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c index 01d3d14988..81c0bceef7 100644 --- a/extmod/re1.5/compilecode.c +++ b/extmod/re1.5/compilecode.c @@ -8,9 +8,9 @@ ((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num) #define REL(at, to) (to - at - 2) #define EMIT(at, byte) (code ? (code[at] = byte) : (at)) +#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err)) #define PC (prog->bytelen) - static char unescape(char c) { switch (c) { case 'a': @@ -33,9 +33,17 @@ static char unescape(char c) { } +static void _emit_checked(int at, char *code, int val, bool *err) { + *err |= val != (int8_t)val; + if (code) { + code[at] = val; + } +} + static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) { char *code = sizecode ? NULL : prog->insts; + bool err = false; int start = PC; int term = PC; int alt_label = 0; @@ -96,7 +104,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) EMIT(PC++, *re); } } - EMIT(term + 1, cnt); + EMIT_CHECKED(term + 1, cnt); break; } case '(': { @@ -107,7 +115,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) if (capture) { sub = ++prog->sub; EMIT(PC++, Save); - EMIT(PC++, 2 * sub); + EMIT_CHECKED(PC++, 2 * sub); prog->len++; } else { re += 2; @@ -118,7 +126,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) if (capture) { EMIT(PC++, Save); - EMIT(PC++, 2 * sub + 1); + EMIT_CHECKED(PC++, 2 * sub + 1); prog->len++; } @@ -133,7 +141,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) } else { EMIT(term, Split); } - EMIT(term + 1, REL(term, PC)); + EMIT_CHECKED(term + 1, REL(term, PC)); prog->len++; term = PC; break; @@ -141,7 +149,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) if (PC == term) return NULL; // nothing to repeat INSERT_CODE(term, 2, PC); EMIT(PC, Jmp); - EMIT(PC + 1, REL(PC, term)); + EMIT_CHECKED(PC + 1, REL(PC, term)); PC += 2; if (re[1] == '?') { EMIT(term, RSplit); @@ -149,7 +157,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) } else { EMIT(term, Split); } - EMIT(term + 1, REL(term, PC)); + EMIT_CHECKED(term + 1, REL(term, PC)); prog->len += 2; term = PC; break; @@ -161,20 +169,20 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) } else { EMIT(PC, RSplit); } - EMIT(PC + 1, REL(PC, term)); + EMIT_CHECKED(PC + 1, REL(PC, term)); PC += 2; prog->len++; term = PC; break; case '|': if (alt_label) { - EMIT(alt_label, REL(alt_label, PC) + 1); + EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); } INSERT_CODE(start, 2, PC); EMIT(PC++, Jmp); alt_label = PC++; EMIT(start, Split); - EMIT(start + 1, REL(start, PC)); + EMIT_CHECKED(start + 1, REL(start, PC)); prog->len += 2; term = PC; break; @@ -192,9 +200,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) } if (alt_label) { - EMIT(alt_label, REL(alt_label, PC) + 1); + EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); } - return re; + return err ? NULL : re; } int re1_5_sizecode(const char *re) diff --git a/tests/extmod/ure_limit.py b/tests/extmod/ure_limit.py new file mode 100644 index 0000000000..99c6a818e8 --- /dev/null +++ b/tests/extmod/ure_limit.py @@ -0,0 +1,34 @@ +# Test overflow in ure.compile output code. + +try: + import ure as re +except ImportError: + print("SKIP") + raise SystemExit + + +def test_re(r): + try: + re.compile(r) + except: + print("Error") + + +# too many chars in [] +test_re("[" + "a" * 256 + "]") + +# too many groups +test_re("(a)" * 256) + +# jump too big for ? +test_re("(" + "a" * 62 + ")?") + +# jump too big for * +test_re("(" + "a" * 60 + ".)*") +test_re("(" + "a" * 60 + "..)*") + +# jump too big for + +test_re("(" + "a" * 62 + ")+") + +# jump too big for | +test_re("b" * 63 + "|a") diff --git a/tests/extmod/ure_limit.py.exp b/tests/extmod/ure_limit.py.exp new file mode 100644 index 0000000000..8353be536c --- /dev/null +++ b/tests/extmod/ure_limit.py.exp @@ -0,0 +1,7 @@ +Error +Error +Error +Error +Error +Error +Error