From 85650bf1abc72f6b3607bf5946c190583117495f Mon Sep 17 00:00:00 2001
From: Scott Shawcroft
Date: Thu, 5 Oct 2023 13:35:07 -0700
Subject: [PATCH] Fix re unescaping

---
Reviewer notes (free text after the "---" separator; not part of the
commit message and not part of any hunk):

extmod/modure.c
  * The name argument given to MP_DEFINE_CONST_OBJ_TYPE for re_type is
    now selected by the preprocessor: MP_QSTR_re when CIRCUITPY is
    non-zero, MP_QSTR_ure otherwise.

lib/re1.5/compilecode.c
  * Adds the file-local helper unescape(). It maps the letters
    a, b, f, n, r, t, v to the C control characters
    '\a', '\b', '\f', '\n', '\r', '\t', '\v' and returns every other
    character unchanged.
  * In the `case '\\':` branch, `term = PC` is hoisted above the
    named-class test. A backslash followed by a character that is not
    a named class no longer falls through to `default:`; it emits Char
    followed by unescape(*re). Both branches now share the trailing
    `prog->len++; break;`.
  * In the pair-emitting code (apparently the character-class branch;
    its `case` label is outside the hunks), an escaped character that
    is not a named class is passed through unescape() before it is
    emitted as the first byte of the pair.
  * NOTE(review): at emit_char_pair the second byte of the pair is
    still the raw `*re`. Both "handle unescape here again" TODO
    comments are removed, but only the first site gained an unescape()
    call. From the lines visible here, a lone escaped character such
    as [\n] would appear to emit the pair ('\n', 'n'). Old lines 81-83
    fall between the last two hunks and are not shown, so confirm
    against the full function.
  * NOTE(review): unescape() turns 'b' into backspace at both call
    sites. In CPython's `re`, \b outside a character class is a
    word-boundary assertion and means backspace only inside a class;
    presumably this engine has no word-boundary support, but verify
    that the difference is intended.

 extmod/modure.c         |  4 ++++
 lib/re1.5/compilecode.c | 34 ++++++++++++++++++++++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/extmod/modure.c b/extmod/modure.c
index 62aeed9b28..eb2dee2666 100644
--- a/extmod/modure.c
+++ b/extmod/modure.c
@@ -444,7 +444,11 @@ STATIC MP_DEFINE_CONST_DICT(re_locals_dict, re_locals_dict_table);
 
 STATIC MP_DEFINE_CONST_OBJ_TYPE(
     re_type,
+    #if CIRCUITPY
+    MP_QSTR_re,
+    #else
     MP_QSTR_ure,
+    #endif
     MP_TYPE_FLAG_NONE,
     print, re_print,
     locals_dict, &re_locals_dict
diff --git a/lib/re1.5/compilecode.c b/lib/re1.5/compilecode.c
index e6690d56b3..081c57441e 100644
--- a/lib/re1.5/compilecode.c
+++ b/lib/re1.5/compilecode.c
@@ -14,6 +14,27 @@
 #define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
 #define PC (prog->bytelen)
 
+static char unescape(char c) {
+    switch (c) {
+        case 'a':
+            return '\a';
+        case 'b':
+            return '\b';
+        case 'f':
+            return '\f';
+        case 'n':
+            return '\n';
+        case 'r':
+            return '\r';
+        case 't':
+            return '\t';
+        case 'v':
+            return '\v';
+        default:
+            return c;
+    }
+}
+
 static void _emit_checked(int at, char *code, int val, bool *err) {
     *err |= val != (int8_t)val;
     if (code) {
@@ -34,14 +55,16 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
         case '\\':
             re++;
             if (!*re) return NULL; // Trailing backslash
+            term = PC;
             if (MATCH_NAMED_CLASS_CHAR(*re)) {
-                term = PC;
                 EMIT(PC++, NamedClass);
                 EMIT(PC++, *re);
-                prog->len++;
-                break;
+            } else {
+                EMIT(PC++, Char);
+                EMIT(PC++, unescape(*re));
             }
-            MP_FALLTHROUGH
+            prog->len++;
+            break;
         default:
             term = PC;
             EMIT(PC++, Char);
@@ -74,7 +97,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
                         c = RE15_CLASS_NAMED_CLASS_INDICATOR;
                         goto emit_char_pair;
                     } else {
-                        // CIRCUITPY TODO: handle unescape here again PR #1544
+                        c = unescape(c);
                     }
                 }
                 if (!c) return NULL;
@@ -84,7 +107,6 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
             emit_char_pair:
                 EMIT(PC++, c);
                 EMIT(PC++, *re);
-                // CIRCUITPY TODO handle unescape here again PR #1544
             }
             EMIT_CHECKED(term + 1, cnt);
             break;