Fix re unescaping

This commit is contained in:
Scott Shawcroft 2023-10-05 13:35:07 -07:00
parent 49511b0746
commit 85650bf1ab
No known key found for this signature in database
GPG Key ID: 0DFD512649C052DA
2 changed files with 32 additions and 6 deletions

View File

@ -444,7 +444,11 @@ STATIC MP_DEFINE_CONST_DICT(re_locals_dict, re_locals_dict_table);
STATIC MP_DEFINE_CONST_OBJ_TYPE( STATIC MP_DEFINE_CONST_OBJ_TYPE(
re_type, re_type,
#if CIRCUITPY
MP_QSTR_re,
#else
MP_QSTR_ure, MP_QSTR_ure,
#endif
MP_TYPE_FLAG_NONE, MP_TYPE_FLAG_NONE,
print, re_print, print, re_print,
locals_dict, &re_locals_dict locals_dict, &re_locals_dict

View File

@ -14,6 +14,27 @@
#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err)) #define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
#define PC (prog->bytelen) #define PC (prog->bytelen)
// Map the letter of a C-style escape sequence to the control character it
// names: a, b, f, n, r, t and v become '\a', '\b', '\f', '\n', '\r', '\t' and
// '\v' respectively. Any other character (including '\0') is returned
// unchanged.
static char unescape(char c) {
    // Parallel tables: letters[i] is the escape letter, controls[i] its value.
    static const char letters[] = "abfnrtv";
    static const char controls[] = "\a\b\f\n\r\t\v";

    // Stop at the terminator so that c == '\0' never matches a table entry.
    for (int i = 0; letters[i] != '\0'; i++) {
        if (letters[i] == c) {
            return controls[i];
        }
    }
    return c;
}
static void _emit_checked(int at, char *code, int val, bool *err) { static void _emit_checked(int at, char *code, int val, bool *err) {
*err |= val != (int8_t)val; *err |= val != (int8_t)val;
if (code) { if (code) {
@ -34,14 +55,16 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
case '\\': case '\\':
re++; re++;
if (!*re) return NULL; // Trailing backslash if (!*re) return NULL; // Trailing backslash
term = PC;
if (MATCH_NAMED_CLASS_CHAR(*re)) { if (MATCH_NAMED_CLASS_CHAR(*re)) {
term = PC;
EMIT(PC++, NamedClass); EMIT(PC++, NamedClass);
EMIT(PC++, *re); EMIT(PC++, *re);
prog->len++; } else {
break; EMIT(PC++, Char);
EMIT(PC++, unescape(*re));
} }
MP_FALLTHROUGH prog->len++;
break;
default: default:
term = PC; term = PC;
EMIT(PC++, Char); EMIT(PC++, Char);
@ -74,7 +97,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
c = RE15_CLASS_NAMED_CLASS_INDICATOR; c = RE15_CLASS_NAMED_CLASS_INDICATOR;
goto emit_char_pair; goto emit_char_pair;
} else { } else {
// CIRCUITPY TODO: handle unescape here again PR #1544 c = unescape(c);
} }
} }
if (!c) return NULL; if (!c) return NULL;
@ -84,7 +107,6 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
emit_char_pair: emit_char_pair:
EMIT(PC++, c); EMIT(PC++, c);
EMIT(PC++, *re); EMIT(PC++, *re);
// CIRCUITPY TODO handle unescape here again PR #1544
} }
EMIT_CHECKED(term + 1, cnt); EMIT_CHECKED(term + 1, cnt);
break; break;