modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]).
This commit is contained in:
parent
d27c0bb3aa
commit
95908b0f50
@ -38,7 +38,7 @@
|
||||
|
||||
#if MICROPY_PY_URE
|
||||
|
||||
#include "re1.5/regexp.h"
|
||||
#include "re1.5/re1.5.h"
|
||||
|
||||
#define FLAG_DEBUG 0x1000
|
||||
|
||||
@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
|
||||
#include "re1.5/compilecode.c"
|
||||
#include "re1.5/dumpcode.c"
|
||||
#include "re1.5/recursiveloop.c"
|
||||
#include "re1.5/charclass.c"
|
||||
|
||||
#endif //MICROPY_PY_URE
|
||||
|
11
extmod/re1.5/charclass.c
Normal file
11
extmod/re1.5/charclass.c
Normal file
@ -0,0 +1,11 @@
|
||||
#include "re1.5.h"
|
||||
|
||||
/*
 * Test whether the subject character *sp belongs to a character class.
 *
 * pc points at the operand that follows the Class opcode: one count byte
 * "cnt", followed by cnt (low, high) byte pairs. Each pair is an inclusive
 * range; the compiler emits a single character as a pair with low == high.
 *
 * Returns 1 if *sp falls inside at least one of the ranges, 0 otherwise
 * (including when the class holds no ranges at all).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Treat the operand and the subject as unsigned bytes: plain char may be
    // signed, which would turn a count above 127 negative and mis-order
    // ranges that cross 0x7f. The matcher loop also reads the count byte as
    // unsigned char when it skips over this operand.
    const unsigned char *range = (const unsigned char *)pc;
    const unsigned char ch = (unsigned char)*sp;

    // pc points to "cnt" byte after opcode
    int cnt = *range++;

    // A class is a union of ranges: succeed on the first range that contains
    // the character, and step to the next (low, high) pair otherwise.
    for (; cnt > 0; cnt--, range += 2) {
        if (ch >= range[0] && ch <= range[1]) {
            return 1;
        }
    }

    // No range matched.
    return 0;
}
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "regexp.h"
|
||||
#include "re1.5.h"
|
||||
|
||||
static void insert_code(char *code, int at, int num, int *pc)
|
||||
{
|
||||
@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
|
||||
break;
|
||||
case ')':
|
||||
break;
|
||||
case '[': {
|
||||
pc += 2;
|
||||
re++;
|
||||
while (*re != ']') {
|
||||
if (!*re) return -1;
|
||||
if (re[1] == '-') {
|
||||
re += 2;
|
||||
}
|
||||
pc += 2;
|
||||
re++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
|
||||
EMIT(pc++, Any);
|
||||
prog->len++;
|
||||
break;
|
||||
case '[': {
|
||||
int cnt;
|
||||
term = pc;
|
||||
EMIT(pc++, Class);
|
||||
pc++; // Skip # of pair byte
|
||||
prog->len++;
|
||||
re++;
|
||||
for (cnt = 0; *re != ']'; re++, cnt++) {
|
||||
if (!*re) return NULL;
|
||||
EMIT(pc++, *re);
|
||||
if (re[1] == '-') {
|
||||
re += 2;
|
||||
}
|
||||
EMIT(pc++, *re);
|
||||
}
|
||||
EMIT(term + 1, cnt);
|
||||
break;
|
||||
}
|
||||
case '(':
|
||||
term = pc;
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "regexp.h"
|
||||
#include "re1.5.h"
|
||||
|
||||
void re1_5_dumpcode(ByteProg *prog)
|
||||
{
|
||||
@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
|
||||
case Any:
|
||||
printf("any\n");
|
||||
break;
|
||||
case Class: {
|
||||
int num = code[pc++];
|
||||
printf("class %d", num);
|
||||
while (num--) {
|
||||
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
|
||||
pc += 2;
|
||||
}
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
case Match:
|
||||
printf("match\n");
|
||||
break;
|
||||
|
@ -80,14 +80,18 @@ enum /* Inst.opcode */
|
||||
CONSUMERS = 1,
|
||||
Char = CONSUMERS,
|
||||
Any,
|
||||
Class,
|
||||
|
||||
ASSERTS = 0x50,
|
||||
Bol = ASSERTS,
|
||||
Eol,
|
||||
|
||||
// Instructions which take relative offset as arg
|
||||
JUMPS = 0x60,
|
||||
Jmp = JUMPS,
|
||||
Split,
|
||||
RSplit,
|
||||
|
||||
// Other (special) instructions
|
||||
Save = 0x7e,
|
||||
Match = 0x7f,
|
||||
@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
|
||||
int re1_5_compilecode(ByteProg *prog, const char *re);
|
||||
void re1_5_dumpcode(ByteProg *prog);
|
||||
void cleanmarks(ByteProg *prog);
|
||||
int _re1_5_classmatch(const char *pc, const char *sp);
|
||||
|
||||
#endif /*_RE1_5_REGEXP__H*/
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "regexp.h"
|
||||
#include "re1.5.h"
|
||||
|
||||
static int
|
||||
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
|
||||
@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
|
||||
case Any:
|
||||
sp++;
|
||||
continue;
|
||||
case Class:
|
||||
if (!_re1_5_classmatch(pc, sp))
|
||||
return 0;
|
||||
pc += *(unsigned char*)pc * 2 + 1;
|
||||
sp++;
|
||||
continue;
|
||||
case Match:
|
||||
return 1;
|
||||
case Jmp:
|
||||
|
@ -20,6 +20,13 @@ try:
|
||||
except IndexError:
|
||||
print("IndexError")
|
||||
|
||||
r = re.compile("[a-c]")
|
||||
m = r.match("a")
|
||||
print(m.group(0))
|
||||
m = r.match("d")
|
||||
print(m)
|
||||
m = r.match("A")
|
||||
print(m)
|
||||
|
||||
r = re.compile("o+")
|
||||
m = r.search("foobar")
|
||||
|
Loading…
Reference in New Issue
Block a user