modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]).

This commit is contained in:
Paul Sokolovsky 2014-10-15 04:43:13 +03:00
parent d27c0bb3aa
commit 95908b0f50
7 changed files with 74 additions and 4 deletions

View File

@ -38,7 +38,7 @@
#if MICROPY_PY_URE
#include "re1.5/regexp.h"
#include "re1.5/re1.5.h"
#define FLAG_DEBUG 0x1000
@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
#include "re1.5/compilecode.c"
#include "re1.5/dumpcode.c"
#include "re1.5/recursiveloop.c"
#include "re1.5/charclass.c"
#endif //MICROPY_PY_URE

11
extmod/re1.5/charclass.c Normal file
View File

@ -0,0 +1,11 @@
#include "re1.5.h"
/*
 * Test whether the subject character at sp belongs to a character class.
 *
 * pc points at the operand that follows the Class opcode: one count byte,
 * then that many (low, high) byte pairs, each describing an inclusive range.
 *
 * Returns 1 if *sp lies inside at least one of the ranges, 0 otherwise
 * (so a class with zero pairs matches nothing).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read the count as an unsigned byte: plain char may be signed, and a
    // negative count would make the loop below run far past the operand.
    unsigned int cnt = (unsigned char)*pc++;
    // Compare as unsigned bytes so ranges that cross 0x7f/0x80 stay ordered.
    const unsigned char ch = (unsigned char)*sp;

    while (cnt--) {
        const unsigned char lo = (unsigned char)pc[0];
        const unsigned char hi = (unsigned char)pc[1];
        if (ch >= lo && ch <= hi) {
            return 1; // one matching range is enough
        }
        pc += 2; // step to the next (low, high) pair
    }
    return 0;
}

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "regexp.h"
#include "re1.5.h"
static void insert_code(char *code, int at, int num, int *pc)
{
@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
break;
case ')':
break;
case '[': {
    // A character class needs one opcode byte and one pair-count byte,
    // followed by two bytes (low, high) per listed character or range.
    pc += 2;
    re++;
    while (*re != ']') {
        // Pattern ended before the closing ']'
        if (!*re) return -1;
        if (re[1] == '-') {
            // "x-y" range: move to the upper bound
            re += 2;
            // A range with no upper bound (e.g. "[a-") ends the pattern
            // here; without this check re would step past the terminator.
            if (!*re) return -1;
        }
        pc += 2;
        re++;
    }
    break;
}
}
}
@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
EMIT(pc++, Any);
prog->len++;
break;
case '[': {
    // Emit a Class instruction: opcode, pair count, then (low, high) pairs.
    int cnt;
    term = pc;
    EMIT(pc++, Class);
    pc++; // Skip # of pair byte; it is filled in once the pairs are counted
    prog->len++;
    re++;
    for (cnt = 0; *re != ']'; re++, cnt++) {
        // Pattern ended before the closing ']'
        if (!*re) return NULL;
        EMIT(pc++, *re); // low bound
        if (re[1] == '-') {
            // "x-y" range: move to the upper bound
            re += 2;
            // A range with no upper bound (e.g. "[a-") ends the pattern
            // here; without this check re would step past the terminator.
            if (!*re) return NULL;
        }
        // High bound; for a single character this is the same byte as low
        EMIT(pc++, *re);
    }
    EMIT(term + 1, cnt);
    break;
}
case '(':
term = pc;

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "regexp.h"
#include "re1.5.h"
void re1_5_dumpcode(ByteProg *prog)
{
@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
case Any:
printf("any\n");
break;
// Dump a Class instruction as "class <count>" followed by each range in hex.
case Class: {
// The byte after the opcode is the number of (low, high) pairs that follow.
int num = code[pc++];
printf("class %d", num);
while (num--) {
// NOTE(review): the type of code is not visible here; if it is plain
// (signed) char, bytes >= 0x80 would print sign-extended - confirm.
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
// Each range occupies two bytes
pc += 2;
}
printf("\n");
break;
}
case Match:
printf("match\n");
break;

View File

@ -80,14 +80,18 @@ enum /* Inst.opcode */
CONSUMERS = 1,
Char = CONSUMERS,
Any,
Class,
ASSERTS = 0x50,
Bol = ASSERTS,
Eol,
// Instructions which take relative offset as arg
JUMPS = 0x60,
Jmp = JUMPS,
Split,
RSplit,
// Other (special) instructions
Save = 0x7e,
Match = 0x7f,
@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
int re1_5_compilecode(ByteProg *prog, const char *re);
void re1_5_dumpcode(ByteProg *prog);
void cleanmarks(ByteProg *prog);
int _re1_5_classmatch(const char *pc, const char *sp);
#endif /*_RE1_5_REGEXP__H*/

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "regexp.h"
#include "re1.5.h"
static int
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
case Any:
sp++;
continue;
// Character class: the current subject character must satisfy the class
// operand at pc, otherwise 0 is returned from this call.
case Class:
if (!_re1_5_classmatch(pc, sp))
return 0;
// Skip the operand: one count byte plus two bytes per pair. The count is
// read as an unsigned byte.
pc += *(unsigned char*)pc * 2 + 1;
// Consume the matched subject character
sp++;
continue;
case Match:
return 1;
case Jmp:

View File

@ -20,6 +20,13 @@ try:
except IndexError:
print("IndexError")
# Character class consisting of a single range
r = re.compile("[a-c]")
# "a" lies inside the range, so the match succeeds
m = r.match("a")
print(m.group(0))
# "d" lies outside the range: no match expected
m = r.match("d")
print(m)
# Matching is case-sensitive: uppercase "A" is not in a-c
m = r.match("A")
print(m)
r = re.compile("o+")
m = r.search("foobar")