modure: Update to re1.5 v0.6.1, fixed and extended character class support.

This commit is contained in:
Paul Sokolovsky 2014-10-16 13:56:13 +03:00
parent 391db8669b
commit 297d8469b8
6 changed files with 35 additions and 9 deletions

View File

@@ -3,9 +3,11 @@
int _re1_5_classmatch(const char *pc, const char *sp)
{
// pc points to "cnt" byte after opcode
int is_positive = (pc[-1] == Class);
int cnt = *pc++;
while (cnt--) {
if (!(*sp >= *pc && *sp <= pc[1])) return 0;
if (*sp >= *pc && *sp <= pc[1]) return is_positive;
pc += 2;
}
return 1;
}
return !is_positive;
}

View File

@@ -48,6 +48,7 @@ int re1_5_sizecode(const char *re)
case '[': {
pc += 2;
re++;
if (*re == '^') re++;
while (*re != ']') {
if (!*re) return -1;
if (re[1] == '-') {
@@ -91,10 +92,15 @@ const char *_compilecode(const char *re, ByteProg *prog)
case '[': {
int cnt;
term = pc;
EMIT(pc++, Class);
re++;
if (*re == '^') {
EMIT(pc++, ClassNot);
re++;
} else {
EMIT(pc++, Class);
}
pc++; // Skip # of pair byte
prog->len++;
re++;
for (cnt = 0; *re != ']'; re++, cnt++) {
if (!*re) return NULL;
EMIT(pc++, *re);

View File

@@ -32,9 +32,11 @@ void re1_5_dumpcode(ByteProg *prog)
case Any:
printf("any\n");
break;
case Class: {
int num = code[pc++];
printf("class %d", num);
case Class:
case ClassNot: {
int num = code[pc];
printf("class%s %d", (code[pc - 1] == ClassNot ? "not" : ""), num);
pc++;
while (num--) {
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
pc += 2;

View File

@@ -81,6 +81,7 @@ enum /* Inst.opcode */
Char = CONSUMERS,
Any,
Class,
ClassNot,
ASSERTS = 0x50,
Bol = ASSERTS,

View File

@@ -24,6 +24,7 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
sp++;
continue;
case Class:
case ClassNot:
if (!_re1_5_classmatch(pc, sp))
return 0;
pc += *(unsigned char*)pc * 2 + 1;

View File

@@ -20,13 +20,27 @@ try:
except IndexError:
print("IndexError")
r = re.compile("[a-c]")
r = re.compile("[a-cu-z]")
m = r.match("a")
print(m.group(0))
m = r.match("z")
print(m.group(0))
m = r.match("d")
print(m)
m = r.match("A")
print(m)
print("===")
r = re.compile("[^a-cu-z]")
m = r.match("a")
print(m)
m = r.match("z")
print(m)
m = r.match("d")
print(m.group(0))
m = r.match("A")
print(m.group(0))
r = re.compile("o+")
m = r.search("foobar")