extmod: Pull in upstream changes to re1.5; fixes bug, adds named class.

stackless
Damien George 2015-02-21 10:33:20 +00:00
parent 2e24034c3f
commit d09a5b51c2
4 changed files with 40 additions and 3 deletions

View File

@ -11,3 +11,23 @@ int _re1_5_classmatch(const char *pc, const char *sp)
}
return !is_positive;
}
// Test a single subject character against a named character class.
//
// pc points to the class-name byte: 'd' (decimal digit), 's' (whitespace) or,
// for any other byte, the word class 'w'. A name with bit 5 set (lowercase)
// selects the class itself; with bit 5 clear (uppercase) it selects the
// complement. sp points to the subject character to test.
//
// Returns 1 when the character satisfies the (possibly negated) class,
// 0 otherwise.
int _re1_5_namedclassmatch(const char *pc, const char *sp)
{
    const int name = *pc | 0x20; // fold the class name to lowercase
    const char ch = *sp;
    int in_class;

    switch (name) {
    case 'd':
        in_class = (ch >= '0' && ch <= '9');
        break;
    case 's':
        // space, plus the control range \t \n \v \f \r
        in_class = (ch == ' ' || (ch >= '\t' && ch <= '\r'));
        break;
    default: // w
        in_class = (ch >= 'A' && ch <= 'Z')
            || (ch >= 'a' && ch <= 'z')
            || (ch >= '0' && ch <= '9')
            || ch == '_';
        break;
    }

    // Bit 5 of the name distinguishes lowercase (plain) from uppercase (negated).
    const int negated = !((*pc >> 5) & 1);
    return in_class != negated;
}

View File

@ -78,6 +78,13 @@ const char *_compilecode(const char *re, ByteProg *prog)
switch (*re) {
case '\\':
re++;
if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') {
term = pc;
EMIT(pc++, NamedClass);
EMIT(pc++, *re);
prog->len++;
break;
}
default:
term = pc;
EMIT(pc++, Char);
@ -112,11 +119,12 @@ const char *_compilecode(const char *re, ByteProg *prog)
EMIT(term + 1, cnt);
break;
}
case '(':
case '(': {
term = pc;
int sub = ++prog->sub;
EMIT(pc++, Save);
EMIT(pc++, 2 * ++prog->sub);
EMIT(pc++, 2 * sub);
prog->len++;
prog->bytelen = pc;
@ -124,10 +132,11 @@ const char *_compilecode(const char *re, ByteProg *prog)
pc = prog->bytelen;
EMIT(pc++, Save);
EMIT(pc++, 2 * prog->sub + 1);
EMIT(pc++, 2 * sub + 1);
prog->len++;
break;
}
case '?':
insert_code(code, term, 2, &pc);
EMIT(term, Split);

View File

@ -82,6 +82,7 @@ enum /* Inst.opcode */
Any,
Class,
ClassNot,
NamedClass,
ASSERTS = 0x50,
Bol = ASSERTS,
@ -145,5 +146,6 @@ int re1_5_compilecode(ByteProg *prog, const char *re);
void re1_5_dumpcode(ByteProg *prog);
void cleanmarks(ByteProg *prog);
int _re1_5_classmatch(const char *pc, const char *sp);
/* Test the character at *sp against the named class whose name byte is at *pc:
 * 'd' digits, 's' whitespace, otherwise word characters; an uppercase name
 * selects the complement. Returns 1 on match, 0 otherwise. */
int _re1_5_namedclassmatch(const char *pc, const char *sp);
#endif /*_RE1_5_REGEXP__H*/

View File

@ -30,6 +30,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
pc += *(unsigned char*)pc * 2 + 1;
sp++;
continue;
case NamedClass:
if (!_re1_5_namedclassmatch(pc, sp))
return 0;
pc++;
sp++;
continue;
case Match:
return 1;
case Jmp: