X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fregex.c;h=ce230848facdd0ba7b1709dc311daa23dd66312b;hb=9f87c6fe6552ce8b2236a07307d2f825b0e4c439;hp=31a13845d500d16d03d38579ee635cab28d65d3f;hpb=5edfa74e0f4b9830b4a3c8ac0662bde60c4ea8be;p=libucw.git diff --git a/lib/regex.c b/lib/regex.c index 31a13845..ce230848 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -1,32 +1,49 @@ /* * Sherlock Library -- Regular Expressions * - * (c) 1997 Martin Mares, + * (c) 1997 Martin Mares + * (c) 2001 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. */ +#include "lib/lib.h" +#include "lib/chartype.h" + #include #include #include #include -#include "lib.h" - #define INITIAL_MEM 1024 /* Initial space allocated for each pattern */ +#define CHAR_SET_SIZE 256 /* How many characters in the character set. */ struct regex { struct re_pattern_buffer buf; struct re_registers regs; /* Must not change between re_match() calls */ + int len_cache; }; regex * -rx_compile(byte *p) +rx_compile(byte *p, int icase) { - regex *r = xmalloc(sizeof(regex)); + regex *r = xmalloc_zero(sizeof(regex)); const char *msg; - bzero(r, sizeof(*r)); r->buf.buffer = xmalloc(INITIAL_MEM); r->buf.allocated = INITIAL_MEM; + if (icase) + { + unsigned i; + r->buf.translate = xmalloc (CHAR_SET_SIZE); + /* Map lowercase characters to corresponding uppercase ones. 
*/ + for (i = 0; i < CHAR_SET_SIZE; i++) + r->buf.translate[i] = Cupcase(i); + } + else + r->buf.translate = NULL; + re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); msg = re_compile_pattern(p, strlen(p), &r->buf); if (!msg) return r; @@ -36,15 +53,18 @@ rx_compile(byte *p) void rx_free(regex *r) { - free(r->buf.buffer); - free(r); + xfree(r->buf.buffer); + if (r->buf.translate) + xfree(r->buf.translate); + xfree(r); } int rx_match(regex *r, byte *s) { - uns len = strlen(s); + int len = strlen(s); + r->len_cache = len; if (re_match(&r->buf, s, len, 0, &r->regs) < 0) return 0; if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */ @@ -67,14 +87,19 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) by++; if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */ { - int j = *by++ - '0'; - byte *s = src + r->regs.start[j]; - int i = r->regs.end[j] - r->regs.start[j]; - if (dest + i >= end) - return -1; - memcpy(dest, s, i); - dest += i; - continue; + uns j = *by++ - '0'; + if (j < r->regs.num_regs) + { + byte *s = src + r->regs.start[j]; + uns i = r->regs.end[j] - r->regs.start[j]; + if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache) + return -1; + if (dest + i >= end) + return -1; + memcpy(dest, s, i); + dest += i; + continue; + } } } if (dest < end)