X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=270fb590b5e7fc8873afefc2d5c898c97a1d1287;hb=e371dcc1cd2857036374dd9597705faed0427006;hp=43ef8694ac2609a7b653e9253ae1d68e36ef1014;hpb=ad014f5f33ed7f3079c6b7e688d12baf708ee3b3;p=libucw.git diff --git a/lib/regex.c b/lib/regex.c index 43ef8694..270fb590 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -1,5 +1,5 @@ /* - * Sherlock Library -- Interface to Regular Expression Libraries + * UCW Library -- Interface to Regular Expression Libraries * * (c) 1997--2004 Martin Mares * (c) 2001 Robert Spalek @@ -14,114 +14,16 @@ #include #include -#include -#if 1 - -/* BSD regular expression library */ - -#include - -#define INITIAL_MEM 1024 /* Initial space allocated for each pattern */ -#define CHAR_SET_SIZE 256 /* How many characters in the character set. */ - -struct regex { - struct re_pattern_buffer buf; - struct re_registers regs; /* Must not change between re_match() calls */ - int len_cache; -}; - -regex * -rx_compile(byte *p, int icase) -{ - regex *r = xmalloc_zero(sizeof(regex)); - const char *msg; - - r->buf.buffer = xmalloc(INITIAL_MEM); - r->buf.allocated = INITIAL_MEM; - if (icase) - { - unsigned i; - r->buf.translate = xmalloc (CHAR_SET_SIZE); - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - r->buf.translate[i] = Cupcase(i); - } - else - r->buf.translate = NULL; - re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); - msg = re_compile_pattern(p, strlen(p), &r->buf); - if (!msg) - return r; - die("Error parsing pattern `%s': %s", p, msg); -} - -void -rx_free(regex *r) -{ - xfree(r->buf.buffer); - if (r->buf.translate) - xfree(r->buf.translate); - xfree(r); -} - -int -rx_match(regex *r, byte *s) -{ - int len = strlen(s); - - r->len_cache = len; - if (re_match(&r->buf, s, len, 0, &r->regs) < 0) - return 0; - if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */ - return 0; - return 1; -} - -int -rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) -{ - byte *end = dest + destlen - 1; - - if (!rx_match(r, src)) - return 0; - - while (*by) - { - if (*by == '\\') - { - by++; - if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */ - { - uns j = *by++ - '0'; - if (j < r->regs.num_regs) - { - byte *s = src + r->regs.start[j]; - uns i = r->regs.end[j] - r->regs.start[j]; - if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache) - return -1; - if (dest + i >= end) - return -1; - memcpy(dest, s, i); - dest += i; - continue; - } - } - } - if (dest < end) - *dest++ = *by++; - else - return -1; - } - *dest = 0; - return 1; -} - -#elif 0 +#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX) /* POSIX regular expression library */ +#ifdef CONFIG_OWN_REGEX +#include "lib/regex/regex-sh.h" +#else #include +#endif struct regex { regex_t rx; @@ -129,14 +31,14 @@ struct regex { }; regex * -rx_compile(byte *p, int icase) +rx_compile(const char *p, int icase) { regex *r = xmalloc_zero(sizeof(regex)); int err = regcomp(&r->rx, p, REG_EXTENDED | (icase ? REG_ICASE : 0)); if (err) { - byte msg[256]; + char msg[256]; regerror(err, &r->rx, msg, sizeof(msg)-1); /* regfree(&r->rx) not needed */ die("Error parsing regular expression `%s': %s", p, msg); @@ -152,7 +54,7 @@ rx_free(regex *r) } int -rx_match(regex *r, byte *s) +rx_match(regex *r, const char *s) { int err = regexec(&r->rx, s, 10, r->matches, 0); if (!err) @@ -169,9 +71,9 @@ rx_match(regex *r, byte *s) } int -rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) +rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen) { - byte *end = dest + destlen - 1; + char *end = dest + destlen - 1; if (!rx_match(r, src)) return 0; @@ -186,7 +88,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) uns j = *by++ - '0'; if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0) { - byte *s = src + r->matches[j].rm_so; + const char *s = src + r->matches[j].rm_so; uns i = r->matches[j].rm_eo - r->matches[j].rm_so; if (dest + i >= end) return -1; @@ -205,7 +107,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) return 1; } -#else +#elif defined(CONFIG_PCRE) /* PCRE library */ @@ -220,7 +122,7 @@ struct regex { }; regex * -rx_compile(byte *p, int icase) +rx_compile(const char *p, int icase) { const char *err; int errpos, match_array_size, eno; @@ -250,7 +152,7 @@ rx_free(regex *r) } int -rx_match(regex *r, byte *s) +rx_match(regex *r, const char *s) { int len = str_len(s); int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size); @@ -269,9 +171,9 @@ rx_match(regex *r, byte *s) } int -rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) +rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen) { - byte *end = dest + destlen - 1; + char *end = dest + destlen - 1; if (!rx_match(r, src)) return 0; @@ -286,7 +188,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) uns j = *by++ - '0'; if (j < r->real_matches && r->matches[2*j] >= 0) { - byte *s = src + r->matches[2*j]; + char *s = src + r->matches[2*j]; uns i = r->matches[2*j+1] - r->matches[2*j]; if (dest + i >= end) return -1; @@ -305,6 +207,111 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) return 1; } +#else + +/* BSD regular expression library */ + +#ifdef CONFIG_OWN_BSD_REGEX +#include "lib/regex/regex-sh.h" +#else +#include +#endif + +#define INITIAL_MEM 1024 /* Initial space allocated for each pattern */ +#define CHAR_SET_SIZE 256 /* How many characters in the character set. */ + +struct regex { + struct re_pattern_buffer buf; + struct re_registers regs; /* Must not change between re_match() calls */ + int len_cache; +}; + +regex * +rx_compile(const char *p, int icase) +{ + regex *r = xmalloc_zero(sizeof(regex)); + const char *msg; + + r->buf.buffer = xmalloc(INITIAL_MEM); + r->buf.allocated = INITIAL_MEM; + if (icase) + { + unsigned i; + r->buf.translate = xmalloc (CHAR_SET_SIZE); + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + r->buf.translate[i] = Cupcase(i); + } + else + r->buf.translate = NULL; + re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); + msg = re_compile_pattern(p, strlen(p), &r->buf); + if (!msg) + return r; + die("Error parsing pattern `%s': %s", p, msg); +} + +void +rx_free(regex *r) +{ + xfree(r->buf.buffer); + if (r->buf.translate) + xfree(r->buf.translate); + xfree(r); +} + +int +rx_match(regex *r, const char *s) +{ + int len = strlen(s); + + r->len_cache = len; + if (re_match(&r->buf, s, len, 0, &r->regs) < 0) + return 0; + if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */ + return 0; + return 1; +} + +int +rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen) +{ + char *end = dest + destlen - 1; + + if (!rx_match(r, src)) + return 0; + + while (*by) + { + if (*by == '\\') + { + by++; + if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */ + { + uns j = *by++ - '0'; + if (j < r->regs.num_regs) + { + const char *s = src + r->regs.start[j]; + uns i = r->regs.end[j] - r->regs.start[j]; + if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache) + return -1; + if (dest + i >= end) + return -1; + memcpy(dest, s, i); + dest += i; + continue; + } + } + } + if (dest < end) + *dest++ = *by++; + else + return -1; + } + *dest = 0; + return 1; +} + #endif #ifdef TEST @@ -312,7 +319,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen) int main(int argc, char **argv) { regex *r; - byte buf1[4096], buf2[4096]; + char buf1[4096], buf2[4096]; int opt_i = 0; if (!strcmp(argv[1], "-i"))