From: Pavel Charvat Date: Tue, 27 Jan 2009 23:48:36 +0000 (+0100) Subject: Merge branch 'v3.12.4' X-Git-Tag: holmes-import~118^2~6 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=1cf8ac51f5495ccd5187dc220ffc69e95d6e0cfc;p=libucw.git Merge branch 'v3.12.4' Conflicts: centrum/cz/cf/watson centrum/cz/doc/provoz.wiki lib/Makefile search/refs.c ucw/default.cfg ucw/perl/UCW/Configure/C.pm --- 1cf8ac51f5495ccd5187dc220ffc69e95d6e0cfc diff --cc ucw/default.cfg index f042800f,00000000..58f7e594 mode 100644,000000..100644 --- a/ucw/default.cfg +++ b/ucw/default.cfg @@@ -1,54 -1,0 +1,59 @@@ +# Configuration variables of the UCW library and their default values +# (c) 2005--2008 Martin Mares + +# Version of the whole package +Set("SHERLOCK_VERSION" => "3.99.1"); +Set("SHERLOCK_VERSION_CODE" => 3099001); +Set("UCW_VERSION" => Get("SHERLOCK_VERSION")); +Set("UCW_VERSION_CODE" => Get("SHERLOCK_VERSION_CODE")); + +# Compile everything with debug information and ASSERT's +UnSet("CONFIG_DEBUG"); + +# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages) +UnSet("CONFIG_EXACT_CPU"); + +# Support files >2GB +Set("CONFIG_LARGE_FILES"); + +# Use shared libraries +UnSet("CONFIG_SHARED"); + ++# If your system doesn't contain GNU libc 2.3 or newer, it's recommended to let Sherlock ++# use its own regex library (a copy of the glibc one), because the default regex library ++# is likely to be crappy. ++Set("CONFIG_OWN_REGEX"); ++ +# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy +# of GNU libc's getopt. This should not be necessary on GNU libc. 
+UnSet("CONFIG_OWN_GETOPT");
+
+# Install libraries and their API includes
+UnSet("CONFIG_INSTALL_API");
+
+# Build with support for multi-threaded programs
+Set("CONFIG_UCW_THREADS" => 1);
+
+# Include Perl modules
+Set("CONFIG_UCW_PERL" => 1);
+
+# Include Perl modules written in C
+UnSet("CONFIG_UCW_PERL_MODULES");
+
+# Include support utilities for shell scripts
+Set("CONFIG_UCW_SHELL_UTILS" => 1);
+
+# Include utilities
+Set("CONFIG_UCW_UTILS" => 1);
+
+# Default configuration file
+UnSet("DEFAULT_CONFIG");
+
+# Environment variable with configuration file
+UnSet("ENV_VAR_CONFIG");
+
+# Use obsolete URL escaping rules (if you need behavior identical to the older versions of libucw)
+UnSet("CONFIG_URL_ESCAPE_COMPAT");
+
+# Return success
+1;
diff --cc ucw/regex.c
index 6ead4648,00000000..f74f33e1
mode 100644,000000..100644
--- a/ucw/regex.c
+++ b/ucw/regex.c
@@@ -1,351 -1,0 +1,355 @@@
+/*
+ * UCW Library -- Interface to Regular Expression Libraries
+ *
+ * (c) 1997--2004 Martin Mares
+ * (c) 2001 Robert Spalek
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "ucw/lib.h"
+#include "ucw/chartype.h"
+#include "ucw/hashfunc.h"
+#include "ucw/regex.h"
+
+#include <stdio.h>
+#include <string.h>
+
- #ifdef CONFIG_POSIX_REGEX
++#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
+
+/* POSIX regular expression library */
+
++#ifdef CONFIG_OWN_REGEX
++#include "lib/regex/regex-sh.h"
++#else
+#include <regex.h>
++#endif
+
+/* Compiled pattern together with the sub-match offsets of the last rx_match(). */
+struct regex {
+  regex_t rx;
+  regmatch_t matches[10];       /* Whole match + groups \1..\9 */
+};
+
+/*
+ * Compile the extended regular expression `p' (case-insensitively if `icase'
+ * is non-zero) into a newly allocated regex.  A pattern that fails to compile
+ * is reported through die().
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  regex *r = xmalloc_zero(sizeof(regex));
+
+  int err = regcomp(&r->rx, p, REG_EXTENDED | (icase ? REG_ICASE : 0));
+  if (err)
+    {
+      char msg[256];
+      regerror(err, &r->rx, msg, sizeof(msg)-1);
+      /* regfree(&r->rx) not needed */
+      die("Error parsing regular expression `%s': %s", p, msg);
+    }
+  return r;
+}
+
+/* Release a regex obtained from rx_compile(). */
+void
+rx_free(regex *r)
+{
+  regfree(&r->rx);
+  xfree(r);
+}
+
+/*
+ * Return 1 if the pattern matches the whole string `s', 0 otherwise.
+ * Sub-match offsets are left in r->matches for rx_subst().
+ * Any regexec() failure other than REG_NOMATCH is reported through die().
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int err = regexec(&r->rx, s, 10, r->matches, 0);
+  if (!err)
+    {
+      /* regexec doesn't support anchored expressions, so we have to check ourselves that the full string is matched */
+      return !(r->matches[0].rm_so || s[r->matches[0].rm_eo]);
+    }
+  else if (err == REG_NOMATCH)
+    return 0;
+  else if (err == REG_ESPACE)
+    die("Regex matching ran out of memory");
+  else
+    die("Regex matching failed with unknown error %d", err);
+}
+
+/*
+ * Match `src' against `r' and, on success, expand the template `by' into
+ * `dest' (at most `destlen' bytes including the terminating zero).
+ * A backslash followed by a digit inserts the corresponding group,
+ * any other escaped character is copied literally.
+ * Returns 1 on success, 0 if `src' does not match, -1 if `dest' is too small.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end = dest + destlen - 1;
+
+  if (!rx_match(r, src))
+    return 0;
+
+  while (*by)
+    {
+      if (*by == '\\')
+        {
+          by++;
+          if (*by >= '0' && *by <= '9')  /* \0 gets replaced by entire pattern */
+            {
+              uns j = *by++ - '0';
+              if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
+                {
+                  const char *s = src + r->matches[j].rm_so;
+                  uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
+                  if (dest + i >= end)
+                    return -1;
+                  memcpy(dest, s, i);
+                  dest += i;
+                  continue;
+                }
+            }
+        }
+      /* The template may end right after a backslash or an unused \digit; never step over its terminator */
+      if (!*by)
+        break;
+      if (dest < end)
+        *dest++ = *by++;
+      else
+        return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#elif defined(CONFIG_PCRE)
+
+/* PCRE library */
+
+#include <pcre.h>
+
+/* Compiled pattern, its study data and the offset vector filled by pcre_exec(). */
+struct regex {
+  pcre *rx;
+  pcre_extra *extra;
+  uns match_array_size;
+  uns real_matches;
+  int matches[0];               /* (max_matches+1) pairs (start,end) plus some workspace */
+};
+
+/*
+ * Compile the pattern `p' (anchored; case-insensitively if `icase' is non-zero)
+ * and study it.  The regex is allocated together with an offset vector of
+ * 3*(capture count + 1) ints.  All failures are reported through die().
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  const char *err;
+  int errpos, match_array_size, eno;
+
+  pcre *rx = pcre_compile(p, PCRE_ANCHORED | PCRE_EXTRA | (icase ? PCRE_CASELESS : 0), &err, &errpos, NULL);
+  if (!rx)
+    die("Error parsing regular expression `%s': %s at position %d", p, err, errpos);
+  eno = pcre_fullinfo(rx, NULL, PCRE_INFO_CAPTURECOUNT, &match_array_size);
+  if (eno)
+    die("Internal error: pcre_fullinfo() failed with error %d", eno);
+  match_array_size = 3*(match_array_size+1);
+  regex *r = xmalloc_zero(sizeof(regex) + match_array_size * sizeof(int));
+  r->rx = rx;
+  r->match_array_size = match_array_size;
+  r->extra = pcre_study(r->rx, 0, &err);
+  if (err)
+    die("Error studying regular expression `%s': %s", p, err);
+  return r;
+}
+
+/*
+ * Release a regex obtained from rx_compile().  The pattern and the study data
+ * were allocated by the PCRE library, so they are returned through pcre_free();
+ * only the wrapper structure itself came from xmalloc_zero().
+ */
+void
+rx_free(regex *r)
+{
+  pcre_free(r->rx);
+  if (r->extra)                 /* pcre_study() may legitimately return NULL */
+    pcre_free(r->extra);
+  xfree(r);
+}
+
+/*
+ * Return 1 if the pattern matches the whole string `s', 0 otherwise.
+ * The number of offset pairs set by pcre_exec() is kept in r->real_matches.
+ * Any pcre_exec() failure other than PCRE_ERROR_NOMATCH is reported through die().
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int len = str_len(s);
+  int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
+  if (err >= 0)
+    {
+      r->real_matches = err;
+      /* need to check that the full string matches */
+      return !(r->matches[0] || s[r->matches[1]]);
+    }
+  else if (err == PCRE_ERROR_NOMATCH)
+    return 0;
+  else if (err == PCRE_ERROR_NOMEMORY)
+    die("Regex matching ran out of memory");
+  else
+    die("Regex matching failed with unknown error %d", err);
+}
+
+/*
+ * Match `src' against `r' and, on success, expand the template `by' into
+ * `dest' (at most `destlen' bytes including the terminating zero).
+ * A backslash followed by a digit inserts the corresponding group,
+ * any other escaped character is copied literally.
+ * Returns 1 on success, 0 if `src' does not match, -1 if `dest' is too small.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end = dest + destlen - 1;
+
+  if (!rx_match(r, src))
+    return 0;
+
+  while (*by)
+    {
+      if (*by == '\\')
+        {
+          by++;
+          if (*by >= '0' && *by <= '9')  /* \0 gets replaced by entire pattern */
+            {
+              uns j = *by++ - '0';
+              if (j < r->real_matches && r->matches[2*j] >= 0)
+                {
+                  const char *s = src + r->matches[2*j];
+                  uns i = r->matches[2*j+1] - r->matches[2*j];
+                  if (dest + i >= end)
+                    return -1;
+                  memcpy(dest, s, i);
+                  dest += i;
+                  continue;
+                }
+            }
+        }
+      /* The template may end right after a backslash or an unused \digit; never step over its terminator */
+      if (!*by)
+        break;
+      if (dest < end)
+        *dest++ = *by++;
+      else
+        return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#else
+
+/* BSD regular expression library */
+
+#include <regex.h>
+
+#define INITIAL_MEM 1024                /* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256               /* How many characters in the character set. */
+
+/* Compiled pattern, the registers of the last match and the length of the last subject. */
+struct regex {
+  struct re_pattern_buffer buf;
+  struct re_registers regs;             /* Must not change between re_match() calls */
+  int len_cache;
+};
+
+/*
+ * Compile the POSIX extended pattern `p' into a newly allocated regex.
+ * If `icase' is non-zero, a translation table is installed so that matching
+ * ignores case.  A pattern that fails to compile is reported through die().
+ */
+regex *
+rx_compile(const char *p, int icase)
+{
+  regex *r = xmalloc_zero(sizeof(regex));
+  const char *msg;
+
+  r->buf.buffer = xmalloc(INITIAL_MEM);
+  r->buf.allocated = INITIAL_MEM;
+  if (icase)
+    {
+      unsigned i;
+      r->buf.translate = xmalloc (CHAR_SET_SIZE);
+      /* Fold case through Cupcase() (presumably to upper case, see ucw/chartype.h). */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        r->buf.translate[i] = Cupcase(i);
+    }
+  else
+    r->buf.translate = NULL;
+  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
+  msg = re_compile_pattern(p, strlen(p), &r->buf);
+  if (!msg)
+    return r;
+  die("Error parsing pattern `%s': %s", p, msg);
+}
+
+/* Release a regex obtained from rx_compile(), including its pattern buffer and translation table. */
+void
+rx_free(regex *r)
+{
+  xfree(r->buf.buffer);
+  if (r->buf.translate)
+    xfree(r->buf.translate);
+  xfree(r);
+}
+
+/*
+ * Return 1 if the pattern matches the whole string `s', 0 otherwise.
+ * The subject length is remembered in r->len_cache for rx_subst().
+ */
+int
+rx_match(regex *r, const char *s)
+{
+  int len = strlen(s);
+
+  r->len_cache = len;
+  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+    return 0;
+  if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */
+    return 0;
+  return 1;
+}
+
+/*
+ * Match `src' against `r' and, on success, expand the template `by' into
+ * `dest' (at most `destlen' bytes including the terminating zero).
+ * A backslash followed by a digit inserts the corresponding register,
+ * any other escaped character is copied literally.
+ * Returns 1 on success, 0 if `src' does not match, -1 if `dest' is too small
+ * or a register points outside of the subject.
+ */
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
+{
+  char *end = dest + destlen - 1;
+
+  if (!rx_match(r, src))
+    return 0;
+
+  while (*by)
+    {
+      if (*by == '\\')
+        {
+          by++;
+          if (*by >= '0' && *by <= '9')  /* \0 gets replaced by entire pattern */
+            {
+              uns j = *by++ - '0';
+              if (j < r->regs.num_regs)
+                {
+                  const char *s = src + r->regs.start[j];
+                  uns i = r->regs.end[j] - r->regs.start[j];
+                  if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
+                    return -1;
+                  if (dest + i >= end)
+                    return -1;
+                  memcpy(dest, s, i);
+                  dest += i;
+                  continue;
+                }
+            }
+        }
+      /* The template may end right after a backslash or an unused \digit; never step over its terminator */
+      if (!*by)
+        break;
+      if (dest < end)
+        *dest++ = *by++;
+      else
+        return -1;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#endif
+
+#ifdef TEST
+
+/*
+ * Test driver: regex-test [-i] <pattern> [<replacement>]
+ * Reads lines from stdin; with a pattern only it prints MATCH / NO MATCH,
+ * with a replacement it prints the substituted line, NO MATCH or OVERFLOW.
+ */
+int main(int argc, char **argv)
+{
+  regex *r;
+  char buf1[4096], buf2[4096];
+  int opt_i = 0;
+
+  if (argc > 1 && !strcmp(argv[1], "-i"))
+    {
+      opt_i = 1;
+      argv++;
+      argc--;
+    }
+  if (argc < 2)                 /* argv[1] would be NULL below */
+    {
+      fputs("Usage: [-i] <pattern> [<replacement>]\n", stderr);
+      return 1;
+    }
+  r = rx_compile(argv[1], opt_i);
+  while (fgets(buf1, sizeof(buf1), stdin))
+    {
+      char *p = strchr(buf1, '\n');
+      if (p)
+        *p = 0;
+      if (argc == 2)
+        {
+          if (rx_match(r, buf1))
+            puts("MATCH");
+          else
+            puts("NO MATCH");
+        }
+      else
+        {
+          int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
+          if (i < 0)
+            puts("OVERFLOW");
+          else if (!i)
+            puts("NO MATCH");
+          else
+            puts(buf2);
+        }
+    }
+  rx_free(r);
+  return 0;
+}
+
+#endif