2 * UCW Library -- Interface to Regular Expression Libraries
4 * (c) 1997--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2001 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 #include "lib/chartype.h"
13 #include "lib/hashfunc.h"
18 #if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
20 /* POSIX regular expression library */
22 #ifdef CONFIG_OWN_REGEX
23 #include "lib/regex/regex-sh.h"
30 regmatch_t matches[10];
34 rx_compile(byte *p, int icase)
36 regex *r = xmalloc_zero(sizeof(regex));
38 int err = regcomp(&r->rx, p, REG_EXTENDED | (icase ? REG_ICASE : 0));
42 regerror(err, &r->rx, msg, sizeof(msg)-1);
43 /* regfree(&r->rx) not needed */
44 die("Error parsing regular expression `%s': %s", p, msg);
57 rx_match(regex *r, byte *s)
59 int err = regexec(&r->rx, s, 10, r->matches, 0);
62 /* regexec() does not anchor the match implicitly, so we have to check ourselves that the match covers the whole string */
63 return !(r->matches[0].rm_so || s[r->matches[0].rm_eo]);
65 else if (err == REG_NOMATCH)
67 else if (err == REG_ESPACE)
68 die("Regex matching ran out of memory");
70 die("Regex matching failed with unknown error %d", err);
74 rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
76 byte *end = dest + destlen - 1;
78 if (!rx_match(r, src))
86 if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
89 if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
91 byte *s = src + r->matches[j].rm_so;
92 uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
110 #elif defined(CONFIG_PCRE)
119 uns match_array_size;
121 int matches[0]; /* (max_matches+1) pairs (start offset, end offset) plus some workspace */
125 rx_compile(byte *p, int icase)
128 int errpos, match_array_size, eno;
130 pcre *rx = pcre_compile(p, PCRE_ANCHORED | PCRE_EXTRA | (icase ? PCRE_CASELESS : 0), &err, &errpos, NULL);
132 die("Error parsing regular expression `%s': %s at position %d", p, err, errpos);
133 eno = pcre_fullinfo(rx, NULL, PCRE_INFO_CAPTURECOUNT, &match_array_size);
135 die("Internal error: pcre_fullinfo() failed with error %d", eno);
136 match_array_size = 3*(match_array_size+1);
137 regex *r = xmalloc_zero(sizeof(regex) + match_array_size * sizeof(int));
139 r->match_array_size = match_array_size;
140 r->extra = pcre_study(r->rx, 0, &err);
142 die("Error studying regular expression `%s': %s", p, err);
155 rx_match(regex *r, byte *s)
157 int len = str_len(s);
158 int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
161 r->real_matches = err;
162 /* need to check that the full string matches */
163 return !(r->matches[0] || s[r->matches[1]]);
165 else if (err == PCRE_ERROR_NOMATCH)
167 else if (err == PCRE_ERROR_NOMEMORY)
168 die("Regex matching ran out of memory");
170 die("Regex matching failed with unknown error %d", err);
174 rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
176 byte *end = dest + destlen - 1;
178 if (!rx_match(r, src))
186 if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
189 if (j < r->real_matches && r->matches[2*j] >= 0)
191 byte *s = src + r->matches[2*j];
192 uns i = r->matches[2*j+1] - r->matches[2*j];
212 /* BSD regular expression library */
214 #ifdef CONFIG_OWN_BSD_REGEX
215 #include "lib/regex/regex-sh.h"
220 #define INITIAL_MEM 1024 /* Initial space allocated for each pattern */
221 #define CHAR_SET_SIZE 256 /* How many characters in the character set. */
224 struct re_pattern_buffer buf;
225 struct re_registers regs; /* Must not change between re_match() calls */
230 rx_compile(byte *p, int icase)
232 regex *r = xmalloc_zero(sizeof(regex));
235 r->buf.buffer = xmalloc(INITIAL_MEM);
236 r->buf.allocated = INITIAL_MEM;
240 r->buf.translate = xmalloc (CHAR_SET_SIZE);
241 /* Build a translation table mapping each character to its upper-case form (Cupcase). */
242 for (i = 0; i < CHAR_SET_SIZE; i++)
243 r->buf.translate[i] = Cupcase(i);
246 r->buf.translate = NULL;
247 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
248 msg = re_compile_pattern(p, strlen(p), &r->buf);
251 die("Error parsing pattern `%s': %s", p, msg);
257 xfree(r->buf.buffer);
258 if (r->buf.translate)
259 xfree(r->buf.translate);
264 rx_match(regex *r, byte *s)
269 if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
271 if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */
277 rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
279 byte *end = dest + destlen - 1;
281 if (!rx_match(r, src))
289 if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
292 if (j < r->regs.num_regs)
294 byte *s = src + r->regs.start[j];
295 uns i = r->regs.end[j] - r->regs.start[j];
296 if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
319 int main(int argc, char **argv)
322 byte buf1[4096], buf2[4096];
325 if (!strcmp(argv[1], "-i"))
331 r = rx_compile(argv[1], opt_i);
332 while (fgets(buf1, sizeof(buf1), stdin))
334 char *p = strchr(buf1, '\n');
339 if (rx_match(r, buf1))
346 int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));