From 5edfa74e0f4b9830b4a3c8ac0662bde60c4ea8be Mon Sep 17 00:00:00 2001
From: Martin Mares
Date: Tue, 25 Nov 1997 11:19:39 +0000
Subject: [PATCH] Added regex library interface.

---
Review amendments to lib/regex.c: reconstructed the four #include targets
missing from this copy of the patch, fixed rx_subst() reading past a template
that ends in a backslash and indexing match registers that do not exist, let
rx_free() release the match registers, and turned the TEST driver into an
int main() that checks argc.

 lib/lib.h   |   9 +++
 lib/regex.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 174 insertions(+)
 create mode 100644 lib/regex.c

diff --git a/lib/lib.h b/lib/lib.h
index 137c2472..24d7e352 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -140,3 +140,12 @@ uns nextprime(uns);
 
 void init_timer(void);
 uns get_timer(void);
+
+/* regex.c */
+
+typedef struct regex regex;
+
+regex *rx_compile(byte *r);
+void rx_free(regex *r);
+int rx_match(regex *r, byte *s);
+int rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen);
diff --git a/lib/regex.c b/lib/regex.c
new file mode 100644
index 00000000..31a13845
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,165 @@
+/*
+ *  Sherlock Library -- Regular Expressions
+ *
+ *  (c) 1997 Martin Mares,
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+
+#include "lib.h"
+
+#define INITIAL_MEM 1024  /* Initial space allocated for each pattern */
+
+struct regex {
+  struct re_pattern_buffer buf;
+  struct re_registers regs;  /* Must not change between re_match() calls */
+};
+
+/*
+ * Compile pattern `p' and return a freshly allocated regex; release it with
+ * rx_free(). A pattern that does not compile is reported through die().
+ */
+regex *
+rx_compile(byte *p)
+{
+  regex *r = xmalloc(sizeof(*r));
+  const char *msg;
+
+  memset(r, 0, sizeof(*r));
+  r->buf.buffer = xmalloc(INITIAL_MEM);
+  r->buf.allocated = INITIAL_MEM;
+  msg = re_compile_pattern(p, strlen(p), &r->buf);
+  if (msg)
+    die("Error parsing pattern `%s': %s", p, msg);  /* presumably does not return */
+  return r;
+}
+
+/*
+ * Release a regex obtained from rx_compile(), including the register arrays
+ * the regex library may have allocated while matching. NULL is ignored.
+ */
+void
+rx_free(regex *r)
+{
+  if (!r)
+    return;
+  free(r->regs.start);  /* NULL unless the matcher allocated them */
+  free(r->regs.end);
+  free(r->buf.buffer);
+  free(r);
+}
+
+/*
+ * Return 1 if the whole string `s' matches `r', 0 otherwise. On success the
+ * sub-match offsets stay in r->regs, where rx_subst() picks them up.
+ */
+int
+rx_match(regex *r, byte *s)
+{
+  uns len = strlen(s);
+
+  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+    return 0;
+  /* re_match() anchors only the start, so a match of a mere prefix is rejected */
+  if (r->regs.start[0] || r->regs.end[0] != len)
+    return 0;
+  return 1;
+}
+
+/*
+ * If `src' matches `r' as a whole, expand the template `by' into `dest',
+ * which holds `destlen' bytes including the terminating zero. "\0" stands
+ * for the entire match and "\1" to "\9" for the respective groups; groups
+ * that do not exist or did not take part in the match expand to nothing.
+ * A backslash before any other character quotes it, and a backslash at the
+ * very end of the template is copied as is.
+ *
+ * Returns 1 on success, 0 if `src' does not match, -1 if `dest' is too small.
+ */
+int
+rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+{
+  byte *end;
+
+  if (!rx_match(r, src))
+    return 0;
+  if (!destlen)
+    return -1;
+  end = dest + destlen - 1;  /* Reserved for the terminating zero */
+
+  while (*by)
+    {
+      if (*by == '\\' && by[1])
+        {
+          by++;
+          if (*by >= '0' && *by <= '9')
+            {
+              unsigned int j = *by++ - '0';
+              int i;
+
+              if (j >= r->regs.num_regs || r->regs.start[j] < 0)
+                continue;
+              i = r->regs.end[j] - r->regs.start[j];
+              if (i > end - dest)
+                return -1;
+              memcpy(dest, src + r->regs.start[j], i);
+              dest += i;
+              continue;
+            }
+        }
+      if (dest >= end)
+        return -1;
+      *dest++ = *by++;
+    }
+  *dest = 0;
+  return 1;
+}
+
+#ifdef TEST
+
+/*
+ * Test driver: with only a pattern argument, report for each line of stdin
+ * whether it matches; with an additional template, print the substitution.
+ */
+int main(int argc, char **argv)
+{
+  regex *r;
+  byte buf1[256], buf2[256];
+
+  if (argc < 2)
+    {
+      fprintf(stderr, "Usage: %s <pattern> [<template>]\n", argc ? argv[0] : "regex");
+      return 1;
+    }
+  r = rx_compile(argv[1]);
+  while (fgets(buf1, sizeof(buf1), stdin))
+    {
+      char *p = strchr(buf1, '\n');
+      if (p)
+        *p = 0;
+      if (argc == 2)
+        {
+          if (rx_match(r, buf1))
+            puts("MATCH");
+          else
+            puts("NO MATCH");
+        }
+      else
+        {
+          int i = rx_subst(r, argv[2], buf1, buf2, sizeof(buf2));
+          if (i < 0)
+            puts("OVERFLOW");
+          else if (!i)
+            puts("NO MATCH");
+          else
+            puts(buf2);
+        }
+    }
+  rx_free(r);
+  return 0;
+}
+
+#endif
-- 
2.39.5