Added functions for manipulating bit arrays. One day, an optimized

[libucw.git] / lib / regex.c
diff --git a/lib/regex.c b/lib/regex.c

index 31a13845d500d16d03d38579ee635cab28d65d3f..ce230848facdd0ba7b1709dc311daa23dd66312b 100644 (file)
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -1,32 +1,49 @@
  /*
   *     Sherlock Library -- Regular Expressions
   *
- *     (c) 1997 Martin Mares, <mj@atrey.karlin.mff.cuni.cz>
+ *     (c) 1997 Martin Mares <mj@ucw.cz>
+ *     (c) 2001 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
   */
  
+#include "lib/lib.h"
+#include "lib/chartype.h"
+
  #include <stdio.h>
  #include <string.h>
  #include <stdlib.h>
  #include <regex.h>
  
-#include "lib.h"
-
  #define INITIAL_MEM 1024               /* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256              /* How many characters in the character set.  */
  
  struct regex {
    struct re_pattern_buffer buf;
    struct re_registers regs;            /* Must not change between re_match() calls */
+  int len_cache;
  };
  
  regex *
-rx_compile(byte *p)
+rx_compile(byte *p, int icase)
  {
-  regex *r = xmalloc(sizeof(regex));
+  regex *r = xmalloc_zero(sizeof(regex));
    const char *msg;
  
-  bzero(r, sizeof(*r));
    r->buf.buffer = xmalloc(INITIAL_MEM);
    r->buf.allocated = INITIAL_MEM;
+  if (icase)
+    {
+      unsigned i;
+      r->buf.translate = xmalloc (CHAR_SET_SIZE);
+      /* Case-insensitive matching: translate every character via Cupcase().  */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        r->buf.translate[i] = Cupcase(i);
+    }
+  else
+    r->buf.translate = NULL;
+  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
    msg = re_compile_pattern(p, strlen(p), &r->buf);
    if (!msg)
      return r;
@@ -36,15 +53,18 @@ rx_compile(byte *p)
  void
  rx_free(regex *r)
  {
-  free(r->buf.buffer);
-  free(r);
+  xfree(r->buf.buffer);                /* Pattern buffer set up in rx_compile() */
+  if (r->buf.translate)                /* Allocated only when compiled with icase, otherwise NULL */
+    xfree(r->buf.translate);
+  xfree(r);                            /* Finally release the regex structure itself */
  }
  
  int
  rx_match(regex *r, byte *s)
  {
-  uns len = strlen(s);
+  int len = strlen(s);
  
+  r->len_cache = len;
    if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
      return 0;
    if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */
@@ -67,14 +87,19 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
           by++;
           if (*by >= '0' && *by <= '9') /* \0 gets replaced by entire pattern */
             {
-             int j = *by++ - '0';
-             byte *s = src + r->regs.start[j];
-             int i = r->regs.end[j] - r->regs.start[j];
-             if (dest + i >= end)
-               return -1;
-             memcpy(dest, s, i);
-             dest += i;
-             continue;
+             uns j = *by++ - '0';
+             if (j < r->regs.num_regs)
+               {
+                 byte *s = src + r->regs.start[j];
+                 uns i = r->regs.end[j] - r->regs.start[j];
+                 if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
+                   return -1;
+                 if (dest + i >= end)
+                   return -1;
+                 memcpy(dest, s, i);
+                 dest += i;
+                 continue;
+               }
             }
         }
        if (dest < end)