X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=270fb590b5e7fc8873afefc2d5c898c97a1d1287;hb=e371dcc1cd2857036374dd9597705faed0427006;hp=43ef8694ac2609a7b653e9253ae1d68e36ef1014;hpb=ad014f5f33ed7f3079c6b7e688d12baf708ee3b3;p=libucw.git

diff --git a/lib/regex.c b/lib/regex.c
index 43ef8694..270fb590 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -1,5 +1,5 @@
 /*
- *	Sherlock Library -- Interface to Regular Expression Libraries
+ *	UCW Library -- Interface to Regular Expression Libraries
  *
  *	(c) 1997--2004 Martin Mares <mj@ucw.cz>
  *	(c) 2001 Robert Spalek <robert@ucw.cz>
@@ -14,114 +14,16 @@
 
 #include <stdio.h>
 #include <string.h>
-#include <stdlib.h>
 
-#if 1
-
-/* BSD regular expression library */
-
-#include <regex.h>
-
-#define INITIAL_MEM 1024		/* Initial space allocated for each pattern */
-#define CHAR_SET_SIZE 256		/* How many characters in the character set.  */
-
-struct regex {
-  struct re_pattern_buffer buf;
-  struct re_registers regs;		/* Must not change between re_match() calls */
-  int len_cache;
-};
-
-regex *
-rx_compile(byte *p, int icase)
-{
-  regex *r = xmalloc_zero(sizeof(regex));
-  const char *msg;
-
-  r->buf.buffer = xmalloc(INITIAL_MEM);
-  r->buf.allocated = INITIAL_MEM;
-  if (icase)
-    {
-      unsigned i;
-      r->buf.translate = xmalloc (CHAR_SET_SIZE);
-      /* Map uppercase characters to corresponding lowercase ones.  */
-      for (i = 0; i < CHAR_SET_SIZE; i++)
-        r->buf.translate[i] = Cupcase(i);
-    }
-  else
-    r->buf.translate = NULL;
-  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
-  msg = re_compile_pattern(p, strlen(p), &r->buf);
-  if (!msg)
-    return r;
-  die("Error parsing pattern `%s': %s", p, msg);
-}
-
-void
-rx_free(regex *r)
-{
-  xfree(r->buf.buffer);
-  if (r->buf.translate)
-    xfree(r->buf.translate);
-  xfree(r);
-}
-
-int
-rx_match(regex *r, byte *s)
-{
-  int len = strlen(s);
-
-  r->len_cache = len;
-  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
-    return 0;
-  if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why regex doesn't enforce implicit "^...$" ? */
-    return 0;
-  return 1;
-}
-
-int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
-{
-  byte *end = dest + destlen - 1;
-
-  if (!rx_match(r, src))
-    return 0;
-
-  while (*by)
-    {
-      if (*by == '\\')
-	{
-	  by++;
-	  if (*by >= '0' && *by <= '9')	/* \0 gets replaced by entire pattern */
-	    {
-	      uns j = *by++ - '0';
-	      if (j < r->regs.num_regs)
-		{
-		  byte *s = src + r->regs.start[j];
-		  uns i = r->regs.end[j] - r->regs.start[j];
-		  if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)
-		    return -1;
-		  if (dest + i >= end)
-		    return -1;
-		  memcpy(dest, s, i);
-		  dest += i;
-		  continue;
-		}
-	    }
-	}
-      if (dest < end)
-	*dest++ = *by++;
-      else
-	return -1;
-    }
-  *dest = 0;
-  return 1;
-}
-
-#elif 0
+#if defined(CONFIG_OWN_REGEX) || defined(CONFIG_POSIX_REGEX)
 
 /* POSIX regular expression library */
 
+#ifdef CONFIG_OWN_REGEX
+#include "lib/regex/regex-sh.h"
+#else
 #include <regex.h>
+#endif
 
 struct regex {
   regex_t rx;
@@ -129,14 +31,14 @@ struct regex {
 };
 
 regex *
-rx_compile(byte *p, int icase)
+rx_compile(const char *p, int icase)
 {
   regex *r = xmalloc_zero(sizeof(regex));
 
   int err = regcomp(&r->rx, p, REG_EXTENDED | (icase ? REG_ICASE : 0));
   if (err)
     {
-      byte msg[256];
+      char msg[256];
       regerror(err, &r->rx, msg, sizeof(msg)-1);
       /* regfree(&r->rx) not needed */
       die("Error parsing regular expression `%s': %s", p, msg);
@@ -152,7 +54,7 @@ rx_free(regex *r)
 }
 
 int
-rx_match(regex *r, byte *s)
+rx_match(regex *r, const char *s)
 {
   int err = regexec(&r->rx, s, 10, r->matches, 0);
   if (!err)
@@ -169,9 +71,9 @@ rx_match(regex *r, byte *s)
 }
 
 int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
 {
-  byte *end = dest + destlen - 1;
+  char *end = dest + destlen - 1;
 
   if (!rx_match(r, src))
     return 0;
@@ -186,7 +88,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
 	      uns j = *by++ - '0';
 	      if (j <= r->rx.re_nsub && r->matches[j].rm_so >= 0)
 		{
-		  byte *s = src + r->matches[j].rm_so;
+		  const char *s = src + r->matches[j].rm_so;
 		  uns i = r->matches[j].rm_eo - r->matches[j].rm_so;
 		  if (dest + i >= end)
 		    return -1;
@@ -205,7 +107,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
   return 1;
 }
 
-#else
+#elif defined(CONFIG_PCRE)
 
 /* PCRE library */
 
@@ -220,7 +122,7 @@ struct regex {
 };
 
 regex *
-rx_compile(byte *p, int icase)
+rx_compile(const char *p, int icase)
 {
   const char *err;
   int errpos, match_array_size, eno;
@@ -250,7 +152,7 @@ rx_free(regex *r)
 }
 
 int
-rx_match(regex *r, byte *s)
+rx_match(regex *r, const char *s)
 {
   int len = str_len(s);
   int err = pcre_exec(r->rx, r->extra, s, len, 0, 0, r->matches, r->match_array_size);
@@ -269,9 +171,9 @@ rx_match(regex *r, byte *s)
 }
 
 int
-rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)
 {
-  byte *end = dest + destlen - 1;
+  char *end = dest + destlen - 1;
 
   if (!rx_match(r, src))
     return 0;
@@ -286,7 +188,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
 	      uns j = *by++ - '0';
 	      if (j < r->real_matches && r->matches[2*j] >= 0)
 		{
-		  byte *s = src + r->matches[2*j];
+		  const char *s = src + r->matches[2*j];
 		  uns i = r->matches[2*j+1] - r->matches[2*j];
 		  if (dest + i >= end)
 		    return -1;
@@ -305,6 +207,111 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
   return 1;
 }
 
+#else
+
+/* BSD regular expression library */
+
+#ifdef CONFIG_OWN_BSD_REGEX
+#include "lib/regex/regex-sh.h"
+#else
+#include <regex.h>
+#endif
+
+#define INITIAL_MEM 1024		/* Initial space allocated for each pattern */
+#define CHAR_SET_SIZE 256		/* How many characters in the character set.  */
+
+struct regex {
+  struct re_pattern_buffer buf;		/* Compiled pattern, filled in by rx_compile() */
+  struct re_registers regs;		/* Must not change between re_match() calls */
+  int len_cache;			/* Length of the string last passed to rx_match() */
+};
+
+regex *
+rx_compile(const char *p, int icase)	/* Compile pattern p (POSIX extended syntax); non-zero icase installs a case-folding table */
+{
+  regex *r = xmalloc_zero(sizeof(regex));
+  const char *msg;
+
+  r->buf.buffer = xmalloc(INITIAL_MEM);
+  r->buf.allocated = INITIAL_MEM;
+  if (icase)
+    {
+      unsigned i;
+      r->buf.translate = xmalloc (CHAR_SET_SIZE);
+      /* Fold every character through Cupcase(). NOTE(review): the name suggests folding to upper case, not lower -- confirm. */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        r->buf.translate[i] = Cupcase(i);
+    }
+  else
+    r->buf.translate = NULL;
+  re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
+  msg = re_compile_pattern(p, strlen(p), &r->buf);
+  if (!msg)				/* No error message means the pattern compiled */
+    return r;
+  die("Error parsing pattern `%s': %s", p, msg);	/* No return follows; die() presumably never returns -- confirm */
+}
+
+void
+rx_free(regex *r)	/* Free the pattern buffer, the translation table (if any) and r itself */
+{
+  xfree(r->buf.buffer);
+  if (r->buf.translate)			/* NULL unless rx_compile() was called with icase set */
+    xfree(r->buf.translate);
+  xfree(r);
+}
+
+int
+rx_match(regex *r, const char *s)	/* Return 1 if the whole string s matches r, 0 otherwise */
+{
+  int len = strlen(s);
+
+  r->len_cache = len;			/* Remembered for the register bounds check in rx_subst() */
+  if (re_match(&r->buf, s, len, 0, &r->regs) < 0)
+    return 0;
+  if (r->regs.start[0] || r->regs.end[0] != len) /* XXX: Why doesn't regex enforce an implicit "^...$"? */
+    return 0;
+  return 1;
+}
+
+int
+rx_subst(regex *r, const char *by, const char *src, char *dest, uns destlen)	/* Expand `by' into dest if src matches r: 1 = done, 0 = no match, -1 = result too long or bad register */
+{
+  char *end = dest + destlen - 1;	/* The last byte is reserved for the terminating NUL */
+
+  if (!rx_match(r, src))
+    return 0;
+
+  while (*by)
+    {
+      if (*by == '\\')
+	{
+	  by++;
+	  if (*by >= '0' && *by <= '9')	/* \N inserts register N; \0 is the entire match */
+	    {
+	      uns j = *by++ - '0';
+	      if (j < r->regs.num_regs)
+		{
+		  const char *s = src + r->regs.start[j];
+		  uns i = r->regs.end[j] - r->regs.start[j];
+		  if (r->regs.start[j] > r->len_cache || r->regs.end[j] > r->len_cache)	/* Register points outside src */
+		    return -1;
+		  if (dest + i >= end)
+		    return -1;
+		  memcpy(dest, s, i);
+		  dest += i;
+		  continue;
+		}
+	    }
+	}
+      if (dest >= end)
+	return -1;
+      if (!(*dest++ = *by++))	/* Just copied the terminating NUL (`by' ended in a lone `\'): stop instead of reading past it */
+	return 1;
+    }
+  *dest = 0;
+  return 1;
+}
+
 #endif
 
 #ifdef TEST
@@ -312,7 +319,7 @@ rx_subst(regex *r, byte *by, byte *src, byte *dest, uns destlen)
 int main(int argc, char **argv)
 {
   regex *r;
-  byte buf1[4096], buf2[4096];
+  char buf1[4096], buf2[4096];
   int opt_i = 0;
 
   if (!strcmp(argv[1], "-i"))