From: Martin Mares <mj@ucw.cz>
Date: Sat, 10 Jul 2004 20:35:31 +0000 (+0000)
Subject: Moved the basic Unicode and UTF-8 functions to the main library.
X-Git-Tag: holmes-import~957
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=f528738ecc582e648fb5ab97fc579b8d14327175;p=libucw.git

Moved the basic Unicode and UTF-8 functions to the main library.
---

diff --git a/lib/Makefile b/lib/Makefile
index a3b5b805..5297668f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,8 @@ LIBSH_MODS=alloc alloc_str ctmatch db fastbuf fb-file fb-mem lists \
 	finger proctitle ipaccess profile bitsig randomkey \
 	hashfunc base64 base224 fb-temp fb-mmap fb-printf urlkey \
 	partmap fb-limfd fb-buffer mainloop exitstatus runcmd carefulio \
-	lizard lizard-safe sighandler buck2obj obj2buck
+	lizard lizard-safe sighandler buck2obj obj2buck \
+	unicode-utf8 ff-utf8
 
 ifdef CONFIG_OWN_REGEX
 include lib/regex/Makefile
diff --git a/lib/unicode-utf8.c b/lib/unicode-utf8.c
new file mode 100644
index 00000000..5a9d1bfc
--- /dev/null
+++ b/lib/unicode-utf8.c
@@ -0,0 +1,37 @@
+/*
+ *	Sherlock Library -- UTF-8 Functions
+ *
+ *	(c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *	(c) 2003 Robert Spalek <robert@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/unicode.h"
+
+/* Return the length of the NUL-terminated string str in UTF-8 characters
+ * (not bytes); the terminator itself is not counted. */
+uns
+utf8_strlen(byte *str)
+{
+  uns count;
+  /* UTF8_SKIP steps over one whole character per iteration. */
+  for (count = 0; *str; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+/* Count UTF-8 characters in the first n bytes of str; never reads past str+n. */
+uns
+utf8_strnlen(byte *str, uns n)
+{
+  uns len = 0, c;
+  byte *end = str + n;
+  for (; str < end; len++)
+    /* Same stepping as UTF8_SKIP, but every look-ahead is bounded by end. */
+    for (c = *str++; c >= 0xc0 && (c & 0x40) && str < end && *str >= 0x80 && *str < 0xc0; c <<= 1)
+      str++;
+  return len;
+}
diff --git a/lib/unicode.h b/lib/unicode.h
new file mode 100644
index 00000000..ca8bdea3
--- /dev/null
+++ b/lib/unicode.h
@@ -0,0 +1,81 @@
+/*
+ *	Sherlock Library -- Unicode Characters
+ *
+ *	(c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+#ifndef _UNICODE_H
+#define _UNICODE_H
+
+/* Macros for handling UTF-8 */
+
+#define UNI_REPLACEMENT 0xfffc	/* Value GET_UTF8_CHAR stores for lead bytes >= 0xf0 */
+
+#define PUT_UTF8(p,u) do {	/* Store u (0..0xffff) as UTF-8 at p, advancing p; p and u are evaluated repeatedly */	\
+  if ((u) < 0x80)			\
+    *(p)++ = (u);			\
+  else if ((u) < 0x800)			\
+    {	/* 2 bytes: 110xxxxx 10xxxxxx */	\
+      *(p)++ = 0xc0 | ((u) >> 6);	\
+      *(p)++ = 0x80 | ((u) & 0x3f);	\
+    }					\
+  else					\
+    {	/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx; values above 0xffff are not handled */	\
+      *(p)++ = 0xe0 | ((u) >> 12);	\
+      *(p)++ = 0x80 | (((u) >> 6) & 0x3f);	\
+      *(p)++ = 0x80 | ((u) & 0x3f);	\
+    }					\
+  } while(0)
+
+#define IS_UTF8(c) ((c) >= 0xc0)	/* Byte is >= 0xc0; GET_UTF8 uses this to select the multi-byte decoder */
+
+#define GET_UTF8_CHAR(p,u) do {	/* Decode the multi-byte sequence at p into u, advancing p */	\
+    if (*(p) >= 0xf0)			\
+      {	/* Too large, use replacement char */	\
+	(p)++;				\
+	while ((*(p) & 0xc0) == 0x80)	\
+	  (p)++;			\
+	(u) = UNI_REPLACEMENT;		\
+      }					\
+    else if (*(p) >= 0xe0)		\
+      {	/* 3-byte form: 4 + 6 + 6 payload bits; missing continuation bytes are not consumed */	\
+	(u) = *(p)++ & 0x0f;		\
+	if ((*(p) & 0xc0) == 0x80)	\
+	  (u) = ((u) << 6) | (*(p)++ & 0x3f);	\
+	if ((*(p) & 0xc0) == 0x80)	\
+	  (u) = ((u) << 6) | (*(p)++ & 0x3f);	\
+      }					\
+    else				\
+      {	/* 2-byte form: 5 + 6 payload bits */	\
+	(u) = *(p)++ & 0x1f;		\
+	if ((*(p) & 0xc0) == 0x80)	\
+	  (u) = ((u) << 6) | (*(p)++ & 0x3f);	\
+      }					\
+  } while (0)
+
+#define GET_UTF8(p,u)			\
+    if (IS_UTF8(*p))			\
+      GET_UTF8_CHAR(p,u);		\
+    else				\
+      u = *p++
+
+#define UTF8_SKIP(p) do {	/* Advance p over one character without decoding it */	\
+    uns _utf8_c = *(p)++;				\
+    if (_utf8_c >= 0xc0)	/* lead byte: each further leading 1 bit allows one continuation byte */	\
+      while (_utf8_c & 0x40 && *(p) >= 0x80 && *(p) < 0xc0)	\
+        (p)++, _utf8_c <<= 1;				\
+  } while (0)
+
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)	/* Move p back to the start of the previous character; the caller's ';' is the loop body */
+
+#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)	/* Number of bytes PUT_UTF8 writes for u */
+
+/* unicode-utf8.c */
+
+uns utf8_strlen(byte *str);		/* Characters in the NUL-terminated string str */
+uns utf8_strnlen(byte *str, uns n);	/* Characters in the first n bytes of str */
+
+#endif