]> mj.ucw.cz Git - libucw.git/commitdiff
Moved the basic Unicode and UTF-8 functions to the main library.
author Martin Mares <mj@ucw.cz>
Sat, 10 Jul 2004 20:35:31 +0000 (20:35 +0000)
committer Martin Mares <mj@ucw.cz>
Sat, 10 Jul 2004 20:35:31 +0000 (20:35 +0000)
lib/Makefile
lib/unicode-utf8.c [new file with mode: 0644]
lib/unicode.h [new file with mode: 0644]

index a3b5b805287eb4547a40aba72a350915fc3ee15d..5297668f71c44a384395d7dddf239ec124c3c1bf 100644 (file)
@@ -11,7 +11,8 @@ LIBSH_MODS=alloc alloc_str ctmatch db fastbuf fb-file fb-mem lists \
        finger proctitle ipaccess profile bitsig randomkey \
        hashfunc base64 base224 fb-temp fb-mmap fb-printf urlkey \
        partmap fb-limfd fb-buffer mainloop exitstatus runcmd carefulio \
-       lizard lizard-safe sighandler buck2obj obj2buck
+       lizard lizard-safe sighandler buck2obj obj2buck \
+       unicode-utf8 ff-utf8
 
 ifdef CONFIG_OWN_REGEX
 include lib/regex/Makefile
diff --git a/lib/unicode-utf8.c b/lib/unicode-utf8.c
new file mode 100644 (file)
index 0000000..5a9d1bf
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ *     Sherlock Library -- UTF-8 Functions
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *     (c) 2003 Robert Spalek <robert@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/unicode.h"
+
+/*
+ * Count the characters in a NUL-terminated UTF-8 string.  Each UTF8_SKIP
+ * step (one lead byte plus the continuation bytes it consumes) counts as
+ * one character.
+ */
+uns
+utf8_strlen(byte *str)
+{
+  uns count;
+
+  for (count = 0; *str; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+/*
+ * Count the UTF-8 characters that start within the first n bytes of str.
+ * The buffer does not have to be NUL-terminated and a zero byte is counted
+ * like any other character.
+ *
+ * The skip below is a bounded version of UTF8_SKIP: the plain macro keeps
+ * looking at the following byte as long as the lead byte announces more
+ * continuation bytes, so a sequence truncated by the n-byte limit would
+ * make it read at or beyond str + n.  Here no byte outside the window is
+ * ever touched; a truncated trailing sequence still counts as one character.
+ */
+uns
+utf8_strnlen(byte *str, uns n)
+{
+  uns len = 0;
+  byte *end = str + n;
+  while (str < end)
+    {
+      uns c = *str++;
+      if (c >= 0xc0)
+        while (c & 0x40 && str < end && *str >= 0x80 && *str < 0xc0)
+          str++, c <<= 1;
+      len++;
+    }
+  return len;
+}
diff --git a/lib/unicode.h b/lib/unicode.h
new file mode 100644 (file)
index 0000000..ca8bdea
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ *     Sherlock Library -- Unicode Characters
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UNICODE_H
+#define _UNICODE_H
+
+/* Macros for handling UTF-8 */
+
+/*
+ * Value GET_UTF8_CHAR stores when it meets a lead byte of 0xf0 or above.
+ * NOTE(review): U+FFFC is OBJECT REPLACEMENT CHARACTER in Unicode; the usual
+ * REPLACEMENT CHARACTER is U+FFFD -- confirm that 0xfffc is intended.
+ */
+#define UNI_REPLACEMENT 0xfffc
+
+#define PUT_UTF8(p,u) do {             \
+  if (u < 0x80)                                \
+    *p++ = u;                          \
+  else if (u < 0x800)                  \
+    {                                  \
+      *p++ = 0xc0 | (u >> 6);          \
+      *p++ = 0x80 | (u & 0x3f);                \
+    }                                  \
+  else                                 \
+    {                                  \
+      *p++ = 0xe0 | (u >> 12);         \
+      *p++ = 0x80 | ((u >> 6) & 0x3f); \
+      *p++ = 0x80 | (u & 0x3f);                \
+    }                                  \
+  } while(0)
+
+/*
+ * Non-zero if byte c is 0xc0 or above.  GET_UTF8 uses this to choose
+ * between decoding a multi-byte sequence and reading a single byte.
+ */
+#define IS_UTF8(c) ((c) >= 0xc0)
+
+#define GET_UTF8_CHAR(p,u) do {                \
+    if (*p >= 0xf0)                    \
+      {        /* Too large, use replacement char */   \
+       p++;                            \
+       while ((*p & 0xc0) == 0x80)     \
+         p++;                          \
+       u = UNI_REPLACEMENT;            \
+      }                                        \
+    else if (*p >= 0xe0)               \
+      {                                        \
+       u = *p++ & 0x0f;                \
+       if ((*p & 0xc0) == 0x80)        \
+         u = (u << 6) | (*p++ & 0x3f); \
+       if ((*p & 0xc0) == 0x80)        \
+         u = (u << 6) | (*p++ & 0x3f); \
+      }                                        \
+    else                               \
+      {                                        \
+       u = *p++ & 0x1f;                \
+       if ((*p & 0xc0) == 0x80)        \
+         u = (u << 6) | (*p++ & 0x3f); \
+      }                                        \
+  } while (0)                          \
+
+#define GET_UTF8(p,u)                  \
+    if (IS_UTF8(*p))                   \
+      GET_UTF8_CHAR(p,u);              \
+    else                               \
+      u = *p++
+
+#define UTF8_SKIP(p) do {                              \
+    uns c = *p++;                                      \
+    if (c >= 0xc0)                                     \
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)      \
+        p++, c <<= 1;                                  \
+  } while (0)
+
+/*
+ * UTF8_SKIP_BWD(p): step p backwards to the start of the previous
+ * character, i.e. decrement p until it points at a byte that is not a
+ * continuation byte (10xxxxxx).  The pointer itself must be decremented
+ * (*--(p)); decrementing the byte it points to (--*(p)) would corrupt the
+ * data and never move p.
+ *
+ * The macro expands to a bare `while (...)`, so the caller's terminating
+ * `;` serves as the empty loop body.  The caller must guarantee that a
+ * non-continuation byte precedes p within the buffer.
+ */
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
+
+/*
+ * Number of bytes PUT_UTF8 emits for code point u: 1 below 0x80, 2 below
+ * 0x800 and 3 otherwise.
+ */
+#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
+
+/* unicode-utf8.c */
+
+/* Number of UTF-8 characters in the NUL-terminated string str. */
+uns utf8_strlen(byte *str);
+/* Number of UTF-8 characters that start within the first n bytes of str. */
+uns utf8_strnlen(byte *str, uns n);
+
+#endif