Unicode character type functions moved to charset/unicat.h

author Martin Mares <mj@ucw.cz>

Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)

committer Martin Mares <mj@ucw.cz>

Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)
author Martin Mares <mj@ucw.cz>
Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)
committer Martin Mares <mj@ucw.cz>
Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)
diff --git a/charset/unicat.h b/charset/unicat.h

new file mode 100644 (file)

index 0000000..1ddfb8e
--- /dev/null
+++ b/charset/unicat.h
@@ -0,0 +1,70 @@
+/*
+ *     The UniCode Character Categorizer
+ *
+ *     (c) 1997--2004 Martin Mares <mj@ucw.cz>
+ *
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ */
+
+#ifndef _UNICAT_H
+#define _UNICAT_H
+
+extern const byte *_U_cat[];
+extern const word *_U_upper[], *_U_lower[], *_U_unaccent[];
+
+static inline uns Ucategory(uns x)
+{
+  if (_U_cat[x >> 8U])
+    return _U_cat[x >> 8U][x & 0xff];
+  else
+    return 0;
+}
+
+static inline uns Utoupper(uns x)
+{
+  word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
+  return w ? w : x;
+}
+
+static inline uns Utolower(uns x)
+{
+  word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
+  return w ? w : x;
+}
+
+static inline uns Uunaccent(uns x)
+{
+  word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
+  return w ? w : x;
+}
+
+extern const word *Uexpand_lig(uns x);
+
+enum unicode_char_type {
+  _U_LETTER = 1,               /* Letters */
+  _U_UPPER = 2,                        /* Upper-case letters */
+  _U_LOWER = 4,                        /* Lower-case letters */
+  _U_CTRL = 8,                 /* Control characters */
+  _U_DIGIT = 16,               /* Digits */
+  _U_XDIGIT = 32,              /* Hexadecimal digits */
+  _U_SPACE = 64,               /* White spaces (spaces, tabs, newlines) */
+  _U_LIGATURE = 128,           /* Compatibility ligature (to be expanded) */
+};
+
+#define _U_LUPPER (_U_LETTER | _U_UPPER)
+#define _U_LLOWER (_U_LETTER | _U_LOWER)
+
+#define UCat(x,y) (Ucategory(x) & (y))
+
+#define Ualpha(x) UCat(x, _U_LETTER)
+#define Uupper(x) UCat(x, _U_UPPER)
+#define Ulower(x) UCat(x, _U_LOWER)
+#define Udigit(x) UCat(x, _U_DIGIT)
+#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT))
+#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT))
+#define Uctrl(x) UCat(x, _U_CTRL)
+#define Uprint(x) !Uctrl(x)
+#define Uspace(x) UCat(x, _U_SPACE)
+
+#endif
diff --git a/charset/unicode.h b/charset/unicode.h

deleted file mode 100644 (file)

index 7bc6f23..0000000
--- a/charset/unicode.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- *     The UniCode Library
- *
- *     (c) 1997--2003 Martin Mares <mj@ucw.cz>
- *
- *     This software may be freely distributed and used according to the terms
- *     of the GNU Lesser General Public License.
- */
-
-#ifndef _UNICODE_H
-#define _UNICODE_H
-
-extern const byte *_U_cat[];
-extern const word *_U_upper[], *_U_lower[], *_U_unaccent[];
-
-static inline uns Ucategory(uns x)
-{
-  if (_U_cat[x >> 8U])
-    return _U_cat[x >> 8U][x & 0xff];
-  else
-    return 0;
-}
-
-static inline uns Utoupper(uns x)
-{
-  word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
-  return w ? w : x;
-}
-
-static inline uns Utolower(uns x)
-{
-  word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
-  return w ? w : x;
-}
-
-static inline uns Uunaccent(uns x)
-{
-  word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
-  return w ? w : x;
-}
-
-extern const word *Uexpand_lig(uns x);
-
-enum unicode_char_type {
-  _U_LETTER = 1,               /* Letters */
-  _U_UPPER = 2,                        /* Upper-case letters */
-  _U_LOWER = 4,                        /* Lower-case letters */
-  _U_CTRL = 8,                 /* Control characters */
-  _U_DIGIT = 16,               /* Digits */
-  _U_XDIGIT = 32,              /* Hexadecimal digits */
-  _U_SPACE = 64,               /* White spaces (spaces, tabs, newlines) */
-  _U_LIGATURE = 128,           /* Compatibility ligature (to be expanded) */
-};
-
-#define _U_LUPPER (_U_LETTER | _U_UPPER)
-#define _U_LLOWER (_U_LETTER | _U_LOWER)
-
-#define UCat(x,y) (Ucategory(x) & (y))
-
-#define Ualpha(x) UCat(x, _U_LETTER)
-#define Uupper(x) UCat(x, _U_UPPER)
-#define Ulower(x) UCat(x, _U_LOWER)
-#define Udigit(x) UCat(x, _U_DIGIT)
-#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT))
-#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT))
-#define Uctrl(x) UCat(x, _U_CTRL)
-#define Uprint(x) !Uctrl(x)
-#define Uspace(x) UCat(x, _U_SPACE)
-
-#define UNI_REPLACEMENT 0xfffc
-
-#define PUT_UTF8(p,u) do {             \
-  if (u < 0x80)                                \
-    *p++ = u;                          \
-  else if (u < 0x800)                  \
-    {                                  \
-      *p++ = 0xc0 | (u >> 6);          \
-      *p++ = 0x80 | (u & 0x3f);                \
-    }                                  \
-  else                                 \
-    {                                  \
-      *p++ = 0xe0 | (u >> 12);         \
-      *p++ = 0x80 | ((u >> 6) & 0x3f); \
-      *p++ = 0x80 | (u & 0x3f);                \
-    }                                  \
-  } while(0)
-
-#define IS_UTF8(c) ((c) >= 0xc0)
-
-#define GET_UTF8_CHAR(p,u) do {                \
-    if (*p >= 0xf0)                    \
-      {        /* Too large, use replacement char */   \
-       p++;                            \
-       while ((*p & 0xc0) == 0x80)     \
-         p++;                          \
-       u = UNI_REPLACEMENT;            \
-      }                                        \
-    else if (*p >= 0xe0)               \
-      {                                        \
-       u = *p++ & 0x0f;                \
-       if ((*p & 0xc0) == 0x80)        \
-         u = (u << 6) | (*p++ & 0x3f); \
-       if ((*p & 0xc0) == 0x80)        \
-         u = (u << 6) | (*p++ & 0x3f); \
-      }                                        \
-    else                               \
-      {                                        \
-       u = *p++ & 0x1f;                \
-       if ((*p & 0xc0) == 0x80)        \
-         u = (u << 6) | (*p++ & 0x3f); \
-      }                                        \
-  } while (0)                          \
-
-#define GET_UTF8(p,u)                  \
-    if (IS_UTF8(*p))                   \
-      GET_UTF8_CHAR(p,u);              \
-    else                               \
-      u = *p++
-
-#define UTF8_SKIP(p) do {                              \
-    uns c = *p++;                                      \
-    if (c >= 0xc0)                                     \
-      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)      \
-        p++, c <<= 1;                                  \
-  } while (0)
-
-#define UTF8_SKIP_BWD(p) while ((--*(p) & 0xc0) == 0x80)
-
-#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
-
-uns ucs2_to_utf8(byte *, word *);
-uns utf8_to_ucs2(word *, byte *);
-byte *static_ucs2_to_utf8(word *);
-uns Ustrlen(word *);
-uns utf8_strlen(byte *str);
-uns utf8_strnlen(byte *str, uns n);
-
-#endif
author	Martin Mares <mj@ucw.cz>
	Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)
committer	Martin Mares <mj@ucw.cz>
	Sat, 10 Jul 2004 20:36:55 +0000 (20:36 +0000)
charset/unicat.h	[new file with mode: 0644]	patch | blob
charset/unicode.h	[deleted file]	patch | blob | history