X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=charset%2Funicode.h;h=7bc6f235812ef3c5529dac1de956e768924eea7f;hb=d9820b2d7d4b908bb9da441f5a35a7999d701e60;hp=c0afc17abc9da1cb5c2e172385f7a272cd92de19;hpb=49ed04e2e93a6a5b01058638224621d5c07db01c;p=libucw.git

diff --git a/charset/unicode.h b/charset/unicode.h
index c0afc17a..7bc6f235 100644
--- a/charset/unicode.h
+++ b/charset/unicode.h
@@ -1,7 +1,7 @@
 /*
  *	The UniCode Library
  *
- *	(c) 1997 Martin Mares <mj@ucw.cz>
+ *	(c) 1997--2003 Martin Mares <mj@ucw.cz>
  *
  *	This software may be freely distributed and used according to the terms
  *	of the GNU Lesser General Public License.
@@ -10,12 +10,10 @@
 #ifndef _UNICODE_H
 #define _UNICODE_H
 
-#include "lib/chartype.h"
+extern const byte *_U_cat[];
+extern const word *_U_upper[], *_U_lower[], *_U_unaccent[];
 
-extern byte *_U_cat[];
-extern word *_U_upper[], *_U_lower[], *_U_unaccent[];
-
-static inline uns Ucategory(word x)
+static inline uns Ucategory(uns x)
 {
   if (_U_cat[x >> 8U])
     return _U_cat[x >> 8U][x & 0xff];
@@ -23,37 +21,51 @@ static inline uns Ucategory(word x)
     return 0;
 }
 
-static inline word Utoupper(word x)
+static inline uns Utoupper(uns x)
 {
   word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
   return w ? w : x;
 }
 
-static inline word Utolower(word x)
+static inline uns Utolower(uns x)
 {
   word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
   return w ? w : x;
 }
 
-static inline word Uunaccent(word x)
+static inline uns Uunaccent(uns x)
 {
   word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
   return w ? w : x;
 }
 
+extern const word *Uexpand_lig(uns x);
+
+enum unicode_char_type {
+  _U_LETTER = 1,		/* Letters */
+  _U_UPPER = 2,			/* Upper-case letters */
+  _U_LOWER = 4,			/* Lower-case letters */
+  _U_CTRL = 8,			/* Control characters */
+  _U_DIGIT = 16,		/* Digits */
+  _U_XDIGIT = 32,		/* Hexadecimal digits */
+  _U_SPACE = 64,		/* White spaces (spaces, tabs, newlines) */
+  _U_LIGATURE = 128,		/* Compatibility ligature (to be expanded) */
+};
+
+#define _U_LUPPER (_U_LETTER | _U_UPPER)
+#define _U_LLOWER (_U_LETTER | _U_LOWER)
+
 #define UCat(x,y) (Ucategory(x) & (y))
 
-#define Uupper(x) UCat(x, _C_UPPER)
-#define Ulower(x) UCat(x, _C_LOWER)
-#define Ualpha(x) UCat(x, _C_ALPHA)
-#define Ualnum(x) UCat(x, _C_ALNUM)
+#define Ualpha(x) UCat(x, _U_LETTER)
+#define Uupper(x) UCat(x, _U_UPPER)
+#define Ulower(x) UCat(x, _U_LOWER)
+#define Udigit(x) UCat(x, _U_DIGIT)
+#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT))
+#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT))
+#define Uctrl(x) UCat(x, _U_CTRL)
 #define Uprint(x) !Uctrl(x)
-#define Udigit(x) UCat(x, _C_DIGIT)
-#define Uxdigit(x) UCat(x, _C_XDIGIT)
-#define Uword(x) UCat(x, _C_WORD)
-#define Ublank(x) UCat(x, _C_BLANK)
-#define Uctrl(x) UCat(x, _C_CTRL)
-#define Uspace(x) Ublank(x)
+#define Uspace(x) UCat(x, _U_SPACE)
 
 #define UNI_REPLACEMENT 0xfffc
 
@@ -112,11 +124,15 @@ static inline word Uunaccent(word x)
         p++, c <<= 1;					\
   } while (0)
 
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)	/* Step p back to the first byte of the previous character, skipping 10xxxxxx continuation bytes; p is decremented before it is read, so it must not point at the start of the buffer */
+
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
 
 uns ucs2_to_utf8(byte *, word *);
 uns utf8_to_ucs2(word *, byte *);
 byte *static_ucs2_to_utf8(word *);
 uns Ustrlen(word *);
+uns utf8_strlen(byte *str);
+uns utf8_strnlen(byte *str, uns n);
 
 #endif