/*
 *	(c) 1997--2003 Martin Mares <mj@ucw.cz>
 *
 *	This software may be freely distributed and used according to the terms
 *	of the GNU Lesser General Public License.
 */
13 #include "lib/chartype.h"
15 extern byte *_U_cat[];
16 extern word *_U_upper[], *_U_lower[], *_U_unaccent[];
18 static inline uns Ucategory(word x)
21 return _U_cat[x >> 8U][x & 0xff];
26 static inline word Utoupper(word x)
28 word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
32 static inline word Utolower(word x)
34 word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
38 static inline word Uunaccent(word x)
40 word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
/*
 * UCat(x, y): the category bits of character x that are also set in mask y.
 * The result is non-zero when x has at least one of the requested bits.
 */
#define UCat(x,y) (Ucategory(x) & (y))

/*
 * Character class predicates built on UCat().  Each one yields a non-zero
 * value (the matching mask bits) when x is in the class and 0 otherwise.
 * The _C_* masks are not defined in this header
 * (NOTE(review): presumably they come from lib/chartype.h -- confirm).
 */
#define Uupper(x) UCat(x, _C_UPPER)
#define Ulower(x) UCat(x, _C_LOWER)
#define Ualpha(x) UCat(x, _C_ALPHA)
#define Ualnum(x) UCat(x, _C_ALNUM)
/* Printable means "not a control character"; yields exactly 0 or 1. */
#define Uprint(x) (!Uctrl(x))
#define Udigit(x) UCat(x, _C_DIGIT)
#define Uxdigit(x) UCat(x, _C_XDIGIT)
#define Uword(x) UCat(x, _C_WORD)
#define Ublank(x) UCat(x, _C_BLANK)
#define Uctrl(x) UCat(x, _C_CTRL)
/* Uspace() is an alias of Ublank(). */
#define Uspace(x) Ublank(x)
/*
 * Character value that GET_UTF8_CHAR() stores when it meets a sequence
 * it cannot decode.
 * NOTE(review): U+FFFC is OBJECT REPLACEMENT CHARACTER; the code point
 * conventionally used for undecodable input is U+FFFD.  The value is left
 * unchanged here because callers may depend on it -- confirm before
 * changing.
 */
#define UNI_REPLACEMENT 0xfffc
/*
 * PUT_UTF8(p, u): store the UTF-8 encoding of character u at p and advance
 * p past the bytes written.
 *
 * Writes 1 byte for u < 0x80, 2 bytes for u < 0x800 and 3 bytes otherwise,
 * the same size classes that UTF8_SPACE() reports.
 *
 * p must be a modifiable pointer to bytes.  u is evaluated several times,
 * so it must not have side effects; it may be an arbitrary expression
 * because every use is parenthesized.
 */
#define PUT_UTF8(p,u) do { \
    if ((u) < 0x80) \
      *(p)++ = (u); \
    else if ((u) < 0x800) \
      { \
        *(p)++ = 0xc0 | ((u) >> 6); \
        *(p)++ = 0x80 | ((u) & 0x3f); \
      } \
    else \
      { \
        *(p)++ = 0xe0 | ((u) >> 12); \
        *(p)++ = 0x80 | (((u) >> 6) & 0x3f); \
        *(p)++ = 0x80 | ((u) & 0x3f); \
      } \
  } while (0)
/*
 * IS_UTF8(c): true when byte value c is >= 0xc0, the same threshold
 * GET_UTF8_CHAR() uses for the first byte of a multi-byte sequence.
 * c must be an unsigned byte value; a negative (signed char) argument
 * never matches.
 */
#define IS_UTF8(c) ((c) >= 0xc0)
/*
 * GET_UTF8_CHAR(p, u): decode the multi-byte UTF-8 sequence starting at p
 * into u and advance p past the bytes consumed.
 *
 * The caller is expected to have checked that *p is a lead byte (>= 0xc0).
 *   - lead byte >= 0xf0: the character does not fit the 3-byte range; the
 *     lead byte and all following continuation bytes are skipped and u is
 *     set to UNI_REPLACEMENT.
 *   - lead byte >= 0xe0: up to two continuation bytes are merged in.
 *   - otherwise:         up to one continuation byte is merged in.
 * A continuation byte is consumed only if it matches 10xxxxxx, so a
 * truncated sequence stops early instead of eating the next character.
 *
 * p must be a modifiable pointer to unsigned bytes and u a modifiable
 * integer lvalue; both are evaluated several times.
 */
#define GET_UTF8_CHAR(p,u) do { \
    if (*(p) >= 0xf0) \
      { /* Too large, use replacement char */ \
        (p)++; \
        while ((*(p) & 0xc0) == 0x80) \
          (p)++; \
        (u) = UNI_REPLACEMENT; \
      } \
    else if (*(p) >= 0xe0) \
      { \
        (u) = *(p)++ & 0x0f; \
        if ((*(p) & 0xc0) == 0x80) \
          (u) = ((u) << 6) | (*(p)++ & 0x3f); \
        if ((*(p) & 0xc0) == 0x80) \
          (u) = ((u) << 6) | (*(p)++ & 0x3f); \
      } \
    else \
      { \
        (u) = *(p)++ & 0x1f; \
        if ((*(p) & 0xc0) == 0x80) \
          (u) = ((u) << 6) | (*(p)++ & 0x3f); \
      } \
  } while (0)
/*
 * GET_UTF8(p, u): read one character from the UTF-8 string at p into u and
 * advance p.
 *
 * Bytes below 0xc0 (the IS_UTF8() threshold) are taken as they are, one
 * byte per character; anything else is handed to GET_UTF8_CHAR().
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. as the unbraced body of an if/else.  Use it with a trailing
 * semicolon.  p and u are evaluated several times.
 */
#define GET_UTF8(p,u) do { \
    if (*(p) < 0xc0) \
      (u) = *(p)++; \
    else \
      GET_UTF8_CHAR(p,u); \
  } while (0)
/*
 * UTF8_SKIP(p): advance p over one UTF-8 character without decoding it.
 *
 * The first byte is always consumed.  If it is a lead byte (>= 0xc0), one
 * continuation byte (0x80..0xbf) is consumed for each further length bit
 * set in the lead byte: the lead byte is shifted left after every step so
 * that bit 0x40 always shows whether another byte is expected.  The loop
 * also stops at the first byte that is not a continuation byte, so a
 * truncated sequence does not swallow the following character.
 *
 * p must be a modifiable pointer to unsigned bytes.  The temporary has a
 * macro-specific name so that it cannot capture the caller's argument.
 */
#define UTF8_SKIP(p) do { \
    unsigned int utf8_skip_lead_ = *(p)++; \
    if (utf8_skip_lead_ >= 0xc0) \
      { \
        while ((utf8_skip_lead_ & 0x40) && *(p) >= 0x80 && *(p) < 0xc0) \
          { \
            (p)++; \
            utf8_skip_lead_ <<= 1; \
          } \
      } \
  } while (0)
/*
 * UTF8_SPACE(u): number of bytes (1, 2 or 3) that the UTF-8 encoding of
 * character u occupies.  u may be evaluated twice, so it must not have
 * side effects.
 */
#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
117 uns ucs2_to_utf8(byte *, word *);
118 uns utf8_to_ucs2(word *, byte *);
119 byte *static_ucs2_to_utf8(word *);
121 uns utf8_strlen(byte *str);
122 uns utf8_strnlen(byte *str, uns n);