4 * (c) 1997 Martin Mares, <mj@atrey.karlin.mff.cuni.cz>
10 #include "lib/config.h"
/*
 * Two-level Unicode lookup tables, declared here and defined elsewhere.
 * The accessors in this header index them as table[x >> 8][x & 0xff]:
 * the first level is selected by the upper bits of the code, the second
 * by its low byte.
 */
12 extern byte *_U_cat[], *_U_sig[];
/*
 * Conversion tables. The accessors test the first-level pointer for NULL
 * before dereferencing it, so first-level entries may be absent.
 */
13 extern word *_U_upper[], *_U_lower[], *_U_unaccent[];
/*
 * Character category flags. Each flag is a distinct bit, so they can be
 * OR-ed into masks and tested with a bitwise AND against a category value
 * (see UCat(), which masks the result of Ucategory()).
 */
15 #define _C_UPPER 1 /* Upper-case letters */
16 #define _C_LOWER 2 /* Lower-case letters */
17 #define _C_PRINT 4 /* Printable */
18 #define _C_DIGIT 8 /* Digits */
19 #define _C_CTRL 16 /* Control characters */
20 #define _C_XDIGIT 32 /* Hexadecimal digits */
21 #define _C_BLANK 64 /* Blanks */
22 #define _C_INNER 128 /* `inner punctuation' -- underscore etc. */
/* Composite masks built from the single-bit flags above. */
24 #define _C_ALPHA (_C_UPPER | _C_LOWER) /* Letters of either case */
25 #define _C_ALNUM (_C_ALPHA | _C_DIGIT) /* Letters or digits */
26 #define _C_WORD (_C_ALNUM | _C_INNER) /* Letters, digits or inner punctuation */
27 #define _C_WSTART (_C_ALPHA | _C_INNER) /* Letters or inner punctuation, no digits; by its name presumably "may start a word" -- confirm */
/*
 * Ucategory: return the category value stored for x in the two-level table
 * _U_cat, selecting the page with x >> 8 and the entry with x & 0xff.
 * The result is what UCat() masks against the _C_* flags.
 *
 * NOTE(review): the listing numbering jumps from 29 to 32, so the lines that
 * open the body are not visible in this view; whether the page pointer is
 * checked before the lookup cannot be confirmed from here.
 */
29 static inline uns Ucategory(word x)
32 return _U_cat[x >> 8U][x & 0xff];
/*
 * Utoupper: look up x in the _U_upper table. The local w receives the table
 * entry, or 0 when the page pointer for x >> 8 is NULL.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably x itself is returned when w is 0 -- confirm against the full
 * file.
 */
37 static inline word Utoupper(word x)
39 word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
/*
 * Utolower: look up x in the _U_lower table. The local w receives the table
 * entry, or 0 when the page pointer for x >> 8 is NULL.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably x itself is returned when w is 0 -- confirm against the full
 * file.
 */
43 static inline word Utolower(word x)
45 word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
/*
 * Uunaccent: look up x in the _U_unaccent table. The local w receives the
 * table entry, or 0 when the page pointer for x >> 8 is NULL.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably x itself is returned when w is 0 -- confirm against the full
 * file.
 */
49 static inline word Uunaccent(word x)
51 word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
/*
 * Usig: return the _U_sig table entry for x, substituting 0xff when the
 * stored entry is 0. The "a ? : b" form is the GNU C conditional with an
 * omitted middle operand: it yields a when a is nonzero, otherwise b.
 *
 * NOTE(review): the listing numbering jumps from 55 to 58, so the lines that
 * open the body are not visible in this view; whether the page pointer is
 * checked before the lookup cannot be confirmed from here.
 */
55 static inline byte Usig(word x)
58 return _U_sig[x >> 8U][x & 0xff] ? : 0xff;
/*
 * Category predicates. UCat(x,y) yields the bits of mask y that are set in
 * Ucategory(x); the result is nonzero on a match but is not normalized to 1.
 */
63 #define UCat(x,y) (Ucategory(x) & (y))
/* Tests against a single flag or a composite mask. */
65 #define Uupper(x) UCat(x, _C_UPPER)
66 #define Ulower(x) UCat(x, _C_LOWER)
67 #define Ualpha(x) UCat(x, _C_ALPHA)
68 #define Ualnum(x) UCat(x, _C_ALNUM)
/*
 * Defined as "not a control character"; it does not test the _C_PRINT flag.
 * Unlike the other predicates, this one does yield exactly 0 or 1.
 * NOTE(review): the expansion is not wrapped in parentheses.
 */
69 #define Uprint(x) !Uctrl(x)
70 #define Udigit(x) UCat(x, _C_DIGIT)
71 #define Uxdigit(x) UCat(x, _C_XDIGIT)
72 #define Uword(x) UCat(x, _C_WORD)
73 #define Ublank(x) UCat(x, _C_BLANK)
74 #define Uctrl(x) UCat(x, _C_CTRL)
75 #define Uspace(x) Ublank(x) /* Alias: performs the same test as Ublank() */
/*
 * Code that GET_UTF8_CHAR stores into its output on the "Too large, use
 * replacement char" path.
 * NOTE(review): Unicode's REPLACEMENT CHARACTER is U+FFFD; U+FFFC is OBJECT
 * REPLACEMENT CHARACTER. Confirm that 0xfffc is the intended value before
 * changing it, since existing callers may compare against this constant.
 */
77 #define UNI_REPLACEMENT 0xfffc
/*
 * PUT_UTF8(p,u): store the UTF-8 encoding of u through p, advancing p past
 * each byte written. The visible lines emit:
 *   - a two-byte form: 0xc0 | (u >> 6), then 0x80 | (u & 0x3f);
 *   - a three-byte form: 0xe0 | (u >> 12), then 0x80 | ((u >> 6) & 0x3f),
 *     then 0x80 | (u & 0x3f).
 *
 * NOTE(review): the conditions selecting between the forms (and any
 * one-byte case) fall in listing lines that are not visible in this view.
 * Both arguments are expanded several times and without parentheses, so
 * pass a plain pointer lvalue for p and a side-effect-free value for u.
 */
79 #define PUT_UTF8(p,u) do { \
84 *p++ = 0xc0 | (u >> 6); \
85 *p++ = 0x80 | (u & 0x3f); \
89 *p++ = 0xe0 | (u >> 12); \
90 *p++ = 0x80 | ((u >> 6) & 0x3f); \
91 *p++ = 0x80 | (u & 0x3f); \
/*
 * IS_UTF8(c): nonzero when c >= 0xc0. In UTF-8 those byte values are lead
 * bytes of multi-byte sequences (continuation bytes are 0x80..0xbf).
 * NOTE(review): assumes c holds an unsigned byte value; a negative signed
 * char would compare as false -- confirm at call sites.
 */
95 #define IS_UTF8(c) ((c) >= 0xc0)
/*
 * GET_UTF8_CHAR(p,u): decode one multi-byte UTF-8 sequence at p into u,
 * advancing p. From the visible lines:
 *   - in the "too large" branch, p is moved over bytes matching 10xxxxxx
 *     (presumably skipping them; the loop body is not visible) and u is
 *     set to UNI_REPLACEMENT;
 *   - when *p >= 0xe0, up to two continuation bytes are consumed;
 *   - in the final branch, up to one continuation byte is consumed.
 * Each continuation byte is consumed only if it matches 10xxxxxx, in which
 * case its low 6 bits are shifted into u; otherwise p is left on that byte.
 *
 * NOTE(review): the condition of the first branch and the statements that
 * read the lead byte fall in listing lines that are not visible in this
 * view. p and u are expanded many times and without parentheses, so both
 * must be plain lvalues without side effects.
 */
97 #define GET_UTF8_CHAR(p,u) do { \
99 { /* Too large, use replacement char */ \
101 while ((*p & 0xc0) == 0x80) \
103 u = UNI_REPLACEMENT; \
105 else if (*p >= 0xe0) \
108 if ((*p & 0xc0) == 0x80) \
109 u = (u << 6) | (*p++ & 0x3f); \
110 if ((*p & 0xc0) == 0x80) \
111 u = (u << 6) | (*p++ & 0x3f); \
116 if ((*p & 0xc0) == 0x80) \
117 u = (u << 6) | (*p++ & 0x3f); \
/*
 * GET_UTF8(p,u): wrapper that invokes GET_UTF8_CHAR(p,u) for at least one
 * of its cases.
 * NOTE(review): the condition guarding that call and any alternative branch
 * fall in listing lines that are not visible in this view; presumably
 * single-byte characters are handled directly -- confirm against the full
 * file.
 */
121 #define GET_UTF8(p,u) \
123 GET_UTF8_CHAR(p,u); \
/*
 * UTF8_SPACE(u): number of bytes in the UTF-8 encoding of u: 1 for values
 * below 0x80, 2 for values below 0x800, otherwise 3.
 * The argument may be evaluated twice, so it must have no side effects.
 */
127 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
/*
 * String conversion routines; their definitions are not part of this view.
 * NOTE(review): judging by names and parameter types, the first two take
 * (destination, source) buffers and convert between word-based UCS-2 and
 * byte-based UTF-8; the meaning of the uns return value, the termination
 * convention and the buffer-size requirements are not visible here --
 * confirm in the implementation.
 */
129 uns ucs2_to_utf8(byte *, word *);
130 uns utf8_to_ucs2(word *, byte *);
/*
 * NOTE(review): by its name this presumably returns a pointer to internal
 * static storage that is overwritten by the next call -- confirm before
 * keeping the result or using it from multiple threads.
 */
131 byte *static_ucs2_to_utf8(word *);