2 * Sherlock Library -- Unicode Characters
4 * (c) 1997--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
14 /* Macros for handling UTF-8 */
/*
 * Code point that GET_UTF8_CHAR and GET_UTF8_32_CHAR store into u on their
 * "Too large, use replacement char" paths.
 * NOTE(review): U+FFFC is OBJECT REPLACEMENT CHARACTER; the conventional
 * REPLACEMENT CHARACTER is U+FFFD.  Confirm that 0xfffc is intentional
 * before touching it -- callers may compare decoded values against it.
 */
16 #define UNI_REPLACEMENT 0xfffc
/*
 * PUT_UTF8(p,u): write the UTF-8 bytes for u through p, post-incrementing p
 * after every byte stored.  The stores shown here are
 *   - a 2-byte form: 0xc0 | (u >> 6), then 0x80 | low 6 bits;
 *   - a 3-byte form: 0xe0 | (u >> 12), then two 0x80 | 6-bit trailers.
 * p and u are expanded several times and u is used unparenthesized next to
 * >> and &, so pass a plain pointer lvalue for p and a simple value without
 * side effects for u.
 * NOTE(review): the conditions selecting each form are not shown here;
 * confirm the range checks before relying on this description.
 */
18 #define PUT_UTF8(p,u) do { \
23 *p++ = 0xc0 | (u >> 6); \
24 *p++ = 0x80 | (u & 0x3f); \
28 *p++ = 0xe0 | (u >> 12); \
29 *p++ = 0x80 | ((u >> 6) & 0x3f); \
30 *p++ = 0x80 | (u & 0x3f); \
/*
 * PUT_UTF8_32(p,u): like PUT_UTF8, but with the longer forms as well.
 * The branches shown here are
 *   - u < (1<<21):  4 bytes, lead byte 0xf0 | (u >> 18);
 *   - u < (1<<26):  5 bytes, lead byte 0xf8 | (u >> 24);
 *   - u < (1U<<31): 6 bytes, lead byte 0xfc | (u >> 30);
 * each followed by 0x80 | 6-bit trailers, most significant group first.
 * The last bound is written 1U<<31 because 1<<31 would overflow a 32-bit
 * signed int.  The 5- and 6-byte forms come from the original UTF-8
 * definition (RFC 2279); RFC 3629 decoders reject them.
 * p and u are expanded many times; pass side-effect-free arguments.
 * NOTE(review): the first branch (before the first "else if") is not shown
 * here, and no branch is visible for u >= 1U<<31 -- confirm both.
 */
34 #define PUT_UTF8_32(p,u) do { \
37 else if (u < (1<<21)) \
39 *p++ = 0xf0 | (u >> 18); \
40 *p++ = 0x80 | ((u >> 12) & 0x3f); \
41 *p++ = 0x80 | ((u >> 6) & 0x3f); \
42 *p++ = 0x80 | (u & 0x3f); \
44 else if (u < (1<<26)) \
46 *p++ = 0xf8 | (u >> 24); \
47 *p++ = 0x80 | ((u >> 18) & 0x3f); \
48 *p++ = 0x80 | ((u >> 12) & 0x3f); \
49 *p++ = 0x80 | ((u >> 6) & 0x3f); \
50 *p++ = 0x80 | (u & 0x3f); \
52 else if (u < (1U<<31)) \
54 *p++ = 0xfc | (u >> 30); \
55 *p++ = 0x80 | ((u >> 24) & 0x3f); \
56 *p++ = 0x80 | ((u >> 18) & 0x3f); \
57 *p++ = 0x80 | ((u >> 12) & 0x3f); \
58 *p++ = 0x80 | ((u >> 6) & 0x3f); \
59 *p++ = 0x80 | (u & 0x3f); \
/*
 * IS_UTF8(c): non-zero when c >= 0xc0, i.e. c is neither below 0x80 nor in
 * the 0x80..0xbf range that the decoders in this file treat as continuation
 * bytes.  c is evaluated once.
 * NOTE(review): with a plain (signed) char argument, bytes >= 0x80 compare
 * as negative and this is always false -- presumably callers pass an
 * unsigned byte; confirm.
 */
63 #define IS_UTF8(c) ((c) >= 0xc0)
/*
 * GET_UTF8_CHAR(p,u): decode one multi-byte sequence starting at p into u.
 * Branches shown here:
 *   - a "too large" branch that loops while *p is a continuation byte
 *     ((*p & 0xc0) == 0x80) and then sets u = UNI_REPLACEMENT;
 *   - *p >= 0xe0: fold in up to two continuation bytes;
 *   - otherwise:  fold in up to one continuation byte.
 * Each fold is guarded by its own continuation-byte test, so a truncated
 * sequence stops early and the offending byte is left unconsumed; the
 * bits gathered so far remain in u and no error is reported.
 * p is dereferenced and advanced repeatedly; it must be a plain pointer
 * lvalue to unsigned bytes.
 * NOTE(review): the first branch condition, the lead-byte extraction into
 * u and the body of the while loop are not shown here -- confirm them.
 */
65 #define GET_UTF8_CHAR(p,u) do { \
67 { /* Too large, use replacement char */ \
69 while ((*p & 0xc0) == 0x80) \
71 u = UNI_REPLACEMENT; \
73 else if (*p >= 0xe0) \
76 if ((*p & 0xc0) == 0x80) \
77 u = (u << 6) | (*p++ & 0x3f); \
78 if ((*p & 0xc0) == 0x80) \
79 u = (u << 6) | (*p++ & 0x3f); \
84 if ((*p & 0xc0) == 0x80) \
85 u = (u << 6) | (*p++ & 0x3f); \
/*
 * GET_UTF8_32_CHAR(p,u): decode one multi-byte sequence starting at p into
 * u, including the longer forms.  Groups shown here, in order:
 *   - a group folding in up to three continuation bytes;
 *   - *p < 0xfc: up to four continuation bytes;
 *   - *p < 0xfe: up to five continuation bytes;
 *   - otherwise ("too large"): loop while *p is a continuation byte, then
 *     set u = UNI_REPLACEMENT.
 * Every fold shifts u left by 6 and ORs in the low 6 bits of *p, and is
 * guarded by (*p & 0xc0) == 0x80, so a truncated sequence stops early
 * without consuming the byte that broke it.
 * NOTE(review): the conditions ahead of the first group, the lead-byte
 * extraction into u and the body of the while loop are not shown here --
 * confirm them.
 */
89 #define GET_UTF8_32_CHAR(p,u) do { \
95 if ((*p & 0xc0) == 0x80) \
96 u = (u << 6) | (*p++ & 0x3f); \
97 if ((*p & 0xc0) == 0x80) \
98 u = (u << 6) | (*p++ & 0x3f); \
99 if ((*p & 0xc0) == 0x80) \
100 u = (u << 6) | (*p++ & 0x3f); \
102 else if (*p < 0xfc) \
105 if ((*p & 0xc0) == 0x80) \
106 u = (u << 6) | (*p++ & 0x3f); \
107 if ((*p & 0xc0) == 0x80) \
108 u = (u << 6) | (*p++ & 0x3f); \
109 if ((*p & 0xc0) == 0x80) \
110 u = (u << 6) | (*p++ & 0x3f); \
111 if ((*p & 0xc0) == 0x80) \
112 u = (u << 6) | (*p++ & 0x3f); \
114 else if (*p < 0xfe) \
117 if ((*p & 0xc0) == 0x80) \
118 u = (u << 6) | (*p++ & 0x3f); \
119 if ((*p & 0xc0) == 0x80) \
120 u = (u << 6) | (*p++ & 0x3f); \
121 if ((*p & 0xc0) == 0x80) \
122 u = (u << 6) | (*p++ & 0x3f); \
123 if ((*p & 0xc0) == 0x80) \
124 u = (u << 6) | (*p++ & 0x3f); \
125 if ((*p & 0xc0) == 0x80) \
126 u = (u << 6) | (*p++ & 0x3f); \
129 { /* Too large, use replacement char */ \
131 while ((*p & 0xc0) == 0x80) \
133 u = UNI_REPLACEMENT; \
/*
 * GET_UTF8(p,u): front end that hands the sequence at p to
 * GET_UTF8_CHAR(p,u) on one of its paths.
 * NOTE(review): the test that selects this path and whatever the other
 * path does are not shown here.  The #define line does not open a
 * "do {" the way the macros above do, so check how the expansion behaves
 * when used as the body of an unbraced if/else.
 */
137 #define GET_UTF8(p,u) \
139 GET_UTF8_CHAR(p,u); \
/*
 * GET_UTF8_32(p,u): front end that hands the sequence at p to
 * GET_UTF8_32_CHAR(p,u) on one of its paths.
 * NOTE(review): the test that selects this path and whatever the other
 * path does are not shown here.  The #define line does not open a
 * "do {" the way the macros above do, so check how the expansion behaves
 * when used as the body of an unbraced if/else.
 */
143 #define GET_UTF8_32(p,u) \
145 GET_UTF8_32_CHAR(p,u); \
/*
 * UTF8_SKIP(p): step p forward over one encoded character.
 * The loop shown here keeps going while bit 0x40 of c is set and *p lies
 * in 0x80..0xbf (a continuation byte), so it stops at the first byte that
 * is not a continuation byte.
 * NOTE(review): the declaration/initialisation of c and the loop body are
 * not shown here -- presumably c starts as the lead byte and is shifted
 * left once per skipped byte; confirm.
 */
149 #define UTF8_SKIP(p) do { \
152 while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
/*
 * UTF8_SKIP_BWD(p): move p backwards to the start of the previous encoded
 * character.  p is pre-decremented and the loop repeats for as long as the
 * byte it then points at is a continuation byte ((byte & 0xc0) == 0x80).
 * The expansion is a bare "while (...)" with no body: the semicolon the
 * caller writes after the macro is the (empty) loop body, so always invoke
 * it as a full statement, "UTF8_SKIP_BWD(p);".
 * There is no lower bound: the caller must guarantee that a
 * non-continuation byte exists somewhere before p.
 */
156 #define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
/*
 * utf8_encoding_len(c): takes a single unsigned value c.
 * The ASSERT shown here requires 0xc0 <= c < 0xfe at the point where it
 * runs -- the same lead-byte range that IS_UTF8 and the "< 0xfe" test in
 * GET_UTF8_32_CHAR use.
 * NOTE(review): the return type, any statements ahead of the ASSERT and
 * the computation of the result are not shown here; presumably this
 * returns the sequence length implied by lead byte c -- confirm.
 */
175 utf8_encoding_len(uns c)
179 ASSERT(c >= 0xc0 && c < 0xfe);
/*
 * Declarations only; the definitions live outside this header.
 * NOTE(review): presumably these return the number of UTF-8 characters in
 * str, with utf8_strnlen limited by n -- confirm against the
 * implementation, including whether n counts bytes or characters and
 * whether str must be NUL-terminated.
 */
193 uns utf8_strlen(byte *str);
194 uns utf8_strnlen(byte *str, uns n);