]> mj.ucw.cz Git - libucw.git/blob - lib/unicode.h
ca8bdea3b2f51011367c0a87f74fb7bcaa94a5e8
[libucw.git] / lib / unicode.h
1 /*
2  *      Sherlock Library -- Unicode Characters
3  *
4  *      (c) 1997--2004 Martin Mares <mj@ucw.cz>
5  *
6  *      This software may be freely distributed and used according to the terms
7  *      of the GNU Lesser General Public License.
8  */
9
10 #ifndef _UNICODE_H
11 #define _UNICODE_H
12
13 /* Macros for handling UTF-8 */
14
/*
 * Code point that GET_UTF8_CHAR stores in place of a sequence whose lead
 * byte is 0xf0 or above (too large for the decoder in this file).
 * NOTE(review): 0xfffc is U+FFFC OBJECT REPLACEMENT CHARACTER; the usual
 * Unicode REPLACEMENT CHARACTER is U+FFFD -- confirm the choice is deliberate.
 */
#define UNI_REPLACEMENT 0xfffc
16
/*
 * PUT_UTF8(p, u) -- store the value u through p as UTF-8 and advance p past
 * the bytes written.
 *
 *   p  modifiable pointer lvalue; incremented by 1, 2 or 3 depending on
 *      the magnitude of u.
 *   u  value to encode.  It is evaluated several times, so it must not have
 *      side effects.
 *
 * Only the one-, two- and three-byte forms are generated: values of 0x10000
 * and above are NOT encoded correctly.  No check is made for buffer space.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * such as `a | b' or `*pp' can be passed safely.
 */
#define PUT_UTF8(p,u) do {                      \
  if ((u) < 0x80)                               \
    *(p)++ = (u);                               \
  else if ((u) < 0x800)                         \
    {                                           \
      *(p)++ = 0xc0 | ((u) >> 6);               \
      *(p)++ = 0x80 | ((u) & 0x3f);             \
    }                                           \
  else                                          \
    {                                           \
      *(p)++ = 0xe0 | ((u) >> 12);              \
      *(p)++ = 0x80 | (((u) >> 6) & 0x3f);      \
      *(p)++ = 0x80 | ((u) & 0x3f);             \
    }                                           \
  } while(0)
32
/*
 * Nonzero when c is 0xc0 or greater.  GET_UTF8 applies it to the byte at
 * the read position to choose between GET_UTF8_CHAR and a plain one-byte
 * read.  c must be an unsigned byte value: a negative signed char never
 * satisfies the test.
 */
#define IS_UTF8(c) (0xc0 <= (c))
34
/*
 * GET_UTF8_CHAR(p, u) -- decode one multi-byte UTF-8 sequence starting at
 * *p into u and advance p past the bytes consumed.
 *
 *   p  modifiable pointer lvalue; must point to unsigned bytes, otherwise
 *      the comparisons against 0xe0/0xf0 cannot succeed.
 *   u  lvalue receiving the decoded value.
 *
 * Behaviour by lead byte:
 *   >= 0xf0  the lead byte and all following continuation bytes (10xxxxxx)
 *            are skipped and u is set to UNI_REPLACEMENT;
 *   >= 0xe0  up to two continuation bytes are merged into u;
 *   other    up to one continuation byte is merged into u.
 * A missing continuation byte simply ends the sequence early; no error is
 * reported and overlong forms are not rejected.
 *
 * Both arguments are evaluated several times and must be free of side
 * effects.  They are parenthesized in the expansion so that expressions
 * such as `*pp' work as expected.
 */
#define GET_UTF8_CHAR(p,u) do {                         \
    if (*(p) >= 0xf0)                                   \
      { /* Too large, use replacement char */           \
        (p)++;                                          \
        while ((*(p) & 0xc0) == 0x80)                   \
          (p)++;                                        \
        (u) = UNI_REPLACEMENT;                          \
      }                                                 \
    else if (*(p) >= 0xe0)                              \
      {                                                 \
        (u) = *(p)++ & 0x0f;                            \
        if ((*(p) & 0xc0) == 0x80)                      \
          (u) = ((u) << 6) | (*(p)++ & 0x3f);           \
        if ((*(p) & 0xc0) == 0x80)                      \
          (u) = ((u) << 6) | (*(p)++ & 0x3f);           \
      }                                                 \
    else                                                \
      {                                                 \
        (u) = *(p)++ & 0x1f;                            \
        if ((*(p) & 0xc0) == 0x80)                      \
          (u) = ((u) << 6) | (*(p)++ & 0x3f);           \
      }                                                 \
  } while (0)
58
/*
 * GET_UTF8(p, u) -- read one character from *p into u and advance p.
 *
 * If the byte at *p satisfies IS_UTF8() the work is delegated to
 * GET_UTF8_CHAR(); otherwise the byte itself is stored in u and p is
 * advanced by one.
 *
 *   p  modifiable pointer lvalue (evaluated more than once).
 *   u  lvalue receiving the result (evaluated more than once).
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, and the arguments are parenthesized (also when forwarded) so
 * that expressions such as `*pp' are handled correctly.
 */
#define GET_UTF8(p,u) do {              \
    if (IS_UTF8(*(p)))                  \
      GET_UTF8_CHAR((p),(u));           \
    else                                \
      (u) = *(p)++;                     \
  } while (0)
64
/*
 * UTF8_SKIP(p) -- advance p over one UTF-8 character without decoding it.
 *
 * The lead byte is always consumed.  If it is 0xc0 or above, continuation
 * bytes (0x80..0xbf) are consumed as well, at most as many as the lead
 * byte announces: each left shift of the saved lead byte moves its next
 * length bit into the 0x40 position, and the loop stops once that bit is
 * clear or a non-continuation byte is met.
 *
 *   p  modifiable pointer lvalue pointing to unsigned bytes; evaluated
 *      several times, so it must not have side effects.
 *
 * The temporary has a macro-specific name so it cannot capture a caller
 * variable called `c', and p is parenthesized in the expansion.
 */
#define UTF8_SKIP(p) do {                                               \
    uns utf8_skip_lead_ = *(p)++;                                       \
    if (utf8_skip_lead_ >= 0xc0)                                        \
      while (utf8_skip_lead_ & 0x40 && *(p) >= 0x80 && *(p) < 0xc0)     \
        (p)++, utf8_skip_lead_ <<= 1;                                   \
  } while (0)
71
/*
 * UTF8_SKIP_BWD(p) -- move p backwards to the start of the previous UTF-8
 * character: p is decremented at least once and then repeatedly while it
 * points at a continuation byte (10xxxxxx).
 *
 * The pointer is what gets decremented (`*--(p)'), not the byte it points
 * to; the buffer is left unmodified.  No lower bound is checked, so the
 * caller must guarantee a non-continuation byte exists before p.
 *
 * The expansion is a `while' without a body: write `UTF8_SKIP_BWD(p);' so
 * that the semicolon supplies the empty loop statement.
 */
#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
73
/*
 * Number of bytes PUT_UTF8 writes for the value u: 1 below 0x80, 2 below
 * 0x800, and 3 for everything else.  u is evaluated once or twice.
 */
#define UTF8_SPACE(u) ((u) >= 0x80 ? ((u) >= 0x800 ? 3 : 2) : 1)
75
76 /* unicode-utf8.c */
77
/*
 * Declared here only; the definitions are not visible in this header.
 * NOTE(review): presumably returns the number of UTF-8 characters in the
 * zero-terminated string str -- confirm against unicode-utf8.c.
 */
uns utf8_strlen(byte *str);
/*
 * NOTE(review): presumably like utf8_strlen() but limited by n; whether n
 * counts bytes or characters cannot be told from here -- confirm against
 * unicode-utf8.c.
 */
uns utf8_strnlen(byte *str, uns n);
80
81 #endif