lib/unicode.h

   1 /*
   2  *      Sherlock Library -- Unicode Characters
   3  *
   4  *      (c) 1997--2004 Martin Mares <mj@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  */
   9
  10 #ifndef _UNICODE_H
  11 #define _UNICODE_H
  12
  13 /* Macros for handling UTF-8 */
  14
/*
 * Code point stored by GET_UTF8_CHAR() when it meets a lead byte >= 0xf0,
 * i.e. a sequence it does not decode.
 * NOTE(review): the value is U+FFFC, whereas the Unicode REPLACEMENT
 * CHARACTER is U+FFFD -- confirm whether this is intentional before
 * changing it, callers may compare against this value.
 */
#define UNI_REPLACEMENT 0xfffc
  16
  17 #define PUT_UTF8(p,u) do {              \
  18   if (u < 0x80)                         \
  19     *p++ = u;                           \
  20   else if (u < 0x800)                   \
  21     {                                   \
  22       *p++ = 0xc0 | (u >> 6);           \
  23       *p++ = 0x80 | (u & 0x3f);         \
  24     }                                   \
  25   else                                  \
  26     {                                   \
  27       *p++ = 0xe0 | (u >> 12);          \
  28       *p++ = 0x80 | ((u >> 6) & 0x3f);  \
  29       *p++ = 0x80 | (u & 0x3f);         \
  30     }                                   \
  31   } while(0)
  32
  33 #define IS_UTF8(c) ((c) >= 0xc0)
  34
  35 #define GET_UTF8_CHAR(p,u) do {         \
  36     if (*p >= 0xf0)                     \
  37       { /* Too large, use replacement char */   \
  38         p++;                            \
  39         while ((*p & 0xc0) == 0x80)     \
  40           p++;                          \
  41         u = UNI_REPLACEMENT;            \
  42       }                                 \
  43     else if (*p >= 0xe0)                \
  44       {                                 \
  45         u = *p++ & 0x0f;                \
  46         if ((*p & 0xc0) == 0x80)        \
  47           u = (u << 6) | (*p++ & 0x3f); \
  48         if ((*p & 0xc0) == 0x80)        \
  49           u = (u << 6) | (*p++ & 0x3f); \
  50       }                                 \
  51     else                                \
  52       {                                 \
  53         u = *p++ & 0x1f;                \
  54         if ((*p & 0xc0) == 0x80)        \
  55           u = (u << 6) | (*p++ & 0x3f); \
  56       }                                 \
  57   } while (0)                           \
  58
  59 #define GET_UTF8(p,u)                   \
  60     if (IS_UTF8(*p))                    \
  61       GET_UTF8_CHAR(p,u);               \
  62     else                                \
  63       u = *p++
  64
  65 #define UTF8_SKIP(p) do {                               \
  66     uns c = *p++;                                       \
  67     if (c >= 0xc0)                                      \
  68       while (c & 0x40 && *p >= 0x80 && *p < 0xc0)       \
  69         p++, c <<= 1;                                   \
  70   } while (0)
  71
  72 #define UTF8_SKIP_BWD(p) while ((--*(p) & 0xc0) == 0x80)
  73
  74 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
  75
  76 /* unicode-utf8.c */
  77
/*
 * NOTE(review): only the prototypes are visible in this header.  Judging by
 * the names, utf8_strlen() presumably returns the number of characters in a
 * NUL-terminated UTF-8 string and utf8_strnlen() does the same while looking
 * at no more than n bytes -- confirm against the definitions in
 * unicode-utf8.c.
 */
uns utf8_strlen(byte *str);
uns utf8_strnlen(byte *str, uns n);
  80
  81 #endif