charset/unicode.h

   1 /*
   2  *      The UniCode Library
   3  *
   4  *      (c) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
   5  */
   6
   7 #ifndef _UNICODE_H
   8 #define _UNICODE_H
   9
  10 #include "lib/chartype.h"
  11
  12 extern byte *_U_cat[];
  13 extern word *_U_upper[], *_U_lower[], *_U_unaccent[];
  14
  15 static inline uns Ucategory(word x)
  16 {
  17   if (_U_cat[x >> 8U])
  18     return _U_cat[x >> 8U][x & 0xff];
  19   else
  20     return 0;
  21 }
  22
  23 static inline word Utoupper(word x)
  24 {
  25   word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
  26   return w ? w : x;
  27 }
  28
  29 static inline word Utolower(word x)
  30 {
  31   word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
  32   return w ? w : x;
  33 }
  34
  35 static inline word Uunaccent(word x)
  36 {
  37   word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
  38   return w ? w : x;
  39 }
  40
  41 #define UCat(x,y) (Ucategory(x) & (y))
  42
  43 #define Uupper(x) UCat(x, _C_UPPER)
  44 #define Ulower(x) UCat(x, _C_LOWER)
  45 #define Ualpha(x) UCat(x, _C_ALPHA)
  46 #define Ualnum(x) UCat(x, _C_ALNUM)
  47 #define Uprint(x) !Uctrl(x)
  48 #define Udigit(x) UCat(x, _C_DIGIT)
  49 #define Uxdigit(x) UCat(x, _C_XDIGIT)
  50 #define Uword(x) UCat(x, _C_WORD)
  51 #define Ublank(x) UCat(x, _C_BLANK)
  52 #define Uctrl(x) UCat(x, _C_CTRL)
  53 #define Uspace(x) Ublank(x)
  54
  55 #define UNI_REPLACEMENT 0xfffc
  56
  57 #define PUT_UTF8(p,u) do {              \
  58   if (u < 0x80)                         \
  59     *p++ = u;                           \
  60   else if (u < 0x800)                   \
  61     {                                   \
  62       *p++ = 0xc0 | (u >> 6);           \
  63       *p++ = 0x80 | (u & 0x3f);         \
  64     }                                   \
  65   else                                  \
  66     {                                   \
  67       *p++ = 0xe0 | (u >> 12);          \
  68       *p++ = 0x80 | ((u >> 6) & 0x3f);  \
  69       *p++ = 0x80 | (u & 0x3f);         \
  70     }                                   \
  71   } while(0)
  72
  73 #define IS_UTF8(c) ((c) >= 0xc0)
  74
  75 #define GET_UTF8_CHAR(p,u) do {         \
  76     if (*p >= 0xf0)                     \
  77       { /* Too large, use replacement char */   \
  78         p++;                            \
  79         while ((*p & 0xc0) == 0x80)     \
  80           p++;                          \
  81         u = UNI_REPLACEMENT;            \
  82       }                                 \
  83     else if (*p >= 0xe0)                \
  84       {                                 \
  85         u = *p++ & 0x0f;                \
  86         if ((*p & 0xc0) == 0x80)        \
  87           u = (u << 6) | (*p++ & 0x3f); \
  88         if ((*p & 0xc0) == 0x80)        \
  89           u = (u << 6) | (*p++ & 0x3f); \
  90       }                                 \
  91     else                                \
  92       {                                 \
  93         u = *p++ & 0x1f;                \
  94         if ((*p & 0xc0) == 0x80)        \
  95           u = (u << 6) | (*p++ & 0x3f); \
  96       }                                 \
  97   } while (0)                           \
  98
  99 #define GET_UTF8(p,u)                   \
 100     if (IS_UTF8(*p))                    \
 101       GET_UTF8_CHAR(p,u);               \
 102     else                                \
 103       u = *p++
 104
 105 #define UTF8_SKIP(p) do {               \
 106     if (*p++ >= 0xc0)                   \
 107       while (*p >= 0x80 && *p < 0xc0)   \
 108         p++;                            \
 109   } while (0)
 110
 111 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
 112
 113 uns ucs2_to_utf8(byte *, word *);
 114 uns utf8_to_ucs2(word *, byte *);
 115 byte *static_ucs2_to_utf8(word *);
 116 uns Ustrlen(word *);
 117
 118 #endif