charset/unicode.h

   1 /*
   2  *      The UniCode Library
   3  *
   4  *      (c) 1997 Martin Mares, <mj@atrey.karlin.mff.cuni.cz>
   5  */
   6
   7 #ifndef _UNICODE_H
   8 #define _UNICODE_H
   9
  10 #include <lib/config.h>
  11 #include <lib/string.h>
  12
  13 extern byte *_U_cat[], *_U_sig[];
  14 extern word *_U_upper[], *_U_lower[], *_U_unaccent[];
  15
  16 static inline uns Ucategory(word x)
  17 {
  18   if (_U_cat[x >> 8U])
  19     return _U_cat[x >> 8U][x & 0xff];
  20   else
  21     return 0;
  22 }
  23
  24 static inline word Utoupper(word x)
  25 {
  26   word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
  27   return w ? w : x;
  28 }
  29
  30 static inline word Utolower(word x)
  31 {
  32   word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
  33   return w ? w : x;
  34 }
  35
  36 static inline word Uunaccent(word x)
  37 {
  38   word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
  39   return w ? w : x;
  40 }
  41
  42 static inline byte Usig(word x)
  43 {
  44   if (_U_sig[x >> 8U])
  45     return _U_sig[x >> 8U][x & 0xff] ? : 0xff;
  46   else
  47     return 0xff;
  48 }
  49
  50 #define UCat(x,y) (Ucategory(x) & (y))
  51
  52 #define Uupper(x) UCat(x, _C_UPPER)
  53 #define Ulower(x) UCat(x, _C_LOWER)
  54 #define Ualpha(x) UCat(x, _C_ALPHA)
  55 #define Ualnum(x) UCat(x, _C_ALNUM)
  56 #define Uprint(x) !Uctrl(x)
  57 #define Udigit(x) UCat(x, _C_DIGIT)
  58 #define Uxdigit(x) UCat(x, _C_XDIGIT)
  59 #define Uword(x) UCat(x, _C_WORD)
  60 #define Ublank(x) UCat(x, _C_BLANK)
  61 #define Uctrl(x) UCat(x, _C_CTRL)
  62 #define Uspace(x) Ublank(x)
  63
  64 #define UNI_REPLACEMENT 0xfffc
  65
  66 #define PUT_UTF8(p,u) do {              \
  67   if (u < 0x80)                         \
  68     *p++ = u;                           \
  69   else if (u < 0x800)                   \
  70     {                                   \
  71       *p++ = 0xc0 | (u >> 6);           \
  72       *p++ = 0x80 | (u & 0x3f);         \
  73     }                                   \
  74   else                                  \
  75     {                                   \
  76       *p++ = 0xe0 | (u >> 12);          \
  77       *p++ = 0x80 | ((u >> 6) & 0x3f);  \
  78       *p++ = 0x80 | (u & 0x3f);         \
  79     }                                   \
  80   } while(0)
  81
  82 #define IS_UTF8(c) ((c) >= 0xc0)
  83
  84 #define GET_UTF8_CHAR(p,u) do {         \
  85     if (*p >= 0xf0)                     \
  86       { /* Too large, use replacement char */   \
  87         p++;                            \
  88         while ((*p & 0xc0) == 0x80)     \
  89           p++;                          \
  90         u = UNI_REPLACEMENT;            \
  91       }                                 \
  92     else if (*p >= 0xe0)                \
  93       {                                 \
  94         u = *p++ & 0x0f;                \
  95         if ((*p & 0xc0) == 0x80)        \
  96           u = (u << 6) | (*p++ & 0x3f); \
  97         if ((*p & 0xc0) == 0x80)        \
  98           u = (u << 6) | (*p++ & 0x3f); \
  99       }                                 \
 100     else                                \
 101       {                                 \
 102         u = *p++ & 0x1f;                \
 103         if ((*p & 0xc0) == 0x80)        \
 104           u = (u << 6) | (*p++ & 0x3f); \
 105       }                                 \
 106   } while (0)                           \
 107
 108 #define GET_UTF8(p,u)                   \
 109     if (IS_UTF8(*p))                    \
 110       GET_UTF8_CHAR(p,u);               \
 111     else                                \
 112       u = *p++
 113
 114 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
 115
 116 uns ucs2_to_utf8(byte *, word *);
 117 uns utf8_to_ucs2(word *, byte *);
 118 byte *static_ucs2_to_utf8(word *);
 119 uns Ustrlen(word *);
 120
 121 #endif