/*
 *	The Unicode Character Categorizer
 *
 *	(c) 1997--2004 Martin Mares <mj@ucw.cz>
 *
 *	This software may be freely distributed and used according to the terms
 *	of the GNU Lesser General Public License.
 */
10 #ifndef _CHARSET_UNICAT_H
11 #define _CHARSET_UNICAT_H
/*
 * When CONFIG_UCW_CLEAN_ABI is defined, every later use of the name
 * Uexpand_lig (including its prototype in this header) is rewritten to
 * ucw_Uexpand_lig, so the exported symbol carries the ucw_ prefix.
 */
#ifdef CONFIG_UCW_CLEAN_ABI
#define Uexpand_lig ucw_Uexpand_lig
#endif
17 extern const byte *_U_cat[];
18 extern const u16 *_U_upper[], *_U_lower[], *_U_unaccent[];
20 static inline uns Ucategory(uns x)
23 return _U_cat[x >> 8U][x & 0xff];
28 static inline uns Utoupper(uns x)
30 uns w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0;
34 static inline uns Utolower(uns x)
36 uns w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0;
40 static inline uns Uunaccent(uns x)
42 uns w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0;
46 extern const u16 *Uexpand_lig(uns x);
/*
 * Character category flags.  Each constant is a distinct single bit, so the
 * flags can be OR-ed together into a mask and tested with UCat().
 */
enum unicode_char_type {
  _U_LETTER   = 1 << 0,		/* Letters */
  _U_UPPER    = 1 << 1,		/* Upper-case letters */
  _U_LOWER    = 1 << 2,		/* Lower-case letters */
  _U_CTRL     = 1 << 3,		/* Control characters */
  _U_DIGIT    = 1 << 4,		/* Digits */
  _U_XDIGIT   = 1 << 5,		/* Hexadecimal digits */
  _U_SPACE    = 1 << 6,		/* White spaces (spaces, tabs, newlines) */
  _U_LIGATURE = 1 << 7,		/* Compatibility ligature (to be expanded) */
};
/* Composite masks: the letter flag combined with the upper-/lower-case flag. */
#define _U_LUPPER (_U_LETTER | _U_UPPER)
#define _U_LLOWER (_U_LETTER | _U_LOWER)
/*
 * UCat(x, y) yields the category bits of character x that are also set in
 * the mask y.  The result is non-zero as soon as ANY flag of y is present
 * (it is the masked value, not a normalized 0/1).
 */
#define UCat(x,y) (Ucategory(x) & (y))

/*
 * Character class predicates built on UCat().  All of them return the masked
 * bits (non-zero means "yes"), except Uprint(), which is a logical negation
 * and therefore yields exactly 0 or 1.
 */
#define Ualpha(x) UCat(x, _U_LETTER)
#define Uupper(x) UCat(x, _U_UPPER)
#define Ulower(x) UCat(x, _U_LOWER)
#define Udigit(x) UCat(x, _U_DIGIT)
#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT))
#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT))
#define Uctrl(x) UCat(x, _U_CTRL)
/* Printable means "not a control character"; parenthesized so the expansion stays one expression. */
#define Uprint(x) (!Uctrl(x))
#define Uspace(x) UCat(x, _U_SPACE)