From 0c4c92ea6a27b21551bd17a2a84a30e568cda660 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 10 Jul 2004 20:36:55 +0000 Subject: [PATCH] Unicode character type functions moved to charset/unicat.h charset/unicode.h removed, because it's now empty. --- charset/unicat.h | 70 +++++++++++++++++++++++ charset/unicode.h | 138 ---------------------------------------------- 2 files changed, 70 insertions(+), 138 deletions(-) create mode 100644 charset/unicat.h delete mode 100644 charset/unicode.h diff --git a/charset/unicat.h b/charset/unicat.h new file mode 100644 index 00000000..1ddfb8e6 --- /dev/null +++ b/charset/unicat.h @@ -0,0 +1,70 @@ +/* + * The UniCode Character Categorizer + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UNICAT_H +#define _UNICAT_H + +extern const byte *_U_cat[]; /* Category tables: one pointer per page, selected by x >> 8 and indexed inside the page by x & 0xff; Ucategory treats a null page pointer as no data */ +extern const word *_U_upper[], *_U_lower[], *_U_unaccent[]; /* Mapping tables accessed with the same two-level indexing; a zero entry means no mapping */ + +static inline uns Ucategory(uns x) /* Returns the category bits stored for x, or 0 when the page pointer for x >> 8 is null. NOTE(review): x >> 8 is not range-checked here -- presumably callers pass only values the tables cover; confirm. */ +{ + if (_U_cat[x >> 8U]) + return _U_cat[x >> 8U][x & 0xff]; + else + return 0; +} + +static inline uns Utoupper(uns x) /* Returns the _U_upper entry for x, or x unchanged when the page pointer is null or the entry is 0 */ +{ + word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0; + return w ? w : x; +} + +static inline uns Utolower(uns x) /* Returns the _U_lower entry for x, or x unchanged when the page pointer is null or the entry is 0 */ +{ + word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0; + return w ? w : x; +} + +static inline uns Uunaccent(uns x) /* Returns the _U_unaccent entry for x, or x unchanged when the page pointer is null or the entry is 0 */ +{ + word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0; + return w ? 
w : x; +} + +extern const word *Uexpand_lig(uns x); /* Declared here, defined elsewhere. NOTE(review): presumably yields the expansion of a character flagged _U_LIGATURE -- confirm against the definition. */ + +enum unicode_char_type { /* Bit flags; the U* macros below test them against Ucategory() via UCat() */ + _U_LETTER = 1, /* Letters */ + _U_UPPER = 2, /* Upper-case letters */ + _U_LOWER = 4, /* Lower-case letters */ + _U_CTRL = 8, /* Control characters */ + _U_DIGIT = 16, /* Digits */ + _U_XDIGIT = 32, /* Hexadecimal digits */ + _U_SPACE = 64, /* White spaces (spaces, tabs, newlines) */ + _U_LIGATURE = 128, /* Compatibility ligature (to be expanded) */ +}; + +#define _U_LUPPER (_U_LETTER | _U_UPPER) +#define _U_LLOWER (_U_LETTER | _U_LOWER) + +#define UCat(x,y) (Ucategory(x) & (y)) /* Nonzero iff x carries at least one of the flag bits in y; the value is the masked bits, not normalized to 1 */ + +#define Ualpha(x) UCat(x, _U_LETTER) +#define Uupper(x) UCat(x, _U_UPPER) +#define Ulower(x) UCat(x, _U_LOWER) +#define Udigit(x) UCat(x, _U_DIGIT) +#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT)) /* True for either flag: _U_DIGIT or _U_XDIGIT */ +#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT)) /* True for either flag: _U_LETTER or _U_DIGIT */ +#define Uctrl(x) UCat(x, _U_CTRL) +#define Uprint(x) !Uctrl(x) /* Defined purely as the negation of Uctrl(). NOTE(review): the expansion is not parenthesized. */ +#define Uspace(x) UCat(x, _U_SPACE) + +#endif diff --git a/charset/unicode.h b/charset/unicode.h deleted file mode 100644 index 7bc6f235..00000000 --- a/charset/unicode.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * The UniCode Library - * - * (c) 1997--2003 Martin Mares - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UNICODE_H -#define _UNICODE_H - -extern const byte *_U_cat[]; -extern const word *_U_upper[], *_U_lower[], *_U_unaccent[]; - -static inline uns Ucategory(uns x) -{ - if (_U_cat[x >> 8U]) - return _U_cat[x >> 8U][x & 0xff]; - else - return 0; -} - -static inline uns Utoupper(uns x) -{ - word w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0; - return w ? w : x; -} - -static inline uns Utolower(uns x) -{ - word w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0; - return w ? w : x; -} - -static inline uns Uunaccent(uns x) -{ - word w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0; - return w ? 
w : x; -} - -extern const word *Uexpand_lig(uns x); - -enum unicode_char_type { - _U_LETTER = 1, /* Letters */ - _U_UPPER = 2, /* Upper-case letters */ - _U_LOWER = 4, /* Lower-case letters */ - _U_CTRL = 8, /* Control characters */ - _U_DIGIT = 16, /* Digits */ - _U_XDIGIT = 32, /* Hexadecimal digits */ - _U_SPACE = 64, /* White spaces (spaces, tabs, newlines) */ - _U_LIGATURE = 128, /* Compatibility ligature (to be expanded) */ -}; - -#define _U_LUPPER (_U_LETTER | _U_UPPER) -#define _U_LLOWER (_U_LETTER | _U_LOWER) - -#define UCat(x,y) (Ucategory(x) & (y)) - -#define Ualpha(x) UCat(x, _U_LETTER) -#define Uupper(x) UCat(x, _U_UPPER) -#define Ulower(x) UCat(x, _U_LOWER) -#define Udigit(x) UCat(x, _U_DIGIT) -#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT)) -#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT)) -#define Uctrl(x) UCat(x, _U_CTRL) -#define Uprint(x) !Uctrl(x) -#define Uspace(x) UCat(x, _U_SPACE) - -#define UNI_REPLACEMENT 0xfffc - -#define PUT_UTF8(p,u) do { \ - if (u < 0x80) \ - *p++ = u; \ - else if (u < 0x800) \ - { \ - *p++ = 0xc0 | (u >> 6); \ - *p++ = 0x80 | (u & 0x3f); \ - } \ - else \ - { \ - *p++ = 0xe0 | (u >> 12); \ - *p++ = 0x80 | ((u >> 6) & 0x3f); \ - *p++ = 0x80 | (u & 0x3f); \ - } \ - } while(0) - -#define IS_UTF8(c) ((c) >= 0xc0) - -#define GET_UTF8_CHAR(p,u) do { \ - if (*p >= 0xf0) \ - { /* Too large, use replacement char */ \ - p++; \ - while ((*p & 0xc0) == 0x80) \ - p++; \ - u = UNI_REPLACEMENT; \ - } \ - else if (*p >= 0xe0) \ - { \ - u = *p++ & 0x0f; \ - if ((*p & 0xc0) == 0x80) \ - u = (u << 6) | (*p++ & 0x3f); \ - if ((*p & 0xc0) == 0x80) \ - u = (u << 6) | (*p++ & 0x3f); \ - } \ - else \ - { \ - u = *p++ & 0x1f; \ - if ((*p & 0xc0) == 0x80) \ - u = (u << 6) | (*p++ & 0x3f); \ - } \ - } while (0) \ - -#define GET_UTF8(p,u) \ - if (IS_UTF8(*p)) \ - GET_UTF8_CHAR(p,u); \ - else \ - u = *p++ - -#define UTF8_SKIP(p) do { \ - uns c = *p++; \ - if (c >= 0xc0) \ - while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \ - p++, c <<= 1; \ - } 
while (0) - -#define UTF8_SKIP_BWD(p) while ((--*(p) & 0xc0) == 0x80) - -#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3) - -uns ucs2_to_utf8(byte *, word *); -uns utf8_to_ucs2(word *, byte *); -byte *static_ucs2_to_utf8(word *); -uns Ustrlen(word *); -uns utf8_strlen(byte *str); -uns utf8_strnlen(byte *str, uns n); - -#endif -- 2.39.2