From: Martin Mares
Date: Sat, 10 Jul 2004 20:35:31 +0000 (+0000)
Subject: Moved the basic Unicode and UTF-8 functions to the main library.
X-Git-Tag: holmes-import~957
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=f528738ecc582e648fb5ab97fc579b8d14327175;p=libucw.git

Moved the basic Unicode and UTF-8 functions to the main library.
---

diff --git a/lib/Makefile b/lib/Makefile
index a3b5b805..5297668f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,8 @@ LIBSH_MODS=alloc alloc_str ctmatch db fastbuf fb-file fb-mem lists \
 	finger proctitle ipaccess profile bitsig randomkey \
 	hashfunc base64 base224 fb-temp fb-mmap fb-printf urlkey \
 	partmap fb-limfd fb-buffer mainloop exitstatus runcmd carefulio \
-	lizard lizard-safe sighandler buck2obj obj2buck
+	lizard lizard-safe sighandler buck2obj obj2buck \
+	unicode-utf8 ff-utf8
 
 ifdef CONFIG_OWN_REGEX
 include lib/regex/Makefile
diff --git a/lib/unicode-utf8.c b/lib/unicode-utf8.c
new file mode 100644
index 00000000..5a9d1bfc
--- /dev/null
+++ b/lib/unicode-utf8.c
@@ -0,0 +1,37 @@
+/*
+ * Sherlock Library -- UTF-8 Functions
+ *
+ * (c) 1997--2004 Martin Mares
+ * (c) 2003 Robert Spalek
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include "lib/lib.h"
+#include "lib/unicode.h"
+
+/* Count the UTF-8 characters of the NUL-terminated string `str' (one UTF8_SKIP step each). */
+uns
+utf8_strlen(byte *str)
+{
+  uns count;
+  for (count = 0; *str; count++)
+    UTF8_SKIP(str);
+  return count;
+}
+
+/*
+ * Count the UTF-8 characters starting within the first `n' bytes of `str'.
+ * A NUL byte does not end the scan.  A character that begins before the limit
+ * is skipped as a whole, so UTF8_SKIP may inspect bytes at or past `str + n'.
+ */
+uns
+utf8_strnlen(byte *str, uns n)
+{
+  byte *limit = str + n;
+  uns count;
+  for (count = 0; str < limit; count++)
+    UTF8_SKIP(str);
+  return count;
+}
diff --git a/lib/unicode.h b/lib/unicode.h
new file mode 100644
index 00000000..ca8bdea3
--- /dev/null
+++ b/lib/unicode.h
@@ -0,0 +1,81 @@
+/*
+ * Sherlock Library -- Unicode Characters
+ *
+ * (c) 1997--2004 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UNICODE_H
+#define _UNICODE_H
+
+/* Macros for handling UTF-8; each `p' is a byte pointer lvalue the macro moves (the >= 0xc0 tests need unsigned bytes) */
+
+#define UNI_REPLACEMENT 0xfffc /* What GET_UTF8_CHAR yields for lead bytes >= 0xf0 */
+/* Store `u' (values up to 0xffff) at `p' as 1 to 3 bytes of UTF-8; `u' is evaluated more than once */
+#define PUT_UTF8(p,u) do { \
+    if ((u) < 0x80) \
+      *p++ = (u); \
+    else if ((u) < 0x800) \
+      { \
+        *p++ = 0xc0 | ((u) >> 6); \
+        *p++ = 0x80 | ((u) & 0x3f); \
+      } \
+    else \
+      { \
+        *p++ = 0xe0 | ((u) >> 12); \
+        *p++ = 0x80 | (((u) >> 6) & 0x3f); \
+        *p++ = 0x80 | ((u) & 0x3f); \
+      } \
+  } while(0)
+
+#define IS_UTF8(c) ((c) >= 0xc0) /* True for the lead byte of a multi-byte sequence */
+/* Decode the multi-byte sequence at `p' (lead byte >= 0xc0) into lvalue `u'; missing continuation bytes go unreported */
+#define GET_UTF8_CHAR(p,u) do { \
+    if (*p >= 0xf0) \
+      { /* Too large, use replacement char */ \
+        p++; \
+        while ((*p & 0xc0) == 0x80) \
+          p++; \
+        u = UNI_REPLACEMENT; \
+      } \
+    else if (*p >= 0xe0) \
+      { \
+        u = *p++ & 0x0f; \
+        if ((*p & 0xc0) == 0x80) \
+          u = (u << 6) | (*p++ & 0x3f); \
+        if ((*p & 0xc0) == 0x80) \
+          u = (u << 6) | (*p++ & 0x3f); \
+      } \
+    else \
+      { \
+        u = *p++ & 0x1f; \
+        if ((*p & 0xc0) == 0x80) \
+          u = (u << 6) | (*p++ & 0x3f); \
+      } \
+  } while (0)
+/* Read one character at `p' into `u': bytes below 0xc0 are taken as they are, anything else is decoded */
+#define GET_UTF8(p,u) do { \
+    if (IS_UTF8(*p)) \
+      GET_UTF8_CHAR(p,u); \
+    else \
+      u = *p++; \
+  } while (0)
+/* Advance `p' over one character: the lead byte plus the continuation bytes it announces, as far as present */
+#define UTF8_SKIP(p) do { \
+    uns c = *p++; \
+    if (c >= 0xc0) \
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
+        p++, c <<= 1; \
+  } while (0)
+/* Step `p' back over continuation bytes to the previous character start; *--(p) moves the pointer, not the byte */
+#define UTF8_SKIP_BWD(p) while ((*--(p) & 0xc0) == 0x80)
+
+#define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3) /* Bytes PUT_UTF8 writes for `u' */
+
+/* unicode-utf8.c */
+uns utf8_strlen(byte *str); /* Characters in a NUL-terminated string */
+uns utf8_strnlen(byte *str, uns n); /* Characters starting within the first `n' bytes */
+
+#endif