From: Martin Mares
Date: Tue, 18 Jun 2002 17:37:18 +0000 (+0000)
Subject: UTF8_SKIP now recognizes the real end of the UTF-8 character
X-Git-Tag: holmes-import~1397
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=d7a9c8cf5648a81d098010ea75b1a1f158f97ebb;p=libucw.git

UTF8_SKIP now recognizes the real end of the UTF-8 character
and doesn't get confused by garbage after it.
---

diff --git a/charset/unicode.h b/charset/unicode.h
index f12fff1a..0895e89e 100644
--- a/charset/unicode.h
+++ b/charset/unicode.h
@@ -102,10 +102,11 @@ static inline word Uunaccent(word x)
   else \
     u = *p++
 
-#define UTF8_SKIP(p) do { \
-    if (*p++ >= 0xc0) \
-      while (*p >= 0x80 && *p < 0xc0) \
-        p++; \
+#define UTF8_SKIP(p) do { \
+    uns c = *p++; \
+    if (c >= 0xc0) \
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
+        p++, c <<= 1; \
   } while (0)
 
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)