From d7a9c8cf5648a81d098010ea75b1a1f158f97ebb Mon Sep 17 00:00:00 2001
From: Martin Mares
Date: Tue, 18 Jun 2002 17:37:18 +0000
Subject: [PATCH] UTF8_SKIP now recognizes the real end of the UTF-8 character and doesn't get confused by garbage after it.

---
 charset/unicode.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/charset/unicode.h b/charset/unicode.h
index f12fff1a..0895e89e 100644
--- a/charset/unicode.h
+++ b/charset/unicode.h
@@ -102,10 +102,11 @@ static inline word Uunaccent(word x)
 	else \
 		u = *p++
 
-#define UTF8_SKIP(p) do { \
-	if (*p++ >= 0xc0) \
-		while (*p >= 0x80 && *p < 0xc0) \
-			p++; \
+#define UTF8_SKIP(p) do { \
+	uns c = *p++; \
+	if (c >= 0xc0) \
+		while (c & 0x40 && *p >= 0x80 && *p < 0xc0) \
+			p++, c <<= 1; \
 } while (0)
 
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
-- 
2.39.2