From: Martin Mares <mj@ucw.cz>
Date: Tue, 18 Jun 2002 17:37:18 +0000 (+0000)
Subject: UTF8_SKIP now recognizes the real end of the UTF-8 character
X-Git-Tag: holmes-import~1397
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=d7a9c8cf5648a81d098010ea75b1a1f158f97ebb;p=libucw.git

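UTF8_SKIP now recognizes the real end of the UTF-8 character
and doesn't get confused by garbage after it. The length bits of the
lead byte now limit how many continuation bytes (0x80-0xbf) are skipped,
so stray continuation bytes following a complete character are left alone.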
---

diff --git a/charset/unicode.h b/charset/unicode.h
index f12fff1a..0895e89e 100644
--- a/charset/unicode.h
+++ b/charset/unicode.h
@@ -102,10 +102,11 @@ static inline word Uunaccent(word x)
     else				\
       u = *p++
 
-#define UTF8_SKIP(p) do {		\
-    if (*p++ >= 0xc0)			\
-      while (*p >= 0x80 && *p < 0xc0)	\
-        p++; 				\
+#define UTF8_SKIP(p) do {				\
+    uns c = *p++;					\
+    if (c >= 0xc0)					\
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)	\
+        p++, c <<= 1;					\
   } while (0)
 
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)