From d7a9c8cf5648a81d098010ea75b1a1f158f97ebb Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Tue, 18 Jun 2002 17:37:18 +0000
Subject: [PATCH] UTF8_SKIP now recognizes the real end of the UTF-8 character
 and doesn't get confused by garbage after it.

---
 charset/unicode.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/charset/unicode.h b/charset/unicode.h
index f12fff1a..0895e89e 100644
--- a/charset/unicode.h
+++ b/charset/unicode.h
@@ -102,10 +102,11 @@ static inline word Uunaccent(word x)
     else				\
       u = *p++
 
-#define UTF8_SKIP(p) do {		\
-    if (*p++ >= 0xc0)			\
-      while (*p >= 0x80 && *p < 0xc0)	\
-        p++; 				\
+#define UTF8_SKIP(p) do {	/* advance p past one UTF-8 character; p is evaluated several times and must be a plain lvalue not named c */	\
+    uns c = *p++;		/* c = first byte of the character; p now points just past it */	\
+    if (c >= 0xc0)		/* only bytes >= 0xc0 can be followed by continuation bytes */	\
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)	/* stop when c's bit 0x40 is clear or *p is outside 0x80..0xbf */	\
+        p++, c <<= 1;		/* consume one byte and shift the first byte's next lower bit into position 0x40 */	\
   } while (0)
 
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)
-- 
2.39.5