mj.ucw.cz Git - libucw.git/commitdiff
UTF8_SKIP now recognizes the real end of the UTF-8 character
author Martin Mares <mj@ucw.cz>
Tue, 18 Jun 2002 17:37:18 +0000 (17:37 +0000)
committer Martin Mares <mj@ucw.cz>
Tue, 18 Jun 2002 17:37:18 +0000 (17:37 +0000)
and doesn't get confused by garbage after it.

charset/unicode.h

index f12fff1add89fb3843bb99492358cacc224a84be..0895e89edbf179f0e9e49fe9e7d94bdf8c837c48 100644 (file)
@@ -102,10 +102,11 @@ static inline word Uunaccent(word x)
     else                               \
       u = *p++
 
-#define UTF8_SKIP(p) do {              \
-    if (*p++ >= 0xc0)                  \
-      while (*p >= 0x80 && *p < 0xc0)  \
-        p++;                           \
+#define UTF8_SKIP(p) do {                              \
+    uns c = *p++;                                      \
+    if (c >= 0xc0)                                     \
+      while (c & 0x40 && *p >= 0x80 && *p < 0xc0)      \
+        p++, c <<= 1;                                  \
   } while (0)
 
 #define UTF8_SPACE(u) ((u) < 0x80 ? 1 : (u) < 0x800 ? 2 : 3)