From e549923645c63b24629e3ce77f4583e12387de96 Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Thu, 28 Dec 2017 20:38:34 +0100 Subject: [PATCH] Revert "Unicode: Added reading of 32bit UTF-8 unicode values with protection against buffer overflow." This reverts commit 445e507caadb330b5ad640b3c5a357e6326c7855. --- ucw/unicode.h | 70 --------------------------------------------------- 1 file changed, 70 deletions(-) diff --git a/ucw/unicode.h b/ucw/unicode.h index 2cb1c02f..4ec1c6b2 100644 --- a/ucw/unicode.h +++ b/ucw/unicode.h @@ -89,7 +89,6 @@ put1: *p++ = 0x80 | (u & 0x3f); } #define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f) -#define UTF8_CHECK_AVAIL(n) if (unlikely(avail < n)) goto bad #define UTF8_CHECK_RANGE(r) if (unlikely(u < r)) goto bad /** @@ -185,75 +184,6 @@ bad: return (byte *)p; } -/** - * Decode a value from the range `[0, 0x7FFFFFFF]` - * or return @repl if the encoding has been corrupted. - * This function never reads behind @stop (including). - * At least one byte must be available (@stop > @p). - **/ -static inline byte *utf8_32_get_repl_safe(const byte *p, const byte *stop, uint *uu, uint repl) -{ - uint u = *p++; - if (u < 0x80) - goto ok; - else if (unlikely(u < 0xc0)) - goto bad; - uint limit; - size_t avail = stop - p; - if (u < 0xe0) - { - UTF8_CHECK_AVAIL(1); - u &= 0x1f; - limit = 0x80; - goto get1; - } - else if (u < 0xf0) - { - UTF8_CHECK_AVAIL(2); - u &= 0x0f; - limit = 0x800; - goto get2; - } - else if (u < 0xf8) - { - UTF8_CHECK_AVAIL(3); - u &= 0x07; - limit = 1 << 16; - goto get3; - } - else if (u < 0xfc) - { - UTF8_CHECK_AVAIL(4); - u &= 0x03; - limit = 1 << 21; - goto get4; - } - else if (u < 0xfe) - { - UTF8_CHECK_AVAIL(5); - u &= 0x01; - limit = 1 << 26; - UTF8_GET_NEXT; -get4: UTF8_GET_NEXT; -get3: UTF8_GET_NEXT; -get2: UTF8_GET_NEXT; -get1: UTF8_GET_NEXT; - if (unlikely(u < limit)) - goto bad; - } - else - goto bad; - -ok: - *uu = u; - return (byte *)p; - -bad: - /* Incorrect byte sequence */ - *uu = repl; - return (byte *)p; -} - /** * Decode a value from the range `[0, 0xFFFF]` (basic multilingual plane) * or return `UNI_REPLACEMENT` if the encoding has been corrupted. -- 2.39.5