From: Pavel Charvat Date: Mon, 10 Dec 2007 10:33:29 +0000 (+0100) Subject: UCW: Added bget_utf16_{le,be} routines. X-Git-Tag: holmes-import~490 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=318592e5f0f42161924da92b48cf85c6152ee6ab;p=libucw.git UCW: Added bget_utf16_{le,be} routines. --- diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c index 69a5247b..047fff84 100644 --- a/lib/ff-unicode.c +++ b/lib/ff-unicode.c @@ -12,6 +12,7 @@ #include "lib/fastbuf.h" #include "lib/unicode.h" #include "lib/ff-unicode.h" +#include "lib/ff-binary.h" int bget_utf8_slow(struct fastbuf *b, uns repl) @@ -165,3 +166,33 @@ bput_utf8_32_slow(struct fastbuf *b, uns u) bputc(b, 0x80 | (u & 0x3f)); } } + +int +bget_utf16_be_slow(struct fastbuf *b, uns repl) +{ + if (bpeekc(b) < 0) + return -1; + uns u = bgetw_be(b), x, y; + if ((int)u < 0) + return repl; + if ((x = u - 0xd800) >= 0x800) + return u; + if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400) + return repl; + return 0x10000 + (x << 10) + y; +} + +int +bget_utf16_le_slow(struct fastbuf *b, uns repl) +{ + if (bpeekc(b) < 0) + return -1; + uns u = bgetw_le(b), x, y; + if ((int)u < 0) + return repl; + if ((x = u - 0xd800) >= 0x800) + return u; + if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400) + return repl; + return 0x10000 + (x << 10) + y; +} diff --git a/lib/ff-unicode.h b/lib/ff-unicode.h index e3838b1c..f577c635 100644 --- a/lib/ff-unicode.h +++ b/lib/ff-unicode.h @@ -18,6 +18,8 @@ int bget_utf8_slow(struct fastbuf *b, uns repl); int bget_utf8_32_slow(struct fastbuf *b, uns repl); void bput_utf8_slow(struct fastbuf *b, uns u); void bput_utf8_32_slow(struct fastbuf *b, uns u); +int bget_utf16_be_slow(struct fastbuf *b, uns repl); +int bget_utf16_le_slow(struct fastbuf *b, uns repl); static inline int bget_utf8_repl(struct fastbuf *b, uns repl) @@ -77,4 +79,42 @@ bput_utf8_32(struct fastbuf *b, uns u) bput_utf8_32_slow(b, u); } +static inline int +bget_utf16_be_repl(struct fastbuf *b, uns repl) +{ + uns u; + if (bavailr(b) >= 4) + { + b->bptr = utf16_be_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf16_be_slow(b, repl); +} + +static inline int +bget_utf16_le_repl(struct fastbuf *b, uns repl) +{ + uns u; + if (bavailr(b) >= 4) + { + b->bptr = utf16_le_get_repl(b->bptr, &u, repl); + return u; + } + else + return bget_utf16_le_slow(b, repl); +} + +static inline int +bget_utf16_be(struct fastbuf *b) +{ + return bget_utf16_be_repl(b, UNI_REPLACEMENT); +} + +static inline int +bget_utf16_le(struct fastbuf *b) +{ + return bget_utf16_le_repl(b, UNI_REPLACEMENT); +} + #endif diff --git a/sherlock/xml/xml.c b/sherlock/xml/xml.c index b365c683..73b3a92f 100644 --- a/sherlock/xml/xml.c +++ b/sherlock/xml/xml.c @@ -110,64 +110,6 @@ xml_char_cat(uns c) return 1; } -/*** Generic UTF decoding ***/ - -static uns -bget_utf16_le_slow(struct fastbuf *fb, uns repl) -{ - if ((int)bpeekc(fb) < 0) - return ~0U; - uns u = bgetw_le(fb), x, y; - if ((int)u < 0) - return repl; - if ((x = u - 0xd800) >= 0x800) - return u; - if (x >= 0x400 || (int)bpeekc(fb) < 0 || (y = bgetw_le(fb) - 0xdc00) >= 0x400) - return repl; - return 0x10000 + (x << 10) + y; -} - -static uns -bget_utf16_be_slow(struct fastbuf *fb, uns repl) -{ - if ((int)bpeekc(fb) < 0) - return ~0U; - uns u = bgetw_be(fb), x, y; - if ((int)u < 0) - return repl; - if ((x = u - 0xd800) >= 0x800) - return u; - if (x >= 0x400 || (int)bpeekc(fb) < 0 || (y = bgetw_be(fb) - 0xdc00) >= 0x400) - return repl; - return 0x10000 + (x << 10) + y; -} - -static inline uns -bget_utf16_le_repl(struct fastbuf *fb, uns repl) -{ - uns u; - if (bavailr(fb) >= 4) - { - fb->bptr = utf16_le_get_repl(fb->bptr, &u, repl); - return u; - } - else - return bget_utf16_le_slow(fb, repl); -} - -static inline uns -bget_utf16_be_repl(struct fastbuf *fb, uns repl) -{ - uns u; - if (bavailr(fb) >= 4) - { - fb->bptr = utf16_be_get_repl(fb->bptr, &u, repl); - return u; - } - else - return bget_utf16_be_slow(fb, repl); -} - /*** Memory management ***/ static void NONRET