From: Pavel Charvat Date: Mon, 10 Dec 2007 10:23:34 +0000 (+0100) Subject: UCW: Added UTF-8 routines with custom replacement char (lib/ff-unicode.h). X-Git-Tag: holmes-import~492 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=10f1d0ab666c28cf8aeca9c04a254af5c6ed6b22;p=libucw.git UCW: Added UTF-8 routines with custom replacement char (lib/ff-unicode.h). --- diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c index 7f46aaab..69a5247b 100644 --- a/lib/ff-unicode.c +++ b/lib/ff-unicode.c @@ -14,7 +14,7 @@ #include "lib/ff-unicode.h" int -bget_utf8_slow(struct fastbuf *b) +bget_utf8_slow(struct fastbuf *b, uns repl) { int c = bgetc(b); int code; @@ -22,7 +22,7 @@ bget_utf8_slow(struct fastbuf *b) if (c < 0x80) /* Includes EOF */ return c; if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; + return repl; if (c >= 0xf0) /* Too large, skip it */ { while ((c = bgetc(b)) >= 0x80 && c < 0xc0) @@ -51,11 +51,11 @@ bget_utf8_slow(struct fastbuf *b) wrong: if (c >= 0) bungetc(b); - return UNI_REPLACEMENT; + return repl; } int -bget_utf8_32_slow(struct fastbuf *b) +bget_utf8_32_slow(struct fastbuf *b, uns repl) { int c = bgetc(b); int code; @@ -64,7 +64,7 @@ bget_utf8_32_slow(struct fastbuf *b) if (c < 0x80) /* Includes EOF */ return c; if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; + return repl; if (c < 0xe0) { code = c & 0x1f; @@ -107,7 +107,7 @@ bget_utf8_32_slow(struct fastbuf *b) wrong: if (c >= 0) bungetc(b); - return UNI_REPLACEMENT; + return repl; } void diff --git a/lib/ff-unicode.h b/lib/ff-unicode.h index 31510ff4..e3838b1c 100644 --- a/lib/ff-unicode.h +++ b/lib/ff-unicode.h @@ -14,47 +14,57 @@ #include "lib/fastbuf.h" #include "lib/unicode.h" -int bget_utf8_slow(struct fastbuf *b); -int bget_utf8_32_slow(struct fastbuf *b); +int bget_utf8_slow(struct fastbuf *b, uns repl); +int bget_utf8_32_slow(struct fastbuf *b, uns repl); void bput_utf8_slow(struct fastbuf *b, uns u); void bput_utf8_32_slow(struct fastbuf *b, uns u); static inline int -bget_utf8(struct fastbuf *b) +bget_utf8_repl(struct fastbuf *b, uns repl) { uns u; - if (bavailr(b) >= 3) { - GET_UTF8(b->bptr, u); + b->bptr = utf8_get_repl(b->bptr, &u, repl); return u; } else - return bget_utf8_slow(b); + return bget_utf8_slow(b, repl); } -static inline void -bput_utf8(struct fastbuf *b, uns u) +static inline int +bget_utf8_32_repl(struct fastbuf *b, uns repl) { - ASSERT(u < 65536); - if (bavailw(b) >= 3) - PUT_UTF8(b->bptr, u); + uns u; + if (bavailr(b) >= 6) + { + b->bptr = utf8_32_get_repl(b->bptr, &u, repl); + return u; + } else - bput_utf8_slow(b, u); + return bget_utf8_32_slow(b, repl); +} + +static inline int +bget_utf8(struct fastbuf *b) +{ + return bget_utf8_repl(b, UNI_REPLACEMENT); } static inline int bget_utf8_32(struct fastbuf *b) { - uns u; + return bget_utf8_32_repl(b, UNI_REPLACEMENT); +} - if (bavailr(b) >= 6) - { - GET_UTF8_32(b->bptr, u); - return u; - } +static inline void +bput_utf8(struct fastbuf *b, uns u) +{ + ASSERT(u < 65536); + if (bavailw(b) >= 3) + b->bptr = utf8_put(b->bptr, u); else - return bget_utf8_32_slow(b); + bput_utf8_slow(b, u); } static inline void @@ -62,7 +72,7 @@ bput_utf8_32(struct fastbuf *b, uns u) { ASSERT(u < (1U<<31)); if (bavailw(b) >= 6) - PUT_UTF8_32(b->bptr, u); + b->bptr = utf8_32_put(b->bptr, u); else bput_utf8_32_slow(b, u); }