X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fff-utf8.c;h=a7e40d3c0cf35bd177a0e33d68a5bf29845d37f8;hb=c5fbc7b75705d1f7a322ad73e6055284a3b94e73;hp=f55719b1ada99c02811bee8d2da1141912f9d89b;hpb=414288b47086de5aafa715dd9c794f62f94f8ab2;p=libucw.git diff --git a/lib/ff-utf8.c b/lib/ff-utf8.c index f55719b1..a7e40d3c 100644 --- a/lib/ff-utf8.c +++ b/lib/ff-utf8.c @@ -1,7 +1,8 @@ /* - * Sherlock Library: Reading and writing of UTF-8 on Fastbuf Streams + * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams * * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. @@ -53,6 +54,62 @@ bget_utf8_slow(struct fastbuf *b) return UNI_REPLACEMENT; } +int +bget_utf8_32_slow(struct fastbuf *b) +{ + int c = bgetc(b); + int code; + int nr; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return UNI_REPLACEMENT; + if (c < 0xe0) + { + code = c & 0x1f; + nr = 1; + } + else if (c < 0xf0) + { + code = c & 0x0f; + nr = 2; + } + else if (c < 0xf8) + { + code = c & 0x07; + nr = 3; + } + else if (c < 0xfc) + { + code = c & 0x03; + nr = 4; + } + else if (c < 0xfe) + { + code = c & 0x01; + nr = 5; + } + else /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + while (nr-- > 0) + { + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b); + return UNI_REPLACEMENT; +} + void bput_utf8_slow(struct fastbuf *b, uns u) { @@ -71,3 +128,40 @@ bput_utf8_slow(struct fastbuf *b, uns u) bputc(b, 0x80 | (u & 0x3f)); } } + +void +bput_utf8_32_slow(struct fastbuf *b, uns u) +{ + ASSERT(u < (1U<<31)); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + if (u < (1<<16)) + bputc(b, 0xe0 | (u >> 12)); + else + { + if (u < (1<<21)) + bputc(b, 0xf0 | (u >> 18)); + else + { + if (u < (1<<26)) + bputc(b, 0xf8 | (u >> 24)); + else + { + bputc(b, 0xfc | (u >> 30)); + bputc(b, 0x80 | ((u >> 24) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 18) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 12) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +}