X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=charset%2Funistream.h;h=66680ba42a6dc4e2eef2ed1097125861a0741645;hb=d9820b2d7d4b908bb9da441f5a35a7999d701e60;hp=5483d8097b22f8b509da87605922e85a271a60f5;hpb=dd79339e61140f94f3602e8bbdee78e838f9090c;p=libucw.git diff --git a/charset/unistream.h b/charset/unistream.h index 5483d809..66680ba4 100644 --- a/charset/unistream.h +++ b/charset/unistream.h @@ -1,72 +1,42 @@ /* * The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams * - * (c) 2001 Martin Mares + * (c) 2001--2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. */ #ifndef _UNISTREAM_H #define _UNISTREAM_H -/* FIXME: Do these need to be inline? */ +#include "charset/unicode.h" + +int bget_utf8_slow(struct fastbuf *b); +void bput_utf8_slow(struct fastbuf *b, uns u); static inline int bget_utf8(struct fastbuf *b) { - int c = bgetc(b); - int code; + uns u; - if (c < 0x80) /* Includes EOF */ - return c; - if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; - if (c >= 0xf0) /* Too large, skip it */ - { - while ((c = bgetc(b)) >= 0x80 && c < 0xc0) - ; - goto wrong; - } - if (c >= 0xe0) /* 3 bytes */ - { - code = c & 0x0f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - else /* 2 bytes */ + if (b->bptr + 5 <= b->bufend) { - code = c & 0x1f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); + GET_UTF8(b->bptr, u); + return u; } - return code; - - wrong: - if (c >= 0) - bungetc(b, c); - return UNI_REPLACEMENT; + else + return bget_utf8_slow(b); } static inline void bput_utf8(struct fastbuf *b, uns u) { ASSERT(u < 65536); - if (u < 0x80) - bputc(b, u); + if (b->bptr + 5 <= b->bufend) + PUT_UTF8(b->bptr, u); else - { - if (u < 0x800) - bputc(b, 0xc0 | (u >> 6)); - else - { - bputc(b, 0xe0 | (u >> 12)); - bputc(b, 0x80 | ((u >> 6) & 0x3f)); - } - bputc(b, 0x80 | (u & 0x3f)); - } + bput_utf8_slow(b, u); } #endif