X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=charset%2Funistream.h;h=3eff996950f8e3821ecaacbf58d041ba3f43048e;hb=8a67ac6bc2e78c422f511af0d8a1dcd58f5cabac;hp=5483d8097b22f8b509da87605922e85a271a60f5;hpb=dd79339e61140f94f3602e8bbdee78e838f9090c;p=libucw.git diff --git a/charset/unistream.h b/charset/unistream.h index 5483d809..3eff9969 100644 --- a/charset/unistream.h +++ b/charset/unistream.h @@ -7,66 +7,31 @@ #ifndef _UNISTREAM_H #define _UNISTREAM_H -/* FIXME: Do these need to be inline? */ +int bget_utf8_slow(struct fastbuf *b); +void bput_utf8_slow(struct fastbuf *b, uns u); static inline int bget_utf8(struct fastbuf *b) { - int c = bgetc(b); - int code; + uns u; - if (c < 0x80) /* Includes EOF */ - return c; - if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; - if (c >= 0xf0) /* Too large, skip it */ + if (b->bptr + 5 <= b->bufend) { - while ((c = bgetc(b)) >= 0x80 && c < 0xc0) - ; - goto wrong; + GET_UTF8(b->bptr, u); + return u; } - if (c >= 0xe0) /* 3 bytes */ - { - code = c & 0x0f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - else /* 2 bytes */ - { - code = c & 0x1f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - return code; - - wrong: - if (c >= 0) - bungetc(b, c); - return UNI_REPLACEMENT; + else + return bget_utf8_slow(b); } static inline void bput_utf8(struct fastbuf *b, uns u) { ASSERT(u < 65536); - if (u < 0x80) - bputc(b, u); + if (b->bptr + 5 <= b->bufend) + PUT_UTF8(b->bptr, u); else - { - if (u < 0x800) - bputc(b, 0xc0 | (u >> 6)); - else - { - bputc(b, 0xe0 | (u >> 12)); - bputc(b, 0x80 | ((u >> 6) & 0x3f)); - } - bputc(b, 0x80 | (u & 0x3f)); - } + bput_utf8_slow(b, u); } #endif