From: Martin Mares Date: Tue, 27 Mar 2001 10:28:31 +0000 (+0000) Subject: Slow case of b(get|put)_utf8 no longer inline. X-Git-Tag: holmes-import~1504 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=0ba1850de076504dbf931928e709c4119f7bd02d;p=libucw.git Slow case of b(get|put)_utf8 no longer inline. --- diff --git a/charset/Makefile b/charset/Makefile index 6454e26f..40423122 100644 --- a/charset/Makefile +++ b/charset/Makefile @@ -3,7 +3,7 @@ DIRS+=charset UNI_OBJS=toupper.o tolower.o tocat.o utf8.o unaccent.o strlen.o debug.o \ - charconv.o setnames.o + charconv.o setnames.o unistream.o obj/charset/libcharset.a: $(addprefix obj/charset/,$(UNI_OBJS)) diff --git a/charset/unistream.c b/charset/unistream.c new file mode 100644 index 00000000..0e7bbb96 --- /dev/null +++ b/charset/unistream.c @@ -0,0 +1,70 @@ +/* + * The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams + * + * (c) 2001 Martin Mares + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "charset/unicode.h" +#include "charset/unistream.h" + +int +bget_utf8_slow(struct fastbuf *b) /* Decodes one UTF-8 sequence of up to 3 bytes, reading byte by byte with bgetc(). Returns the decoded code, the bgetc() result itself when it is below 0x80 (this covers EOF), or UNI_REPLACEMENT for a malformed or too long sequence. */ +{ + int c = bgetc(b); + int code; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return UNI_REPLACEMENT; + if (c >= 0xf0) /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; /* swallow every following byte in 0x80..0xbf; the first byte outside that range is handled at `wrong' */ + goto wrong; + } + if (c >= 0xe0) /* 3 bytes */ + { + code = c & 0x0f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + else /* 2 bytes */ + { + code = c & 0x1f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: /* c holds the byte that broke the sequence (or a negative bgetc() result) */ + if (c >= 0) /* a real byte is handed back through bungetc(); a negative value is not */ + bungetc(b, c); + return UNI_REPLACEMENT; +} + +void +bput_utf8_slow(struct fastbuf *b, uns u) /* Encodes u as 1 to 3 UTF-8 bytes, emitted one at a time with bputc(). */ +{ + ASSERT(u < 65536); /* only values that fit the 3-byte form below are accepted */ + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { +
bputc(b, 0xe0 | (u >> 12)); + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); /* final byte carrying the low 6 bits, shared by the 2- and 3-byte forms */ + } +} diff --git a/charset/unistream.h b/charset/unistream.h index 5483d809..3eff9969 100644 --- a/charset/unistream.h +++ b/charset/unistream.h @@ -7,66 +7,31 @@ #ifndef _UNISTREAM_H #define _UNISTREAM_H -/* FIXME: Do these need to be inline? */ +int bget_utf8_slow(struct fastbuf *b); /* out-of-line fallbacks used by the inline wrappers below; this commit defines them in charset/unistream.c */ +void bput_utf8_slow(struct fastbuf *b, uns u); static inline int bget_utf8(struct fastbuf *b) { - int c = bgetc(b); - int code; + uns u; - if (c < 0x80) /* Includes EOF */ - return c; - if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; - if (c >= 0xf0) /* Too large, skip it */ + if (b->bptr + 5 <= b->bufend) /* fast path: at least 5 bytes lie between bptr and bufend, so GET_UTF8 works directly on the buffer pointer */ { - while ((c = bgetc(b)) >= 0x80 && c < 0xc0) - ; - goto wrong; + GET_UTF8(b->bptr, u); + return u; } - if (c >= 0xe0) /* 3 bytes */ - { - code = c & 0x0f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - else /* 2 bytes */ - { - code = c & 0x1f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - return code; - - wrong: - if (c >= 0) - bungetc(b, c); - return UNI_REPLACEMENT; + else + return bget_utf8_slow(b); } static inline void bput_utf8(struct fastbuf *b, uns u) { ASSERT(u < 65536); - if (u < 0x80) - bputc(b, u); + if (b->bptr + 5 <= b->bufend) /* fast path: same 5-byte headroom test as in bget_utf8, then PUT_UTF8 works directly on the buffer pointer */ + PUT_UTF8(b->bptr, u); else - { - if (u < 0x800) - bputc(b, 0xc0 | (u >> 6)); - else - { - bputc(b, 0xe0 | (u >> 12)); - bputc(b, 0x80 | ((u >> 6) & 0x3f)); - } - bputc(b, 0x80 | (u & 0x3f)); - } + bput_utf8_slow(b, u); } #endif