DIRS+=charset
UNI_OBJS=toupper.o tolower.o tocat.o utf8.o unaccent.o strlen.o debug.o \
- charconv.o setnames.o
+ charconv.o setnames.o unistream.o
obj/charset/libcharset.a: $(addprefix obj/charset/,$(UNI_OBJS))
--- /dev/null
+/*
+ * The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "charset/unicode.h"
+#include "charset/unistream.h"
+
+int
+bget_utf8_slow(struct fastbuf *b)
+{ /*
+ * Read one UTF-8 encoded character from b, fetching it byte by byte
+ * with bgetc(). Only sequences of up to 3 bytes are decoded, so a
+ * successfully decoded value always fits in 16 bits.
+ *
+ * Returns the decoded code, or the first byte unchanged when it is
+ * below 0x80 (which also passes EOF through), or UNI_REPLACEMENT when
+ * the input is malformed or uses a longer sequence. No check for
+ * overlong encodings is made here.
+ */
+ int c = bgetc(b);
+ int code;
+
+ if (c < 0x80) /* Single-byte character; includes EOF, returned as is */
+ return c;
+ if (c < 0xc0) /* Incorrect combination: continuation byte (0x80-0xbf) without a lead byte */
+ return UNI_REPLACEMENT;
+ if (c >= 0xf0) /* Lead byte of a sequence too large for us: skip its continuation bytes */
+ {
+ while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
+ ;
+ goto wrong; /* c now holds the first non-continuation byte (or EOF) */
+ }
+ if (c >= 0xe0) /* 3 bytes: 4 payload bits in the lead byte, 6 in each follower */
+ {
+ code = c & 0x0f;
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0) /* not a continuation byte (or EOF) */
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0) /* not a continuation byte (or EOF) */
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ }
+ else /* 2 bytes: 5 payload bits in the lead byte, 6 in the follower */
+ {
+ code = c & 0x1f;
+ if ((c = bgetc(b)) < 0x80 || c >= 0xc0) /* not a continuation byte (or EOF) */
+ goto wrong;
+ code = (code << 6) | (c & 0x3f);
+ }
+ return code;
+
+ wrong: /* Sequence ended early: hand the offending byte back unless it was EOF */
+ if (c >= 0)
+ bungetc(b, c);
+ return UNI_REPLACEMENT;
+}
+
+void
+bput_utf8_slow(struct fastbuf *b, uns u)
+{ /*
+ * Write the code u to b as UTF-8, one byte at a time with bputc().
+ * u must be below 65536 (asserted), so the output is 1, 2 or 3 bytes:
+ * below 0x80 a single byte, below 0x800 two bytes, otherwise three.
+ */
+ ASSERT(u < 65536);
+ if (u < 0x80) /* 1 byte: the value itself */
+ bputc(b, u);
+ else
+ {
+ if (u < 0x800) /* 2 bytes: lead byte carries bits 6 and up */
+ bputc(b, 0xc0 | (u >> 6));
+ else /* 3 bytes: lead byte carries bits 12-15, then bits 6-11 */
+ {
+ bputc(b, 0xe0 | (u >> 12));
+ bputc(b, 0x80 | ((u >> 6) & 0x3f));
+ }
+ bputc(b, 0x80 | (u & 0x3f)); /* Final continuation byte (low 6 bits), shared by both multi-byte forms */
+ }
+}
#ifndef _UNISTREAM_H
#define _UNISTREAM_H
-/* FIXME: Do these need to be inline? */
+int bget_utf8_slow(struct fastbuf *b); /* Out-of-line reader; bget_utf8() calls it when its fast-path check fails */
+void bput_utf8_slow(struct fastbuf *b, uns u); /* Out-of-line writer; bput_utf8() calls it when its fast-path check fails */
static inline int
bget_utf8(struct fastbuf *b)
{
- int c = bgetc(b);
- int code;
+ uns u; /* Receives the value produced by GET_UTF8 on the fast path */
- if (c < 0x80) /* Includes EOF */
- return c;
- if (c < 0xc0) /* Incorrect combination */
- return UNI_REPLACEMENT;
- if (c >= 0xf0) /* Too large, skip it */
+ if (b->bptr + 5 <= b->bufend) /* Fast path: at least 5 bytes lie between bptr and bufend. NOTE(review): confirm bufend (rather than an end-of-data pointer) is the right limit when reading */
{
- while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
- ;
- goto wrong;
+ GET_UTF8(b->bptr, u); /* Macro defined elsewhere; presumably decodes at bptr and advances it -- TODO confirm */
+ return u;
}
- if (c >= 0xe0) /* 3 bytes */
- {
- code = c & 0x0f;
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- }
- else /* 2 bytes */
- {
- code = c & 0x1f;
- if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
- goto wrong;
- code = (code << 6) | (c & 0x3f);
- }
- return code;
-
- wrong:
- if (c >= 0)
- bungetc(b, c);
- return UNI_REPLACEMENT;
+ else /* Fewer than 5 bytes before bufend: use the out-of-line reader */
+ return bget_utf8_slow(b);
}
static inline void
bput_utf8(struct fastbuf *b, uns u)
{
ASSERT(u < 65536);
- if (u < 0x80)
- bputc(b, u);
+ if (b->bptr + 5 <= b->bufend) /* Fast path: at least 5 bytes lie between bptr and bufend */
+ PUT_UTF8(b->bptr, u); /* Macro defined elsewhere; presumably stores the encoded bytes at bptr and advances it -- TODO confirm */
else
- {
- if (u < 0x800)
- bputc(b, 0xc0 | (u >> 6));
- else
- {
- bputc(b, 0xe0 | (u >> 12));
- bputc(b, 0x80 | ((u >> 6) & 0x3f));
- }
- bputc(b, 0x80 | (u & 0x3f));
- }
+ bput_utf8_slow(b, u); /* Fewer than 5 bytes before bufend: use the out-of-line writer */
}
#endif