From: Martin Mares Date: Sun, 21 Jan 2001 17:59:58 +0000 (+0000) Subject: Added functions for reading/writing UTF-8 characters on fastbuf streams. X-Git-Tag: holmes-import~1577 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=dd79339e61140f94f3602e8bbdee78e838f9090c;p=libucw.git Added functions for reading/writing UTF-8 characters on fastbuf streams. --- diff --git a/charset/unistream.h b/charset/unistream.h new file mode 100644 index 00000000..5483d809 --- /dev/null +++ b/charset/unistream.h @@ -0,0 +1,72 @@ +/* + * The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams + * + * (c) 2001 Martin Mares + */ + +#ifndef _UNISTREAM_H +#define _UNISTREAM_H + +/* FIXME: Do these need to be inline? */ + +static inline int +bget_utf8(struct fastbuf *b) +{ + int c = bgetc(b); + int code; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return UNI_REPLACEMENT; + if (c >= 0xf0) /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + if (c >= 0xe0) /* 3 bytes */ + { + code = c & 0x0f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + else /* 2 bytes */ + { + code = c & 0x1f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b, c); + return UNI_REPLACEMENT; +} + +static inline void +bput_utf8(struct fastbuf *b, uns u) +{ + ASSERT(u < 65536); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + bputc(b, 0xe0 | (u >> 12)); + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +} + +#endif