2 * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams
4 * (c) 2001--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 #include <ucw/fastbuf.h>
13 #include <ucw/unicode.h>
14 #include <ucw/ff-unicode.h>
15 #include <ucw/ff-binary.h>
/*
 * Slow-path decoder for one UTF-8 encoded character read from fastbuf @b.
 * The value in c is classified by range: below 0x80 (this also catches EOF),
 * 0x80-0xbf (a continuation byte where a lead byte was expected), 0xf0 and
 * above (a sequence longer than this decoder accepts) and 0xe0 and above
 * (a 3-byte sequence); the remaining range is presumably the 2-byte case.
 * NOTE(review): @repl looks like the value handed back for malformed input --
 * confirm against the return statements.
 */
20 bget_utf8_slow(struct fastbuf *b, uint repl)
25 if (c < 0x80) /* Includes EOF */
27 if (c < 0xc0) /* Incorrect combination */
29 if (c >= 0xf0) /* Too large, skip it */
/* Keep reading while the next byte is a continuation byte (0x80-0xbf). */
31 while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
35 if (c >= 0xe0) /* 3 bytes */
/* Every trailing byte must lie in 0x80-0xbf; this test detects the ones that do not. */
38 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
/* Shift the accumulated value left and append the 6 payload bits of this byte. */
40 code = (code << 6) | (c & 0x3f);
41 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
43 code = (code << 6) | (c & 0x3f);
/* Same continuation-byte check and 6-bit append as above. */
48 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
50 code = (code << 6) | (c & 0x3f);
/*
 * Slow-path decoder for one UTF-8 encoded character read from fastbuf @b.
 * NOTE(review): judging by the name this variant accepts longer sequences
 * than bget_utf8_slow(); @repl is presumably returned for malformed
 * input -- confirm both against the full body.
 */
61 bget_utf8_32_slow(struct fastbuf *b, uint repl)
67 if (c < 0x80) /* Includes EOF */
69 if (c < 0xc0) /* Incorrect combination */
96 else /* Too large, skip it */
/* Keep reading while the next byte is a continuation byte (0x80-0xbf). */
98 while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
/* A trailing byte outside 0x80-0xbf is not a valid continuation byte. */
104 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
/* Shift the accumulated value left and append the 6 payload bits of this byte. */
106 code = (code << 6) | (c & 0x3f);
/*
 * Slow-path encoder: writes @u to fastbuf @b as a multi-byte UTF-8 sequence.
 * A lead byte carries the top bits of @u under a 0xc0 (2-byte) or 0xe0
 * (3-byte) marker; each following byte is 0x80 plus 6 payload bits.
 * NOTE(review): the range tests that pick between the forms are not part of
 * the lines shown here -- confirm them in the full function.
 */
117 bput_utf8_slow(struct fastbuf *b, uint u)
/* 2-byte form: lead byte holds the bits of u above the low 6. */
125 bputc(b, 0xc0 | (u >> 6));
/* 3-byte form: lead byte holds the bits of u above the low 12, then bits 6-11. */
128 bputc(b, 0xe0 | (u >> 12));
129 bputc(b, 0x80 | ((u >> 6) & 0x3f));
/* Final continuation byte: the low 6 bits of u. */
131 bputc(b, 0x80 | (u & 0x3f));
/*
 * Slow-path encoder: writes @u to fastbuf @b as a multi-byte UTF-8 sequence
 * using lead bytes 0xc0, 0xe0, 0xf0, 0xf8 or 0xfc followed by continuation
 * bytes of the form 0x80 | <6 bits>.
 * NOTE(review): the range tests that select the lead byte are not part of
 * the lines shown here -- confirm them in the full function.
 */
136 bput_utf8_32_slow(struct fastbuf *b, uint u)
/* Only values below 2^31 are accepted. */
138 ASSERT(u < (1U<<31));
/* Lead-byte candidates; the marker grows as more high bits of u are shifted out. */
144 bputc(b, 0xc0 | (u >> 6));
148 bputc(b, 0xe0 | (u >> 12));
152 bputc(b, 0xf0 | (u >> 18));
156 bputc(b, 0xf8 | (u >> 24));
159 bputc(b, 0xfc | (u >> 30));
/* Continuation bytes: successive 6-bit groups of u, from bits 24-29 down to bits 0-5. */
160 bputc(b, 0x80 | ((u >> 24) & 0x3f));
162 bputc(b, 0x80 | ((u >> 18) & 0x3f));
164 bputc(b, 0x80 | ((u >> 12) & 0x3f));
166 bputc(b, 0x80 | ((u >> 6) & 0x3f));
168 bputc(b, 0x80 | (u & 0x3f));
/*
 * Slow-path decoder for one big-endian UTF-16 character read from fastbuf @b,
 * combining a surrogate pair into a single code point of 0x10000 or above.
 * NOTE(review): @repl is presumably returned for an unpaired or misordered
 * surrogate -- confirm against the return statements.
 */
175 bget_utf16_be_slow(struct fastbuf *b, uint repl)
/* u is the first 16-bit unit; x and y receive the surrogate offsets below. */
179 uint u = bgetw_be(b), x, y;
/*
 * x is the offset of u from 0xd800. Assuming uint is an unsigned type, values
 * below 0xd800 wrap around, so this test is true for every u outside
 * 0xd800-0xdfff, i.e. for everything that is not a surrogate.
 */
182 if ((x = u - 0xd800) >= 0x800)
/*
 * Bad pair: u is itself in 0xdc00-0xdfff (x >= 0x400), or no further input is
 * available, or the next unit is not in 0xdc00-0xdfff.
 */
184 if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400)
/* x supplies the upper 10 bits and y the lower 10 bits above the 0x10000 base. */
186 return 0x10000 + (x << 10) + y;
/*
 * Slow-path decoder for one little-endian UTF-16 character read from fastbuf
 * @b, combining a surrogate pair into a single code point of 0x10000 or above.
 * NOTE(review): @repl is presumably returned for an unpaired or misordered
 * surrogate -- confirm against the return statements.
 */
190 bget_utf16_le_slow(struct fastbuf *b, uint repl)
/* u is the first 16-bit unit; x and y receive the surrogate offsets below. */
194 uint u = bgetw_le(b), x, y;
/*
 * x is the offset of u from 0xd800. Assuming uint is an unsigned type, values
 * below 0xd800 wrap around, so this test is true for every u outside
 * 0xd800-0xdfff, i.e. for everything that is not a surrogate.
 */
197 if ((x = u - 0xd800) >= 0x800)
/*
 * Bad pair: u is itself in 0xdc00-0xdfff (x >= 0x400), or no further input is
 * available, or the next unit is not in 0xdc00-0xdfff.
 */
199 if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400)
/* x supplies the upper 10 bits and y the lower 10 bits above the 0x10000 base. */
201 return 0x10000 + (x << 10) + y;
/*
 * Slow-path encoder: writes @u to fastbuf @b as big-endian UTF-16, one byte
 * at a time for the surrogate-pair case.
 * NOTE(review): the handling of values that match neither test below is not
 * part of the lines shown here -- confirm in the full function.
 */
205 bput_utf16_be_slow(struct fastbuf *b, uint u)
/* Values below 0x10000 that are not in the surrogate range 0xd800-0xdfff. */
207 if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
/* Subtract the 0x10000 base; the remainder must fit in 20 bits to be encodable as a pair. */
212 else if ((u -= 0x10000) < 0x100000)
/* First unit 0xd800 | (u >> 10), high byte first: bits 18-19 of u, then bits 10-17. */
214 bputc(b, 0xd8 | (u >> 18));
215 bputc(b, (u >> 10) & 0xff);
/* High byte of the second unit 0xdc00 | (u & 0x3ff): marker plus bits 8-9 of u. */
216 bputc(b, 0xdc | ((u >> 8) & 0x3));
/*
 * Slow-path encoder: writes @u to fastbuf @b as little-endian UTF-16, one
 * byte at a time for the surrogate-pair case.
 * NOTE(review): the handling of values that match neither test below is not
 * part of the lines shown here -- confirm in the full function.
 */
224 bput_utf16_le_slow(struct fastbuf *b, uint u)
/* Values below 0x10000 that are not in the surrogate range 0xd800-0xdfff. */
226 if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
/* Subtract the 0x10000 base; the remainder must fit in 20 bits to be encodable as a pair. */
231 else if ((u -= 0x10000) < 0x100000)
/* First unit 0xd800 | (u >> 10), low byte first: bits 10-17 of u, then marker plus bits 18-19. */
233 bputc(b, (u >> 10) & 0xff);
234 bputc(b, 0xd8 | (u >> 18));
/* High byte of the second unit 0xdc00 | (u & 0x3ff): marker plus bits 8-9 of u. */
236 bputc(b, 0xdc | ((u >> 8) & 0x3));
/*
 * Test driver. argv[1] names one of the eight functions listed in the F(...)
 * table (matched case-insensitively). For the BGET_* functions, hex values
 * are read from stdin and the selected decoder is then run over fastbuf b
 * until it is exhausted; for the BPUT_* functions, each hex value read from
 * stdin is passed to the selected encoder and the bytes in b are printed as
 * two-digit hex.
 */
247 int main(int argc, char **argv)
250 F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
251 F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
/* First expansion of the table: one FUNC_<name> identifier per entry. */
254 #define F(x) FUNC_##x,
/* Second expansion: the printable name of each entry, indexed by FUNC_<name>. */
259 #define F(x) [FUNC_##x] = #x,
/* Look up argv[1] among the names, ignoring case. */
266 for (uint i = 0; i < ARRAY_SIZE(names); i++)
267 if (!strcasecmp(names[i], argv[1]))
271 fprintf(stderr, "Invalid usage!\n");
/* NOTE(review): presumably an in-memory, growable fastbuf used as scratch space -- confirm. */
275 struct fastbuf *b = fbgrow_create(8);
/* All FUNC_BGET_* values precede FUNC_BPUT_UTF8 in the table, so this selects the decoders. */
276 if (func < FUNC_BPUT_UTF8)
/* Read hex values from stdin until scanf stops matching. */
279 while (scanf("%x", &u) == 1)
/* Decode with the selected function until no more bytes are available in b. */
282 while (bpeekc(b) >= 0)
289 u = bget_utf8_slow(b, UNI_REPLACEMENT);
291 case FUNC_BGET_UTF8_32:
292 u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
294 case FUNC_BGET_UTF16_BE:
295 u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
297 case FUNC_BGET_UTF16_LE:
298 u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
/* Encoder path: every hex value read from stdin is written to b by the selected function. */
310 while (scanf("%x", &u) == 1)
315 bput_utf8_slow(b, u);
317 case FUNC_BPUT_UTF8_32:
318 bput_utf8_32_slow(b, u);
320 case FUNC_BPUT_UTF16_BE:
321 bput_utf16_be_slow(b, u);
323 case FUNC_BPUT_UTF16_LE:
324 bput_utf16_le_slow(b, u);
/* Print the bytes in b, each as two lowercase hex digits. */
331 while (bpeekc(b) >= 0)
335 printf("%02x", bgetc(b));