2 * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams
4 * (c) 2001--2015 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
12 #include <ucw/fastbuf.h>
13 #include <ucw/unicode.h>
14 #include <ucw/ff-unicode.h>
15 #include <ucw/ff-binary.h>
/*
 * bget_utf8_slow -- decode one UTF-8 sequence read from fastbuf @b.
 *
 * The lead byte held in `c` is classified by numeric range; every further
 * byte is fetched with bgetc() and has to be a continuation byte
 * (0x80..0xbf), whose low six bits are shifted into `code`.
 *
 * NOTE(review): @repl is presumably the value handed back for malformed
 * input (the test driver in this file passes UNI_REPLACEMENT) -- the
 * return statements are not among the lines shown here, confirm there.
 */
20 bget_utf8_slow(struct fastbuf *b, uint repl)
/* Values below 0x80 need no further bytes; a negative EOF indicator also lands here. */
25 if (c < 0x80) /* Includes EOF */
/* 0x80..0xbf is a continuation byte standing where a lead byte was expected. */
27 if (c < 0xc0) /* Incorrect combination */
/* Lead bytes from 0xf0 up announce sequences of four or more bytes, which this variant does not decode. */
29 if (c >= 0xf0) /* Too large, skip it */
/* Consume the continuation bytes (0x80..0xbf) that follow the oversized lead byte. */
31 while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
/* Lead bytes 0xe0..0xef start a three-byte sequence. */
35 if (c >= 0xe0) /* 3 bytes */
/* First continuation byte: anything outside 0x80..0xbf is malformed. */
38 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
/* Append the six payload bits of the continuation byte. */
40 code = (code << 6) | (c & 0x3f);
/* Second continuation byte of the three-byte form, same validity test. */
41 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
43 code = (code << 6) | (c & 0x3f);
/* NOTE(review): presumably the two-byte form (lead 0xc0..0xdf) -- single continuation byte. */
50 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
52 code = (code << 6) | (c & 0x3f);
/*
 * bget_utf8_32_slow -- decode one UTF-8 sequence read from fastbuf @b.
 *
 * NOTE(review): judging by the name and by bput_utf8_32_slow() in this
 * file, this variant presumably accepts the longer sequences needed for
 * values up to 31 bits; only part of the body is visible, so confirm.
 * @repl is presumably returned for malformed input -- confirm as well.
 */
66 bget_utf8_32_slow(struct fastbuf *b, uint repl)
/* Values below 0x80 need no further bytes; a negative EOF indicator also lands here. */
73 if (c < 0x80) /* Includes EOF */
/* 0x80..0xbf is a continuation byte standing where a lead byte was expected. */
75 if (c < 0xc0) /* Incorrect combination */
/* A continuation byte must lie in 0x80..0xbf; anything else is malformed. */
111 if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
/* Append the six payload bits of the continuation byte. */
113 code = (code << 6) | (c & 0x3f);
/*
 * bput_utf8_slow -- write the value @u to fastbuf @b as a UTF-8 sequence,
 * one byte at a time via bputc().
 *
 * NOTE(review): the conditions choosing between the one-, two- and
 * three-byte forms are not among the lines shown here; the comments below
 * describe only the visible byte constructions.
 */
127 bput_utf8_slow(struct fastbuf *b, uint u)
/* Two-byte form: lead byte 110xxxxx carrying the bits above the low six. */
135 bputc(b, 0xc0 | (u >> 6));
/* Three-byte form: lead byte 1110xxxx carrying the bits above the low twelve. */
138 bputc(b, 0xe0 | (u >> 12));
/* Middle continuation byte 10xxxxxx: bits 6..11. */
139 bputc(b, 0x80 | ((u >> 6) & 0x3f));
/* Final continuation byte 10xxxxxx: bits 0..5. */
141 bputc(b, 0x80 | (u & 0x3f));
/*
 * bput_utf8_32_slow -- write the value @u to fastbuf @b as a UTF-8
 * sequence of up to six bytes, one byte at a time via bputc().
 *
 * NOTE(review): the range tests selecting the sequence length and the
 * control flow that links each lead byte to its continuation bytes are
 * not among the lines shown here -- confirm before relying on ordering.
 */
146 bput_utf8_32_slow(struct fastbuf *b, uint u)
/* Only values that fit into 31 bits are accepted. */
148 ASSERT(u < (1U<<31));
/* Lead byte of the two-byte form (110xxxxx). */
154 bputc(b, 0xc0 | (u >> 6));
/* Lead byte of the three-byte form (1110xxxx). */
158 bputc(b, 0xe0 | (u >> 12));
/* Lead byte of the four-byte form (11110xxx). */
162 bputc(b, 0xf0 | (u >> 18));
/* Lead byte of the five-byte form (111110xx). */
166 bputc(b, 0xf8 | (u >> 24));
/* Lead byte of the six-byte form (1111110x); with u < 2^31 only bit 30 can be set here. */
169 bputc(b, 0xfc | (u >> 30));
/* Continuation bytes (10xxxxxx), most significant six-bit group first: bits 24..29, */
170 bputc(b, 0x80 | ((u >> 24) & 0x3f));
/* bits 18..23, */
172 bputc(b, 0x80 | ((u >> 18) & 0x3f));
/* bits 12..17, */
174 bputc(b, 0x80 | ((u >> 12) & 0x3f));
/* bits 6..11, */
176 bputc(b, 0x80 | ((u >> 6) & 0x3f));
/* and bits 0..5. */
178 bputc(b, 0x80 | (u & 0x3f));
/*
 * bget_utf16_be_slow -- decode one character from fastbuf @b, reading
 * 16-bit units with bgetw_be() and combining a surrogate pair into a
 * single value at or above 0x10000.
 *
 * NOTE(review): @repl is presumably returned when a surrogate is unpaired
 * or out of order; the statements executed when the tests below succeed
 * are not among the lines shown here -- confirm.
 */
185 bget_utf16_be_slow(struct fastbuf *b, uint repl)
/* u: first 16-bit unit; x, y: offsets of the two units within their surrogate ranges. */
189 uint u = bgetw_be(b), x, y;
/*
 * Unsigned subtraction: values below 0xd800 wrap around to huge numbers,
 * so this single test is true for every u outside 0xd800..0xdfff,
 * i.e. for anything that is not a surrogate.
 */
192 if ((x = u - 0xd800) >= 0x800)
/*
 * Reject the pair when the first unit is itself a low surrogate
 * (x >= 0x400), when bpeekc() reports a negative value (presumably no
 * more input), or when the second unit is not in 0xdc00..0xdfff.
 * The second unit is only read if the earlier conditions are false.
 */
194 if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_be(b) - 0xdc00) >= 0x400)
/* Combine: ten bits from the high surrogate, ten from the low one, offset by 0x10000. */
196 return 0x10000 + (x << 10) + y;
/*
 * bget_utf16_le_slow -- same decoding as bget_utf16_be_slow(), except that
 * the 16-bit units are read from fastbuf @b with bgetw_le().
 *
 * NOTE(review): @repl is presumably returned when a surrogate is unpaired
 * or out of order; the statements executed when the tests below succeed
 * are not among the lines shown here -- confirm.
 */
200 bget_utf16_le_slow(struct fastbuf *b, uint repl)
/* u: first 16-bit unit; x, y: offsets of the two units within their surrogate ranges. */
204 uint u = bgetw_le(b), x, y;
/*
 * Unsigned subtraction: values below 0xd800 wrap around, so this test is
 * true for every u outside the surrogate range 0xd800..0xdfff.
 */
207 if ((x = u - 0xd800) >= 0x800)
/*
 * Reject the pair when the first unit is a low surrogate (x >= 0x400),
 * when bpeekc() reports a negative value (presumably no more input), or
 * when the second unit is not in 0xdc00..0xdfff.
 */
209 if (x >= 0x400 || bpeekc(b) < 0 || (y = bgetw_le(b) - 0xdc00) >= 0x400)
/* Combine: ten bits from the high surrogate, ten from the low one, offset by 0x10000. */
211 return 0x10000 + (x << 10) + y;
/*
 * bput_utf16_be_slow -- write the value @u to fastbuf @b as UTF-16 with
 * the high byte of each 16-bit unit first, emitting bytes via bputc().
 *
 * NOTE(review): the statements of the single-unit branch, the last byte of
 * the surrogate pair and the handling of values that pass neither test
 * are not among the lines shown here.
 */
215 bput_utf16_be_slow(struct fastbuf *b, uint u)
/* Values below 0x10000 that are not surrogates (0xd800..0xdfff) fit into a single unit. */
217 if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
/*
 * Subtract the 0x10000 offset in place. The subtraction is unsigned, so a
 * surrogate value wraps around and fails the test; what passes is the
 * 20-bit range that originally was 0x10000..0x10ffff.
 */
222 else if ((u -= 0x10000) < 0x100000)
/* High surrogate, high byte: 0xd8 plus the top two of the twenty bits. */
224 bputc(b, 0xd8 | (u >> 18));
/* High surrogate, low byte: bits 10..17. */
225 bputc(b, (u >> 10) & 0xff);
/* Low surrogate, high byte: 0xdc plus bits 8..9. */
226 bputc(b, 0xdc | ((u >> 8) & 0x3));
/*
 * bput_utf16_le_slow -- write the value @u to fastbuf @b as UTF-16 with
 * the low byte of each 16-bit unit first, emitting bytes via bputc().
 *
 * NOTE(review): the statements of the single-unit branch, the low byte of
 * the low surrogate and the handling of values that pass neither test
 * are not among the lines shown here.
 */
234 bput_utf16_le_slow(struct fastbuf *b, uint u)
/* Values below 0x10000 that are not surrogates (0xd800..0xdfff) fit into a single unit. */
236 if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
/*
 * Subtract the 0x10000 offset in place. The subtraction is unsigned, so a
 * surrogate value wraps around and fails the test; what passes is the
 * 20-bit range that originally was 0x10000..0x10ffff.
 */
241 else if ((u -= 0x10000) < 0x100000)
/* High surrogate, low byte first: bits 10..17. */
243 bputc(b, (u >> 10) & 0xff);
/* High surrogate, high byte: 0xd8 plus the top two of the twenty bits. */
244 bputc(b, 0xd8 | (u >> 18));
/* Low surrogate, high byte: 0xdc plus bits 8..9. */
246 bputc(b, 0xdc | ((u >> 8) & 0x3));
/*
 * Test driver: argv[1] names one of the eight *_slow functions of this
 * file (matched case-insensitively), hexadecimal numbers are read from
 * standard input with scanf("%x"), and the selected function is run on a
 * growing fastbuf created by fbgrow_create().
 *
 * NOTE(review): the statements between the lines shown here (the bodies
 * of the loops, the switch headers, the output of decoded values) are not
 * visible -- the comments below cover only what the visible lines establish.
 */
257 int main(int argc, char **argv)
260 F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
261 F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
/* The list above is expanded twice with different definitions of F(): first into FUNC_* identifiers ... */
264 #define F(x) FUNC_##x,
/* ... then into designated initializers mapping each FUNC_* index to its name as a string. */
269 #define F(x) [FUNC_##x] = #x,
/* Look up argv[1] among the known names, ignoring case. */
276 for (uint i = 0; i < ARRAY_SIZE(names); i++)
277 if (!strcasecmp(names[i], argv[1]))
281 fprintf(stderr, "Invalid usage!\n");
285 struct fastbuf *b = fbgrow_create(8);
/*
 * The BGET entries precede the BPUT entries in the list, so -- assuming
 * the FUNC_* values are assigned in list order -- this selects the
 * decoding tests.
 */
286 if (func < FUNC_BPUT_UTF8)
/* Read hexadecimal input values until scanf() stops matching. */
289 while (scanf("%x", &u) == 1)
/* Decode from the buffer for as long as bpeekc() does not report a negative value. */
292 while (bpeekc(b) >= 0)
299 u = bget_utf8_slow(b, UNI_REPLACEMENT);
301 case FUNC_BGET_UTF8_32:
302 u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
304 case FUNC_BGET_UTF16_BE:
305 u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
307 case FUNC_BGET_UTF16_LE:
308 u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
/* Encoding tests: every hexadecimal value read is passed to the selected bput function. */
320 while (scanf("%x", &u) == 1)
325 bput_utf8_slow(b, u);
327 case FUNC_BPUT_UTF8_32:
328 bput_utf8_32_slow(b, u);
330 case FUNC_BPUT_UTF16_BE:
331 bput_utf16_be_slow(b, u);
333 case FUNC_BPUT_UTF16_LE:
334 bput_utf16_le_slow(b, u);
/* Print the bytes held in the buffer, each as two hexadecimal digits. */
341 while (bpeekc(b) >= 0)
345 printf("%02x", bgetc(b));