X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fff-unicode.c;h=6057e2408dd0c3b9328079d2089dc3077fe97dcd;hb=d75e8af89c831ba37062d16a102871ef4f443457;hp=047fff84cf16bb57d05748d38b2f9d8c6e6dfe6a;hpb=318592e5f0f42161924da92b48cf85c6152ee6ab;p=libucw.git

diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c
index 047fff84..6057e240 100644
--- a/lib/ff-unicode.c
+++ b/lib/ff-unicode.c
@@ -14,6 +14,8 @@
 #include "lib/ff-unicode.h"
 #include "lib/ff-binary.h"
 
+/*** UTF-8 ***/
+
 int
 bget_utf8_slow(struct fastbuf *b, uns repl)
 {
@@ -167,6 +169,8 @@ bput_utf8_32_slow(struct fastbuf *b, uns u)
     }
 }
 
+/*** UTF-16 ***/
+
 int
 bget_utf16_be_slow(struct fastbuf *b, uns repl)
 {
@@ -196,3 +200,147 @@ bget_utf16_le_slow(struct fastbuf *b, uns repl)
     return repl;
   return 0x10000 + (x << 10) + y;
 }
+
+void
+bput_utf16_be_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      bputc(b, u >> 8);
+      bputc(b, u & 0xff);
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      bputc(b, 0xd8 | (u >> 18));
+      bputc(b, (u >> 10) & 0xff);
+      bputc(b, 0xdc | ((u >> 8) & 0x3));
+      bputc(b, u & 0xff);
+    }
+  else
+    ASSERT(0);
+}
+
+void
+bput_utf16_le_slow(struct fastbuf *b, uns u)
+{
+  if (u < 0xd800 || (u < 0x10000 && u >= 0xe000))
+    {
+      bputc(b, u & 0xff);
+      bputc(b, u >> 8);
+    }
+  else if ((u -= 0x10000) < 0x100000)
+    {
+      bputc(b, (u >> 10) & 0xff);
+      bputc(b, 0xd8 | (u >> 18));
+      bputc(b, u & 0xff);
+      bputc(b, 0xdc | ((u >> 8) & 0x3));
+    }
+  else
+    ASSERT(0);
+}
+
+#ifdef TEST
+
+#include <string.h>
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+#define FUNCS \
+    F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
+    F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
+
+  enum {
+#define F(x) FUNC_##x,
+    FUNCS
+#undef F
+  };
+  char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+    FUNCS
+#undef F
+  };
+
+  uns func = ~0U;
+  if (argc > 1)
+    for (uns i = 0; i < ARRAY_SIZE(names); i++)
+      if (!strcasecmp(names[i], argv[1]))
+        func = i;
+  if (!~func)
+    {
+      fprintf(stderr, "Invalid usage!\n");
+      return 1;
+    }
+
+  struct fastbuf *b = fbgrow_create(8);
+  if (func < FUNC_BPUT_UTF8)
+    {
+      uns u;
+      while (scanf("%x", &u) == 1)
+        bputc(b, u);
+      fbgrow_rewind(b);
+      while (bpeekc(b) >= 0)
+        {
+          if (btell(b))
+            putchar(' ');
+          switch (func)
+            {
+              case FUNC_BGET_UTF8:
+                u = bget_utf8_slow(b, UNI_REPLACEMENT);
+                break;
+              case FUNC_BGET_UTF8_32:
+                u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
+                break;
+              case FUNC_BGET_UTF16_BE:
+                u = bget_utf16_be_slow(b, UNI_REPLACEMENT);
+                break;
+              case FUNC_BGET_UTF16_LE:
+                u = bget_utf16_le_slow(b, UNI_REPLACEMENT);
+                break;
+              default:
+                ASSERT(0);
+            }
+          printf("%04x", u);
+        }
+      putchar('\n');
+    }
+  else
+    {
+      uns u, i = 0;
+      while (scanf("%x", &u) == 1)
+        {
+          switch (func)
+            {
+              case FUNC_BPUT_UTF8:
+                bput_utf8_slow(b, u);
+                break;
+              case FUNC_BPUT_UTF8_32:
+                bput_utf8_32_slow(b, u);
+                break;
+              case FUNC_BPUT_UTF16_BE:
+                bput_utf16_be_slow(b, u);
+                break;
+              case FUNC_BPUT_UTF16_LE:
+                bput_utf16_le_slow(b, u);
+                break;
+              default:
+                ASSERT(0);
+            }
+          fbgrow_rewind(b);
+          u = 0;
+          while (bpeekc(b) >= 0)
+            {
+              if (i++)
+                putchar(' ');
+              printf("%02x", bgetc(b));
+            }
+          fbgrow_reset(b);
+        }
+      putchar('\n');
+    }
+  bclose(b);
+
+  return 0;
+}
+
+#endif