From dc0fa983ac2914b85f0e97771dfea836dfbcd0a2 Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Mon, 10 Dec 2007 13:50:09 +0100 Subject: [PATCH] UCW: Added UTF-16 test cases. --- lib/unicode.c | 98 +++++++++++++++++++++++++++++++++++++++------------ lib/unicode.t | 64 ++++++++++++++++++++++++--------- 2 files changed, 122 insertions(+), 40 deletions(-) diff --git a/lib/unicode.c b/lib/unicode.c index e4a33af8..c5ffb826 100644 --- a/lib/unicode.c +++ b/lib/unicode.c @@ -37,55 +37,107 @@ utf8_strnlen(const byte *str, uns n) } #ifdef TEST + #include #include + int main(int argc, char **argv) { byte buf[256]; - if (argc > 1 && !strncmp(argv[1], "get", 3)) + +#define FUNCS \ + F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \ + F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT) + + enum { +#define F(x) FUNC_##x, + FUNCS +#undef F + }; + char *names[] = { +#define F(x) [FUNC_##x] = #x, + FUNCS +#undef F + }; + + uns func = ~0U; + if (argc > 1) + for (uns i = 0; i < ARRAY_SIZE(names); i++) + if (!strcasecmp(names[i], argv[1])) + func = i; + if (!~func) + { + fprintf(stderr, "Invalid usage!\n"); + return 1; + } + + if (func < FUNC_UTF8_PUT) { - int f32 = !strcmp(argv[1], "get32"); - byte *p = buf; + byte *p = buf, *q = buf, *last; uns u; + bzero(buf, sizeof(buf)); while (scanf("%x", &u) == 1) - *p++ = u; - *p = 0; - p = buf; - while (*p) + *q++ = u; + while (p < q) { + last = p; if (p != buf) putchar(' '); - if (f32) - GET_UTF8_32(p, u); - else - GET_UTF8(p, u); + switch (func) + { + case FUNC_UTF8_GET: + p = utf8_get(p, &u); + break; + case FUNC_UTF8_32_GET: + p = utf8_32_get(p, &u); + break; + case FUNC_UTF16_BE_GET: + p = utf16_be_get(p, &u); + break; + case FUNC_UTF16_LE_GET: + p = utf16_le_get(p, &u); + break; + default: + ASSERT(0); + } printf("%04x", u); + ASSERT(last < p && p <= q); } putchar('\n'); } - else if (argc > 1 && !strncmp(argv[1], "put", 3)) + else { uns u, i=0; - int f32 = !strcmp(argv[1], "put32"); while (scanf("%x", &u) == 1) { - byte *p = buf; - if (f32) - PUT_UTF8_32(p, u); - else - PUT_UTF8(p, u); - *p = 0; - for (p=buf; *p; p++) + byte *p = buf, *q = buf; + switch (func) + { + case FUNC_UTF8_PUT: + p = utf8_put(p, u); + break; + case FUNC_UTF8_32_PUT: + p = utf8_32_put(p, u); + break; + case FUNC_UTF16_BE_PUT: + p = utf16_be_put(p, u); + break; + case FUNC_UTF16_LE_PUT: + p = utf16_le_put(p, u); + break; + default: + ASSERT(0); + } + while (q < p) { if (i++) putchar(' '); - printf("%02x", *p); + printf("%02x", *q++); } } putchar('\n'); } - else - puts("?"); return 0; } + #endif diff --git a/lib/unicode.t b/lib/unicode.t index 132bbbd2..1b5549ef 100644 --- a/lib/unicode.t +++ b/lib/unicode.t @@ -1,41 +1,71 @@ -# Tests for the Unicode UTF-8 module +# Tests for the Unicode module -Name: put1 -Run: ../obj/lib/unicode-utf8-t put +Name: utf8_put (1) +Run: ../obj/lib/unicode-t utf8_put In: 0041 0048 004f 004a Out: 41 48 4f 4a -Name: put2 -Run: ../obj/lib/unicode-utf8-t put +Name: utf8_put (2) +Run: ../obj/lib/unicode-t utf8_put In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 -Name: get1 -Run: ../obj/lib/unicode-utf8-t get +Name: utf8_get (1) +Run: ../obj/lib/unicode-t utf8_get In: 41 48 4f 4a Out: 0041 0048 004f 004a -Name: get2 -Run: ../obj/lib/unicode-utf8-t get +Name: utf8_get (2) +Run: ../obj/lib/unicode-t utf8_get In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 -Name: get3 -Run: ../obj/lib/unicode-utf8-t get +Name: utf8_get (3) +Run: ../obj/lib/unicode-t utf8_get In: 84 ff f9 f8 c2 aa 41 Out: fffc fffc fffc fffc 00aa 0041 -Name: put32 -Run: ../obj/lib/unicode-utf8-t put32 +Name: utf8_32_put +Run: ../obj/lib/unicode-t utf8_32_put In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a -Name: get32a -Run: ../obj/lib/unicode-utf8-t get32 +Name: utf8_32_get (1) +Run: ../obj/lib/unicode-t utf8_32_get In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a -Name: get32b -Run: ../obj/lib/unicode-utf8-t get32 +Name: utf8_32_get (2) +Run: ../obj/lib/unicode-t utf8_32_get In: fe 83 81 Out: fffc fffc fffc + +Name: utf16_be_put +Run: ../obj/lib/unicode-t utf16_be_put +In: 0041 004a 2a5f feff 0000 10ffff ffff 10000 +Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00 + +Name: utf16_le_put +Run: ../obj/lib/unicode-t utf16_le_put +In: 0041 004a 2a5f feff 0000 10ffff ffff 10000 +Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc + +Name: utf16_be_get (1) +Run: ../obj/lib/unicode-t utf16_be_get +In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00 +Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000 + +Name: utf16_be_get (2) +Run: ../obj/lib/unicode-t utf16_be_get +In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01 +Out: fffc 2a5f fffc fffc 2a5f fffc + +Name: utf16_le_get (1) +Run: ../obj/lib/unicode-t utf16_le_get +In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc +Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000 + +Name: utf16_le_get (2) +Run: ../obj/lib/unicode-t utf16_le_get +In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8 +Out: fffc 2a5f fffc fffc 2a5f fffc -- 2.39.2