}
#ifdef TEST
+
#include <string.h>
#include <stdio.h>
+
+/*
+ * Test driver: argv[1] names one of the functions listed in FUNCS
+ * (matched case-insensitively).  Hexadecimal values are read from stdin.
+ *
+ * *_GET modes: every value is stored as one byte of buf, the buffer is
+ * decoded with the selected function and each decoded value is printed
+ * as "%04x", separated by spaces.
+ * *_PUT modes: every value is encoded into buf with the selected function
+ * and the produced bytes are printed as "%02x", separated by spaces.
+ *
+ * Returns 0 on success, 1 on an unknown/missing function name or when the
+ * input of a *_GET mode does not fit into buf.
+ */
int main(int argc, char **argv)
{
byte buf[256];
- if (argc > 1 && !strncmp(argv[1], "get", 3))
+
+ /* X-macro list of testable functions; all *_GET entries must precede
+ * the *_PUT entries, the mode test below relies on that order. */
+#define FUNCS \
+ F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
+ F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
+
+ enum {
+#define F(x) FUNC_##x,
+ FUNCS
+#undef F
+ };
+ char *names[] = {
+#define F(x) [FUNC_##x] = #x,
+ FUNCS
+#undef F
+ };
+
+ /* ~0U serves as the "no function selected" sentinel */
+ uns func = ~0U;
+ if (argc > 1)
+ for (uns i = 0; i < ARRAY_SIZE(names); i++)
+ if (!strcasecmp(names[i], argv[1]))
+ func = i;
+ /* !~func is true only when func is still all-ones, i.e. nothing matched */
+ if (!~func)
+ {
+ fprintf(stderr, "Invalid usage!\n");
+ return 1;
+ }
+
+ /* Decoding modes come first in FUNCS */
+ if (func < FUNC_UTF8_PUT)
{
- int f32 = !strcmp(argv[1], "get32");
- byte *p = buf;
+ byte *p = buf, *q = buf, *last;
uns u;
+ /* Zero-fill, so that everything behind the input reads as zero bytes */
+ bzero(buf, sizeof(buf));
while (scanf("%x", &u) == 1)
- *p++ = u;
- *p = 0;
- p = buf;
- while (*p)
+ {
+ /* Refuse input which would overrun buf.  The last 8 bytes stay
+ * zeroed as slack for decoders looking ahead of the final input
+ * byte (assumed to cover the longest sequence -- TODO confirm). */
+ if (q >= buf + sizeof(buf) - 8)
+ {
+ fprintf(stderr, "Input too long!\n");
+ return 1;
+ }
+ *q++ = u;
+ }
+ while (p < q)
{
+ last = p;
if (p != buf)
putchar(' ');
- if (f32)
- GET_UTF8_32(p, u);
- else
- GET_UTF8(p, u);
+ switch (func)
+ {
+ case FUNC_UTF8_GET:
+ p = utf8_get(p, &u);
+ break;
+ case FUNC_UTF8_32_GET:
+ p = utf8_32_get(p, &u);
+ break;
+ case FUNC_UTF16_BE_GET:
+ p = utf16_be_get(p, &u);
+ break;
+ case FUNC_UTF16_LE_GET:
+ p = utf16_le_get(p, &u);
+ break;
+ default:
+ ASSERT(0);
+ }
printf("%04x", u);
+ /* The decoder must make progress and must not run past the input */
+ ASSERT(last < p && p <= q);
}
putchar('\n');
}
- else if (argc > 1 && !strncmp(argv[1], "put", 3))
+ else
{
uns u, i=0;
- int f32 = !strcmp(argv[1], "put32");
while (scanf("%x", &u) == 1)
{
- byte *p = buf;
- if (f32)
- PUT_UTF8_32(p, u);
- else
- PUT_UTF8(p, u);
- *p = 0;
- for (p=buf; *p; p++)
+ /* Each value is encoded from the start of buf; p ends up behind the
+ * last byte written, q walks over the output when printing */
+ byte *p = buf, *q = buf;
+ switch (func)
+ {
+ case FUNC_UTF8_PUT:
+ p = utf8_put(p, u);
+ break;
+ case FUNC_UTF8_32_PUT:
+ p = utf8_32_put(p, u);
+ break;
+ case FUNC_UTF16_BE_PUT:
+ p = utf16_be_put(p, u);
+ break;
+ case FUNC_UTF16_LE_PUT:
+ p = utf16_le_put(p, u);
+ break;
+ default:
+ ASSERT(0);
+ }
+ while (q < p)
{
if (i++)
putchar(' ');
- printf("%02x", *p);
+ printf("%02x", *q++);
}
}
putchar('\n');
}
- else
- puts("?");
return 0;
}
+
#endif
-# Tests for the Unicode UTF-8 module
+# Tests for the Unicode module
-Name: put1
-Run: ../obj/lib/unicode-utf8-t put
+Name: utf8_put (1)
+Run: ../obj/lib/unicode-t utf8_put
In: 0041 0048 004f 004a
Out: 41 48 4f 4a
-Name: put2
-Run: ../obj/lib/unicode-utf8-t put
+Name: utf8_put (2)
+Run: ../obj/lib/unicode-t utf8_put
In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
-Name: get1
-Run: ../obj/lib/unicode-utf8-t get
+Name: utf8_get (1)
+Run: ../obj/lib/unicode-t utf8_get
In: 41 48 4f 4a
Out: 0041 0048 004f 004a
-Name: get2
-Run: ../obj/lib/unicode-utf8-t get
+Name: utf8_get (2)
+Run: ../obj/lib/unicode-t utf8_get
In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5
Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5
-Name: get3
-Run: ../obj/lib/unicode-utf8-t get
+Name: utf8_get (3)
+Run: ../obj/lib/unicode-t utf8_get
In: 84 ff f9 f8 c2 aa 41
Out: fffc fffc fffc fffc 00aa 0041
-Name: put32
-Run: ../obj/lib/unicode-utf8-t put32
+Name: utf8_32_put
+Run: ../obj/lib/unicode-t utf8_32_put
In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
-Name: get32a
-Run: ../obj/lib/unicode-utf8-t get32
+Name: utf8_32_get (1)
+Run: ../obj/lib/unicode-t utf8_32_get
In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a
Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
-Name: get32b
-Run: ../obj/lib/unicode-utf8-t get32
+Name: utf8_32_get (2)
+Run: ../obj/lib/unicode-t utf8_32_get
In: fe 83 81
Out: fffc fffc fffc
+
+Name: utf16_be_put
+Run: ../obj/lib/unicode-t utf16_be_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+
+Name: utf16_le_put
+Run: ../obj/lib/unicode-t utf16_le_put
+In: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+Out: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+
+Name: utf16_be_get (1)
+Run: ../obj/lib/unicode-t utf16_be_get
+In: 00 41 00 4a 2a 5f fe ff 00 00 db ff df ff ff ff d8 00 dc 00
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_be_get (2)
+Run: ../obj/lib/unicode-t utf16_be_get
+In: dc 1a 2a 5f d8 01 d8 01 2a 5f d8 01
+Out: fffc 2a5f fffc fffc 2a5f fffc
+
+Name: utf16_le_get (1)
+Run: ../obj/lib/unicode-t utf16_le_get
+In: 41 00 4a 00 5f 2a ff fe 00 00 ff db ff df ff ff 00 d8 00 dc
+Out: 0041 004a 2a5f feff 0000 10ffff ffff 10000
+
+Name: utf16_le_get (2)
+Run: ../obj/lib/unicode-t utf16_le_get
+In: 1a dc 5f 2a 01 d8 01 d8 5f 2a 01 d8
+Out: fffc 2a5f fffc fffc 2a5f fffc