From: Pavel Charvat Date: Mon, 10 Dec 2007 11:44:19 +0000 (+0100) Subject: UCW: Renamed 'UTF-8' tests to 'Unicode' (I am going to add some UTF-16 test cases). X-Git-Tag: holmes-import~486 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=b09f6b2c8551baf93ea0576eab4f00e721338369;p=libucw.git UCW: Renamed 'UTF-8' tests to 'Unicode' (I am going to add some UTF-16 test cases). --- diff --git a/lib/Makefile b/lib/Makefile index ed6a3a79..f579a343 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -19,7 +19,7 @@ LIBUCW_MODS= \ profile \ fastbuf ff-binary ff-string ff-printf ff-unicode \ fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \ - str_ctype str_upper str_lower unicode-utf8 stkstring \ + str_ctype str_upper str_lower unicode stkstring \ wildmatch wordsplit ctmatch patimatch patmatch regex \ prime primetable random timer randomkey \ bit-ffs bit-fls \ @@ -99,11 +99,11 @@ $(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW) $(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET) $(o)/lib/ipaccess-test: $(o)/lib/ipaccess-test.o $(LIBUCW) -TESTS+=$(addprefix $(o)/lib/,regex.test unicode-utf8.test hash-test.test mempool.test stkstring.test \ +TESTS+=$(addprefix $(o)/lib/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ slists.test kmp-test.test bbuf.test getopt.test fastbuf.test eltpool.test) $(o)/lib/regex.test: $(o)/lib/regex-t -$(o)/lib/unicode-utf8.test: $(o)/lib/unicode-utf8-t +$(o)/lib/unicode.test: $(o)/lib/unicode-t $(o)/lib/hash-test.test: $(o)/lib/hash-test $(o)/lib/mempool.test: $(o)/lib/mempool-t $(o)/lib/mempool-fmt-t $(o)/lib/mempool-str-t $(o)/lib/stkstring.test: $(o)/lib/stkstring-t diff --git a/lib/unicode-utf8.c b/lib/unicode-utf8.c deleted file mode 100644 index e4a33af8..00000000 --- a/lib/unicode-utf8.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * UCW Library -- UTF-8 Functions - * - * (c) 1997--2004 Martin Mares - * (c) 2003 Robert Spalek - * - * This software may be freely 
distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/unicode.h" - -uns -utf8_strlen(const byte *str) -{ - uns len = 0; - while (*str) - { - UTF8_SKIP(str); - len++; - } - return len; -} - -uns -utf8_strnlen(const byte *str, uns n) -{ - uns len = 0; - const byte *end = str + n; - while (str < end) - { - UTF8_SKIP(str); - len++; - } - return len; -} - -#ifdef TEST -#include <string.h> -#include <stdio.h> -int main(int argc, char **argv) -{ - byte buf[256]; - if (argc > 1 && !strncmp(argv[1], "get", 3)) - { - int f32 = !strcmp(argv[1], "get32"); - byte *p = buf; - uns u; - while (scanf("%x", &u) == 1) - *p++ = u; - *p = 0; - p = buf; - while (*p) - { - if (p != buf) - putchar(' '); - if (f32) - GET_UTF8_32(p, u); - else - GET_UTF8(p, u); - printf("%04x", u); - } - putchar('\n'); - } - else if (argc > 1 && !strncmp(argv[1], "put", 3)) - { - uns u, i=0; - int f32 = !strcmp(argv[1], "put32"); - while (scanf("%x", &u) == 1) - { - byte *p = buf; - if (f32) - PUT_UTF8_32(p, u); - else - PUT_UTF8(p, u); - *p = 0; - for (p=buf; *p; p++) - { - if (i++) - putchar(' '); - printf("%02x", *p); - } - } - putchar('\n'); - } - else - puts("?"); - return 0; -} -#endif diff --git a/lib/unicode-utf8.t b/lib/unicode-utf8.t deleted file mode 100644 index 132bbbd2..00000000 --- a/lib/unicode-utf8.t +++ /dev/null @@ -1,41 +0,0 @@ -# Tests for the Unicode UTF-8 module - -Name: put1 -Run: ../obj/lib/unicode-utf8-t put -In: 0041 0048 004f 004a -Out: 41 48 4f 4a - -Name: put2 -Run: ../obj/lib/unicode-utf8-t put -In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 -Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 - -Name: get1 -Run: ../obj/lib/unicode-utf8-t get -In: 41 48 4f 4a -Out: 0041 0048 004f 004a - -Name: get2 -Run: ../obj/lib/unicode-utf8-t get -In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 -Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 - -Name: get3 -Run: 
../obj/lib/unicode-utf8-t get -In: 84 ff f9 f8 c2 aa 41 -Out: fffc fffc fffc fffc 00aa 0041 - -Name: put32 -Run: ../obj/lib/unicode-utf8-t put32 -In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a -Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a - -Name: get32a -Run: ../obj/lib/unicode-utf8-t get32 -In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a -Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a - -Name: get32b -Run: ../obj/lib/unicode-utf8-t get32 -In: fe 83 81 -Out: fffc fffc fffc diff --git a/lib/unicode.c b/lib/unicode.c new file mode 100644 index 00000000..e4a33af8 --- /dev/null +++ b/lib/unicode.c @@ -0,0 +1,91 @@ +/* + * UCW Library -- UTF-8 Functions + * + * (c) 1997--2004 Martin Mares + * (c) 2003 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "lib/lib.h" +#include "lib/unicode.h" + +uns +utf8_strlen(const byte *str) +{ + uns len = 0; + while (*str) + { + UTF8_SKIP(str); + len++; + } + return len; +} + +uns +utf8_strnlen(const byte *str, uns n) +{ + uns len = 0; + const byte *end = str + n; + while (str < end) + { + UTF8_SKIP(str); + len++; + } + return len; +} + +#ifdef TEST +#include <string.h> +#include <stdio.h> +int main(int argc, char **argv) +{ + byte buf[256]; + if (argc > 1 && !strncmp(argv[1], "get", 3)) + { + int f32 = !strcmp(argv[1], "get32"); + byte *p = buf; + uns u; + while (scanf("%x", &u) == 1) + *p++ = u; + *p = 0; + p = buf; + while (*p) + { + if (p != buf) + putchar(' '); + if (f32) + GET_UTF8_32(p, u); + else + GET_UTF8(p, u); + printf("%04x", u); + } + putchar('\n'); + } + else if (argc > 1 && !strncmp(argv[1], "put", 3)) + { + uns u, i=0; + int f32 = !strcmp(argv[1], "put32"); + while (scanf("%x", &u) == 1) + { + byte *p = buf; + if (f32) + PUT_UTF8_32(p, u); + else + PUT_UTF8(p, u); + *p = 0; + for (p=buf; *p; p++) + { + if (i++) + putchar(' '); + printf("%02x", *p); + } + } + putchar('\n'); + } + else + puts("?"); + return 0; +} +#endif diff --git a/lib/unicode.t b/lib/unicode.t new file mode 100644 index 00000000..132bbbd2 --- /dev/null +++ b/lib/unicode.t @@ -0,0 +1,41 @@ +# Tests for the Unicode UTF-8 module + +Name: put1 +Run: ../obj/lib/unicode-utf8-t put +In: 0041 0048 004f 004a +Out: 41 48 4f 4a + +Name: put2 +Run: ../obj/lib/unicode-utf8-t put +In: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 +Out: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 + +Name: get1 +Run: ../obj/lib/unicode-utf8-t get +In: 41 48 4f 4a +Out: 0041 0048 004f 004a + +Name: get2 +Run: ../obj/lib/unicode-utf8-t get +In: c2 aa c6 aa ca a5 d6 a5 e0 a9 9a e1 96 a5 e2 a9 9a e5 a9 9a ea 96 a5 +Out: 00aa 01aa 02a5 05a5 0a5a 15a5 2a5a 5a5a a5a5 + +Name: get3 +Run: ../obj/lib/unicode-utf8-t get +In: 84 ff f9 f8 c2 aa 41 +Out: fffc fffc fffc fffc 00aa 0041 + +Name: put32 +Run: 
../obj/lib/unicode-utf8-t put32 +In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a +Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a + +Name: get32a +Run: ../obj/lib/unicode-utf8-t get32 +In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f5 9a 96 a5 f8 8a a5 a9 9a f8 96 a5 a9 9a f8 a9 9a 96 a5 f9 96 a5 a9 9a fa a9 9a 96 a5 fc 85 a9 9a 96 a5 fc 8a 96 a5 a9 9a fc 95 a9 9a 96 a5 fc aa 96 a5 a9 9a fd 9a 96 a5 a9 9a +Out: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a + +Name: get32b +Run: ../obj/lib/unicode-utf8-t get32 +In: fe 83 81 +Out: fffc fffc fffc