From: Pavel Charvat Date: Mon, 10 Dec 2007 10:12:49 +0000 (+0100) Subject: Renamed lib/ff-utf8.* to lib/ff-unicode.* X-Git-Tag: holmes-import~494 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=b18ffb4f5ef9c5d83d7a3e28c37fabbb7df44be2;p=libucw.git Renamed lib/ff-utf8.* to lib/ff-unicode.* --- diff --git a/lib/Makefile b/lib/Makefile index 52751e11..ed6a3a79 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -17,7 +17,7 @@ LIBUCW_MODS= \ conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section \ ipaccess \ profile \ - fastbuf ff-binary ff-string ff-printf ff-utf8 \ + fastbuf ff-binary ff-string ff-printf ff-unicode \ fb-file carefulio fb-mem fb-temp fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param \ str_ctype str_upper str_lower unicode-utf8 stkstring \ wildmatch wordsplit ctmatch patimatch patmatch regex \ @@ -49,7 +49,7 @@ LIBUCW_INCLUDES= \ bitops.h \ conf.h getopt.h ipaccess.h \ profile.h \ - fastbuf.h lfs.h ff-utf8.h ff-binary.h \ + fastbuf.h lfs.h ff-unicode.h ff-binary.h \ chartype.h unicode.h stkstring.h \ wildmatch.h patmatch.h \ db.h \ diff --git a/lib/ff-unicode.c b/lib/ff-unicode.c new file mode 100644 index 00000000..7f46aaab --- /dev/null +++ b/lib/ff-unicode.c @@ -0,0 +1,167 @@ +/* + * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams + * + * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include "lib/lib.h" +#include "lib/fastbuf.h" +#include "lib/unicode.h" +#include "lib/ff-unicode.h" + +int +bget_utf8_slow(struct fastbuf *b) +{ + int c = bgetc(b); + int code; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return UNI_REPLACEMENT; + if (c >= 0xf0) /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + if (c >= 0xe0) /* 3 bytes */ + { + code = c & 0x0f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + else /* 2 bytes */ + { + code = c & 0x1f; + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b); + return UNI_REPLACEMENT; +} + +int +bget_utf8_32_slow(struct fastbuf *b) +{ + int c = bgetc(b); + int code; + int nr; + + if (c < 0x80) /* Includes EOF */ + return c; + if (c < 0xc0) /* Incorrect combination */ + return UNI_REPLACEMENT; + if (c < 0xe0) + { + code = c & 0x1f; + nr = 1; + } + else if (c < 0xf0) + { + code = c & 0x0f; + nr = 2; + } + else if (c < 0xf8) + { + code = c & 0x07; + nr = 3; + } + else if (c < 0xfc) + { + code = c & 0x03; + nr = 4; + } + else if (c < 0xfe) + { + code = c & 0x01; + nr = 5; + } + else /* Too large, skip it */ + { + while ((c = bgetc(b)) >= 0x80 && c < 0xc0) + ; + goto wrong; + } + while (nr-- > 0) + { + if ((c = bgetc(b)) < 0x80 || c >= 0xc0) + goto wrong; + code = (code << 6) | (c & 0x3f); + } + return code; + + wrong: + if (c >= 0) + bungetc(b); + return UNI_REPLACEMENT; +} + +void +bput_utf8_slow(struct fastbuf *b, uns u) +{ + ASSERT(u < 65536); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + bputc(b, 0xe0 | (u >> 12)); + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +} + +void +bput_utf8_32_slow(struct fastbuf *b, uns u) +{ + ASSERT(u < (1U<<31)); + if (u < 0x80) + bputc(b, u); + else + { + if (u < 0x800) + bputc(b, 0xc0 | (u >> 6)); + else + { + if (u < (1<<16)) + bputc(b, 0xe0 | (u >> 12)); + else + { + if (u < (1<<21)) + bputc(b, 0xf0 | (u >> 18)); + else + { + if (u < (1<<26)) + bputc(b, 0xf8 | (u >> 24)); + else + { + bputc(b, 0xfc | (u >> 30)); + bputc(b, 0x80 | ((u >> 24) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 18) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 12) & 0x3f)); + } + bputc(b, 0x80 | ((u >> 6) & 0x3f)); + } + bputc(b, 0x80 | (u & 0x3f)); + } +} diff --git a/lib/ff-unicode.h b/lib/ff-unicode.h new file mode 100644 index 00000000..31510ff4 --- /dev/null +++ b/lib/ff-unicode.h @@ -0,0 +1,70 @@ +/* + * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams + * + * (c) 2001--2004 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_FF_UTF8_H +#define _UCW_FF_UTF8_H + +#include "lib/fastbuf.h" +#include "lib/unicode.h" + +int bget_utf8_slow(struct fastbuf *b); +int bget_utf8_32_slow(struct fastbuf *b); +void bput_utf8_slow(struct fastbuf *b, uns u); +void bput_utf8_32_slow(struct fastbuf *b, uns u); + +static inline int +bget_utf8(struct fastbuf *b) +{ + uns u; + + if (bavailr(b) >= 3) + { + GET_UTF8(b->bptr, u); + return u; + } + else + return bget_utf8_slow(b); +} + +static inline void +bput_utf8(struct fastbuf *b, uns u) +{ + ASSERT(u < 65536); + if (bavailw(b) >= 3) + PUT_UTF8(b->bptr, u); + else + bput_utf8_slow(b, u); +} + +static inline int +bget_utf8_32(struct fastbuf *b) +{ + uns u; + + if (bavailr(b) >= 6) + { + GET_UTF8_32(b->bptr, u); + return u; + } + else + return bget_utf8_32_slow(b); +} + +static inline void +bput_utf8_32(struct fastbuf *b, uns u) +{ + ASSERT(u < (1U<<31)); + if (bavailw(b) >= 6) + PUT_UTF8_32(b->bptr, u); + else + bput_utf8_32_slow(b, u); +} + +#endif diff --git a/lib/ff-utf8.c b/lib/ff-utf8.c deleted file mode 100644 index a7e40d3c..00000000 --- a/lib/ff-utf8.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams - * - * (c) 2001--2004 Martin Mares - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#include "lib/lib.h" -#include "lib/fastbuf.h" -#include "lib/unicode.h" -#include "lib/ff-utf8.h" - -int -bget_utf8_slow(struct fastbuf *b) -{ - int c = bgetc(b); - int code; - - if (c < 0x80) /* Includes EOF */ - return c; - if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; - if (c >= 0xf0) /* Too large, skip it */ - { - while ((c = bgetc(b)) >= 0x80 && c < 0xc0) - ; - goto wrong; - } - if (c >= 0xe0) /* 3 bytes */ - { - code = c & 0x0f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - else /* 2 bytes */ - { - code = c & 0x1f; - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - return code; - - wrong: - if (c >= 0) - bungetc(b); - return UNI_REPLACEMENT; -} - -int -bget_utf8_32_slow(struct fastbuf *b) -{ - int c = bgetc(b); - int code; - int nr; - - if (c < 0x80) /* Includes EOF */ - return c; - if (c < 0xc0) /* Incorrect combination */ - return UNI_REPLACEMENT; - if (c < 0xe0) - { - code = c & 0x1f; - nr = 1; - } - else if (c < 0xf0) - { - code = c & 0x0f; - nr = 2; - } - else if (c < 0xf8) - { - code = c & 0x07; - nr = 3; - } - else if (c < 0xfc) - { - code = c & 0x03; - nr = 4; - } - else if (c < 0xfe) - { - code = c & 0x01; - nr = 5; - } - else /* Too large, skip it */ - { - while ((c = bgetc(b)) >= 0x80 && c < 0xc0) - ; - goto wrong; - } - while (nr-- > 0) - { - if ((c = bgetc(b)) < 0x80 || c >= 0xc0) - goto wrong; - code = (code << 6) | (c & 0x3f); - } - return code; - - wrong: - if (c >= 0) - bungetc(b); - return UNI_REPLACEMENT; -} - -void -bput_utf8_slow(struct fastbuf *b, uns u) -{ - ASSERT(u < 65536); - if (u < 0x80) - bputc(b, u); - else - { - if (u < 0x800) - bputc(b, 0xc0 | (u >> 6)); - else - { - bputc(b, 0xe0 | (u >> 12)); - bputc(b, 0x80 | ((u >> 6) & 0x3f)); - } - bputc(b, 0x80 | (u & 0x3f)); - } -} - -void -bput_utf8_32_slow(struct fastbuf *b, uns u) -{ - ASSERT(u < (1U<<31)); - if (u < 0x80) - bputc(b, u); - else - { - if (u < 0x800) - bputc(b, 0xc0 | (u >> 6)); - else - { - if (u < (1<<16)) - bputc(b, 0xe0 | (u >> 12)); - else - { - if (u < (1<<21)) - bputc(b, 0xf0 | (u >> 18)); - else - { - if (u < (1<<26)) - bputc(b, 0xf8 | (u >> 24)); - else - { - bputc(b, 0xfc | (u >> 30)); - bputc(b, 0x80 | ((u >> 24) & 0x3f)); - } - bputc(b, 0x80 | ((u >> 18) & 0x3f)); - } - bputc(b, 0x80 | ((u >> 12) & 0x3f)); - } - bputc(b, 0x80 | ((u >> 6) & 0x3f)); - } - bputc(b, 0x80 | (u & 0x3f)); - } -} diff --git a/lib/ff-utf8.h b/lib/ff-utf8.h deleted file mode 100644 index 31510ff4..00000000 --- a/lib/ff-utf8.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * UCW Library: Reading and writing of UTF-8 on Fastbuf Streams - * - * (c) 2001--2004 Martin Mares - * (c) 2004 Robert Spalek - * - * This software may be freely distributed and used according to the terms - * of the GNU Lesser General Public License. - */ - -#ifndef _UCW_FF_UTF8_H -#define _UCW_FF_UTF8_H - -#include "lib/fastbuf.h" -#include "lib/unicode.h" - -int bget_utf8_slow(struct fastbuf *b); -int bget_utf8_32_slow(struct fastbuf *b); -void bput_utf8_slow(struct fastbuf *b, uns u); -void bput_utf8_32_slow(struct fastbuf *b, uns u); - -static inline int -bget_utf8(struct fastbuf *b) -{ - uns u; - - if (bavailr(b) >= 3) - { - GET_UTF8(b->bptr, u); - return u; - } - else - return bget_utf8_slow(b); -} - -static inline void -bput_utf8(struct fastbuf *b, uns u) -{ - ASSERT(u < 65536); - if (bavailw(b) >= 3) - PUT_UTF8(b->bptr, u); - else - bput_utf8_slow(b, u); -} - -static inline int -bget_utf8_32(struct fastbuf *b) -{ - uns u; - - if (bavailr(b) >= 6) - { - GET_UTF8_32(b->bptr, u); - return u; - } - else - return bget_utf8_32_slow(b); -} - -static inline void -bput_utf8_32(struct fastbuf *b, uns u) -{ - ASSERT(u < (1U<<31)); - if (bavailw(b) >= 6) - PUT_UTF8_32(b->bptr, u); - else - bput_utf8_32_slow(b, u); -} - -#endif diff --git a/sherlock/xml/xml.c b/sherlock/xml/xml.c index 27ff8249..c5d5c5af 100644 --- a/sherlock/xml/xml.c +++ b/sherlock/xml/xml.c @@ -17,7 +17,7 @@ #include "lib/lib.h" #include "lib/mempool.h" #include "lib/fastbuf.h" -#include "lib/ff-utf8.h" +#include "lib/ff-unicode.h" #include "lib/ff-binary.h" #include "lib/chartype.h" #include "lib/unicode.h"