From: Martin Mares Date: Wed, 11 Jun 2003 13:50:09 +0000 (+0000) Subject: Functions working with tagged characters moved from index.h to a new X-Git-Tag: holmes-import~1254 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=c47621ff64bab8f5f1e609a667ade266c13a254c;p=libucw.git Functions working with tagged characters moved from index.h to a new header file tagged-text.h. This also revealed a couple of unintentional indirect includes. --- diff --git a/lib/finger.c b/lib/finger.c index 585678a0..b2e0460a 100644 --- a/lib/finger.c +++ b/lib/finger.c @@ -26,6 +26,8 @@ #include "lib/index.h" #include "lib/md5.h" +#include + void fingerprint(byte *string, struct fingerprint *fp) { diff --git a/lib/index.h b/lib/index.h index 8ee0b51e..ccb4b360 100644 --- a/lib/index.h +++ b/lib/index.h @@ -7,9 +7,7 @@ #ifndef _SHERLOCK_INDEX_H #define _SHERLOCK_INDEX_H -#include "lib/fastbuf.h" #include SHERLOCK_CUSTOM -#include "charset/unistream.h" #define INDEX_VERSION (0x32240100+sizeof(struct card_attr)) /* Increase with each incompatible change in index format */ @@ -113,73 +111,6 @@ fp_hash(struct fingerprint *fp) byte *url_key(byte *url, byte *buf); void url_fingerprint(byte *url, struct fingerprint *fp); -/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */ - -#define GET_TAGGED_CHAR(p,u) do { \ - u = *p; \ - if (u >= 0xc0) \ - GET_UTF8_CHAR(p,u); \ - else if (u >= 0x80) \ - { \ - p++; \ - if (u >= 0xb0) \ - { \ - ASSERT(u == 0xb0); \ - u += 0x80020000; \ - } \ - else if (u >= 0xa0) \ - { \ - ASSERT(*p >= 0x80 && *p <= 0xbf); \ - u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \ - } \ - else \ - u += 0x80000000; \ - } \ - else \ - p++; \ -} while (0) - -#define SKIP_TAGGED_CHAR(p) do { \ - if (*p >= 0x80 && *p < 0xc0) \ - { \ - uns u = *p++; \ - if (u >= 0xa0 && u < 0xb0 && *p >= 0x80 && *p < 0xc0) \ - p++; \ - } \ - else \ - UTF8_SKIP(p); \ -} while (0) - -static inline uns -bget_tagged_char(struct fastbuf *f) -{ - uns u = bgetc(f); - if ((int)u < 0x80) - ; - else if (u < 0xc0) - { - if (u >= 0xb0) - { - ASSERT(u == 0xb0); - u += 0x80020000; - } - else if (u >= 0xa0) - { - uns v = bgetc(f); - ASSERT(v >= 0x80 && v <= 0xbf); - u = 0x80010000 + ((u & 0x0f) << 6) + (v & 0x3f); - } - else - u += 0x80000000; - } - else - { - bungetc(f); - u = bget_utf8(f); - } - return u; -} - /* Conversion of document age from seconds to our internal units */ static inline int diff --git a/lib/tagged-text.h b/lib/tagged-text.h new file mode 100644 index 00000000..97c07eeb --- /dev/null +++ b/lib/tagged-text.h @@ -0,0 +1,80 @@ +/* + * Sherlock: Processing of tagged characters + * + * (c) 2001--2003 Martin Mares + */ + +#ifndef _SHERLOCK_TAGGED_TEXT_H +#define _SHERLOCK_TAGGED_TEXT_H + +#include "lib/fastbuf.h" +#include "charset/unistream.h" + +/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */ + +#define GET_TAGGED_CHAR(p,u) do { \ + u = *p; \ + if (u >= 0xc0) \ + GET_UTF8_CHAR(p,u); \ + else if (u >= 0x80) \ + { \ + p++; \ + if (u >= 0xb0) \ + { \ + ASSERT(u == 0xb0); \ + u += 0x80020000; \ + } \ + else if (u >= 0xa0) \ + { \ + ASSERT(*p >= 0x80 && *p <= 0xbf); \ + u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \ + } \ + else \ + u += 0x80000000; \ + } \ + else \ + p++; \ +} while (0) + +#define SKIP_TAGGED_CHAR(p) do { \ + if (*p >= 0x80 && *p < 0xc0) \ + { \ + uns u = *p++; \ + if (u >= 0xa0 && u < 0xb0 && *p >= 0x80 && *p < 0xc0) \ + p++; \ + } \ + else \ + UTF8_SKIP(p); \ +} while (0) + +static inline uns +bget_tagged_char(struct fastbuf *f) +{ + uns u = bgetc(f); + if ((int)u < 0x80) + ; + else if (u < 0xc0) + { + if (u >= 0xb0) + { + ASSERT(u == 0xb0); + u += 0x80020000; + } + else if (u >= 0xa0) + { + uns v = bgetc(f); + ASSERT(v >= 0x80 && v <= 0xbf); + u = 0x80010000 + ((u & 0x0f) << 6) + (v & 0x3f); + } + else + u += 0x80000000; + } + else + { + bungetc(f); + u = bget_utf8(f); + } + return u; +} + +#endif