mj.ucw.cz Git - libucw.git/blobdiff - lib/index.h
Added indexer names for word and string type classes.
[libucw.git] / lib / index.h
index 812dbeca47ba964a12d39c38c905edc18b3b0aa9..97ce26a80f562b68d98f22a30657bb6314b600c7 100644 (file)
@@ -25,6 +25,19 @@ enum word_type {
   WT_ALT                               /* Alternate texts for graphical elements */
 };
 
+#define WORD_TYPE_NAMES  /* List of T(name, mask) entries, mask = bit set over enum word_type; T is not defined in this part of the header, presumably the user defines it before expanding the list */ \
+       T(WORD, ~0)                             /* all bits set, i.e. every word type */ \
+       T(TEXT, 1 << WT_TEXT)                   \
+       T(EMPH, 1 << WT_EMPH)                   \
+       T(SMALL, 1 << WT_SMALL)                 \
+       T(TITLE, 1 << WT_TITLE)                 \
+       T(HDR, (1 << WT_SMALL_HEADING) | (1 << WT_BIG_HEADING))  /* union of HDR1 and HDR2 below */ \
+       T(HDR1, 1 << WT_SMALL_HEADING)          \
+       T(HDR2, 1 << WT_BIG_HEADING)            \
+       T(KEYWD, 1 << WT_KEYWORD)               \
+       T(META, 1 << WT_META)                   \
+       T(ALT, 1 << WT_ALT)
+
 /* String types */
 
 enum string_type {
@@ -32,9 +45,20 @@ enum string_type {
   ST_URL,                              /* URL of the document */
   ST_HOST,                             /* Host name */
   ST_DOMAIN,                           /* Domain name */
-  ST_REF                               /* URL reference */
+  ST_REF,                              /* URL reference */
+  ST_BACKREF,                          /* Back-reference (frame or redirect source) */
 };
 
+#define STRING_TYPE_NAMES  /* List of T(name, mask) entries, mask = single bit of enum string_type; T is not defined in this part of the header, presumably the user defines it before expanding the list */ \
+       T(URL, 1 << ST_URL)                     \
+       T(HOST, 1 << ST_HOST)                   \
+       T(DOMAIN, 1 << ST_DOMAIN)               \
+       T(REF, 1 << ST_REF)                     \
+       T(BACKREF, 1 << ST_BACKREF)
+
+#define STRING_TYPES_URL ((1 << ST_URL) | (1 << ST_REF) | (1 << ST_BACKREF))  /* Bit mask of the URL, REF and BACKREF string types */
+#define STRING_TYPES_CASE_INSENSITIVE ((1 << ST_HOST) | (1 << ST_DOMAIN))  /* Bit mask of the HOST and DOMAIN string types; presumably these are matched ignoring case -- confirm at the users */
+
 /* Index card attributes */
 
 struct card_attr {
@@ -52,6 +76,8 @@ enum card_flag {
   CARD_FLAG_MERGED = 8,                        /* Destination of a merge [merger] */
 };
 
+#define CARD_POS_SHIFT 5               /* Card positions are shifted this # of bits to the right (NOTE(review): text said "bytes", but a shift count is in bits -- confirm at the users) */
+
 /* String fingerprints */
 
 struct fingerprint {
@@ -59,3 +85,23 @@ struct fingerprint {
 };
 
 void fingerprint(byte *string, struct fingerprint *fp);  /* Defined elsewhere; presumably stores the fingerprint of string into *fp -- confirm */
+
+/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher): GET_TAGGED_CHAR(p,u) reads one item at p into u and advances p past it */
+/* p and u are evaluated several times and are assigned to, so both must be plain lvalues; NOTE(review): the comparisons assume *p yields unsigned byte values -- confirm */
+#define GET_TAGGED_CHAR(p,u) do {                              \
+  u = *p;                                      /* peek at the lead byte, p not advanced yet */ \
+  if (u >= 0xc0)                               /* 0xc0 and above: handed to GET_UTF8 (defined elsewhere) */ \
+    GET_UTF8(p,u);                                             \
+  else if (u >= 0x80)                          /* 0x80..0xbf: tag byte */ \
+    {                                                          \
+      p++;                                                     \
+      if (u >= 0xb0)                           /* 0xb0..0xbf -> 0x800200b0..0x800200bf */ \
+       u += 0x80020000;                                        \
+      else if (u >= 0xa0)                      /* 0xa0..0xaf: two-byte tag, low 4 bits of this byte and low 6 bits of the next one, which is consumed too */ \
+       u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f);     \
+      else                                     /* 0x80..0x9f -> 0x80000080..0x8000009f */ \
+       u += 0x80000000;                                        \
+    }                                                          \
+  else                                         /* below 0x80: the byte itself is the value */ \
+    p++;                                                       \
+} while (0)