]> mj.ucw.cz Git - libucw.git/blobdiff - lib/index.h
Introduced obuck_get_pos(), converted gatherd limits to use it.
[libucw.git] / lib / index.h
index 017cf7b2d58771e540c106efca38d879a631ba8b..d484ee8ee8e177b3d008ff47cad5f56ae5f7f1e4 100644 (file)
@@ -25,6 +25,40 @@ enum word_type {
   WT_ALT                               /* Alternate texts for graphical elements */
 };
 
+#define WORD_TYPE_NAMES                /* List of T(name, bitmap of WT_xxx word types) entries; T() is not defined in this part of the file */ \
+       T(WORD, ~0)                     /* All bits set */                      \
+       T(TEXT, 1 << WT_TEXT)                   \
+       T(EMPH, 1 << WT_EMPH)                   \
+       T(SMALL, 1 << WT_SMALL)                 \
+       T(TITLE, 1 << WT_TITLE)                 \
+       T(HDR, (1 << WT_SMALL_HEADING) | (1 << WT_BIG_HEADING))  /* Both heading types together */ \
+       T(HDR1, 1 << WT_SMALL_HEADING)          \
+       T(HDR2, 1 << WT_BIG_HEADING)            \
+       T(KEYWD, 1 << WT_KEYWORD)               \
+       T(META, 1 << WT_META)                   \
+       T(ALT, 1 << WT_ALT)
+
+/*
+ * String types
+ *
+ * The enumerators are used as bit numbers: bitmaps of string types are
+ * built as (1 << ST_xxx), see STRING_TYPE_NAMES and STRING_TYPES_xxx.
+ */
+
+enum string_type {
+  ST_RESERVED,                         /* Reserved string type */
+  ST_URL,                              /* URL of the document */
+  ST_HOST,                             /* Host name */
+  ST_DOMAIN,                           /* Domain name */
+  ST_REF,                              /* URL reference */
+  ST_BACKREF,                          /* Back-reference (frame or redirect source) */
+};
+
+#define STRING_TYPE_NAMES              /* List of T(name, bitmap of ST_xxx string types) entries; ST_RESERVED has no name */ \
+       T(URL, 1 << ST_URL)                     \
+       T(HOST, 1 << ST_HOST)                   \
+       T(DOMAIN, 1 << ST_DOMAIN)               \
+       T(REF, 1 << ST_REF)                     \
+       T(BACKREF, 1 << ST_BACKREF)
+
+#define STRING_TYPES_URL ((1 << ST_URL) | (1 << ST_REF) | (1 << ST_BACKREF))  /* Bitmap of ST_URL, ST_REF and ST_BACKREF */
+#define STRING_TYPES_CASE_INSENSITIVE ((1 << ST_HOST) | (1 << ST_DOMAIN))      /* Bitmap of ST_HOST and ST_DOMAIN; presumably the types matched regardless of case -- confirm against the users */
+
 /* Index card attributes */
 
 struct card_attr {
@@ -42,6 +76,8 @@ enum card_flag {
   CARD_FLAG_MERGED = 8,                        /* Destination of a merge [merger] */
 };
 
+#define CARD_POS_SHIFT 5               /* Card positions are shifted this # of bits to the right (presumably giving units of 1 << CARD_POS_SHIFT bytes -- confirm) */
+
 /* String fingerprints */
 
 struct fingerprint {
@@ -49,3 +85,29 @@ struct fingerprint {
 };
 
 void fingerprint(byte *string, struct fingerprint *fp);
+
+/*
+ * Combine the first four elements of fp->hash into a single 32-bit value,
+ * hash[0] ending up in the most significant byte and hash[3] in the least
+ * significant one.
+ */
+static inline u32
+fp_hash(struct fingerprint *fp)
+{
+  /*
+   * Convert to u32 before shifting.  If hash[] has a type narrower than int
+   * (its declaration is not visible from here), the elements would be
+   * promoted to signed int and shifting a value >= 0x80 left by 24 bits
+   * would overflow it, which is undefined behaviour.
+   */
+  return ((u32) fp->hash[0] << 24)
+       | ((u32) fp->hash[1] << 16)
+       | ((u32) fp->hash[2] << 8)
+       |  (u32) fp->hash[3];
+}
+
+/*
+ * Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher)
+ *
+ * GET_TAGGED_CHAR(p,u) decodes one item starting at p into u and advances
+ * p past it, depending on the first byte:
+ *
+ *   below 0x80         u = the byte itself, 1 byte consumed
+ *   0x80 to 0x9f       u = 0x80000000 + byte, 1 byte consumed
+ *   0xa0 to 0xaf       u = 0x80010000 + ((byte & 0x0f) << 6) + (next byte & 0x3f),
+ *                      2 bytes consumed
+ *   0xb0 to 0xbf       u = 0x80020000 + byte, 1 byte consumed
+ *   0xc0 and above     handed over to GET_UTF8(p,u), which is defined elsewhere
+ *
+ * Both arguments are expanded several times and are assigned to, so they
+ * have to be plain lvalues without side effects.
+ * NOTE(review): the range checks assume that *p yields 0..255 (p pointing
+ * to unsigned bytes) and that u is wide enough for values above 0x80000000
+ * -- confirm at the call sites.
+ */
+
+#define GET_TAGGED_CHAR(p,u) do {                              \
+  u = *p;                                                      \
+  if (u >= 0xc0)                       /* Left to GET_UTF8, p not yet advanced */ \
+    GET_UTF8(p,u);                                             \
+  else if (u >= 0x80)                                          \
+    {                                                          \
+      p++;                                                     \
+      if (u >= 0xb0)                   /* 0xb0 to 0xbf */      \
+       u += 0x80020000;                                        \
+      else if (u >= 0xa0)              /* 0xa0 to 0xaf: 4 + 6 bits taken from two bytes */ \
+       u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f);     \
+      else                             /* 0x80 to 0x9f */      \
+       u += 0x80000000;                                        \
+    }                                                          \
+  else                                 /* Below 0x80: taken as is */ \
+    p++;                                                       \
+} while (0)