2 * Sherlock Gatherer: Data structures used in indices
4 * (c) 2001 Martin Mares <mj@ucw.cz>
/*
 * CLAMP(x, min, max) -- limit x to the closed range [min, max].
 *
 * x is converted to int before comparing. The result is min when x < min,
 * max when x > max, and x otherwise. The lower bound is tested first, so
 * min wins when min > max.
 *
 * Each argument is parenthesized and evaluated exactly once, so bounds
 * written with low-precedence operators (e.g. `a | b`, `c ? d : e`) or
 * with side effects behave as expected. Note that max is now always
 * evaluated, even when the lower bound is the one that applies.
 *
 * Relies on GNU statement expressions and __typeof__.
 */
#define CLAMP(x,min,max) ({ \
	int _clamp_v = (x); \
	__typeof__(min) _clamp_lo = (min); \
	__typeof__(max) _clamp_hi = (max); \
	(_clamp_v < _clamp_lo) ? _clamp_lo : (_clamp_v > _clamp_hi) ? _clamp_hi : _clamp_v; \
})
/* Upper bound on the length of a word. NOTE(review): the unit (bytes vs. characters) and whether a terminator is counted are not visible here -- confirm. */
11 #define MAX_WORD_LEN 64
16 WT_RESERVED, /* Reserved word type */
17 WT_TEXT, /* Ordinary text */
18 WT_EMPH, /* Emphasized text */
19 WT_SMALL, /* Small font */
20 WT_TITLE, /* Document title */
21 WT_SMALL_HEADING, /* Heading */
22 WT_BIG_HEADING, /* Larger heading */
23 WT_KEYWORD, /* Explicitly marked keyword */
24 WT_META, /* Various meta-information */
25 WT_ALT /* Alternate texts for graphical elements */
28 #define WORD_TYPE_NAMES \
30 T(TEXT, 1 << WT_TEXT) \
31 T(EMPH, 1 << WT_EMPH) \
32 T(SMALL, 1 << WT_SMALL) \
33 T(TITLE, 1 << WT_TITLE) \
34 T(HDR, (1 << WT_SMALL_HEADING) | (1 << WT_BIG_HEADING)) \
35 T(HDR1, 1 << WT_SMALL_HEADING) \
36 T(HDR2, 1 << WT_BIG_HEADING) \
37 T(KEYWD, 1 << WT_KEYWORD) \
38 T(META, 1 << WT_META) \
44 ST_RESERVED, /* Reserved string type */
45 ST_URL, /* URL of the document */
46 ST_HOST, /* Host name */
47 ST_DOMAIN, /* Domain name */
48 ST_REF, /* URL reference */
49 ST_BACKREF, /* Back-reference (frame or redirect source) */
/*
 * List of string type names: each T(name, mask) entry pairs a name with a
 * bitmask built from 1 << ST_* values. T is not defined in this view;
 * presumably the user defines it before expanding the list -- confirm.
 */
52 #define STRING_TYPE_NAMES \
54 T(HOST, 1 << ST_HOST) \
55 T(DOMAIN, 1 << ST_DOMAIN) \
57 T(BACKREF, 1 << ST_BACKREF)
/* Bitmask (one bit per ST_* value) selecting the URL-like string types: ST_URL, ST_REF and ST_BACKREF */
59 #define STRING_TYPES_URL ((1 << ST_URL) | (1 << ST_REF) | (1 << ST_BACKREF))
/* Bitmask (one bit per ST_* value) selecting ST_HOST and ST_DOMAIN; by its name, the types to be treated case-insensitively (consumers are not visible here) */
60 #define STRING_TYPES_CASE_INSENSITIVE ((1 << ST_HOST) | (1 << ST_DOMAIN))
62 /* Index card attributes */
65 u32 card; /* Reference to card description (either oid or filepos) */
73 CARD_FLAG_EMPTY = 1, /* Empty document (redirect, robot file etc.) [scanner] */
74 CARD_FLAG_ACCENTED = 2, /* Document contains accented characters [scanner] */
75 CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */
76 CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */
79 #define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bits to the right */
81 /* String fingerprints */
/*
 * Declaration only; the implementation is not part of this view.
 * Takes an input string and a pointer to a struct fingerprint.
 * NOTE(review): presumably stores the fingerprint of `string` in *fp --
 * confirm against the definition.
 */
87 void fingerprint(byte *string, struct fingerprint *fp);
89 /* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */
91 #define GET_TAGGED_CHAR(p,u) do { \
100 else if (u >= 0xa0) \
101 u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \