2 * Sherlock Gatherer: Data structures used in indices
4 * (c) 2001 Martin Mares <mj@ucw.cz>
/*
 * CLAMP(x,min,max) -- limit x to the range [min,max].
 *
 * x is converted to int before it is compared. Yields min when x < min,
 * otherwise max when x > max, otherwise x itself.
 *
 * Every argument is parenthesized and evaluated exactly once, so callers
 * may pass expressions containing low-precedence operators (e.g. `a | b`,
 * `c ? d : e`) or side effects. Relies on GNU statement expressions and
 * __typeof__.
 */
#define CLAMP(x,min,max) ({						\
	int _clamp_v = (x);						\
	__typeof__(min) _clamp_lo = (min);				\
	__typeof__(max) _clamp_hi = (max);				\
	(_clamp_v < _clamp_lo) ? _clamp_lo				\
		: (_clamp_v > _clamp_hi) ? _clamp_hi : _clamp_v; })
11 #define MAX_WORD_LEN 64 /* Upper limit on word length. NOTE(review): unit (bytes vs. characters) and whether a terminator is counted are not visible here -- confirm at the users of this constant */
16 WT_RESERVED, /* Reserved word type */
17 WT_TEXT, /* Ordinary text */
18 WT_EMPH, /* Emphasized text */
19 WT_SMALL, /* Small font */
20 WT_TITLE, /* Document title */
21 WT_SMALL_HEADING, /* Heading */
22 WT_BIG_HEADING, /* Larger heading */
23 WT_KEYWORD, /* Explicitly marked keyword */
24 WT_META, /* Various meta-information */
25 WT_ALT /* Alternate texts for graphical elements */
31 ST_RESERVED, /* Reserved string type */
32 ST_URL, /* URL of the document */
33 ST_HOST, /* Host name */
34 ST_DOMAIN, /* Domain name */
35 ST_REF, /* URL reference */
36 ST_BACKREF, /* Back-reference (frame or redirect source) */
39 /* Index card attributes */
42 u32 card; /* Reference to card description (either oid or filepos) */
50 CARD_FLAG_EMPTY = 1, /* Empty document (redirect, robot file etc.) [scanner] */
51 CARD_FLAG_ACCENTED = 2, /* Document contains accented characters [scanner] */
52 CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */
53 CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */
56 #define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bits to the right */
58 /* String fingerprints */
64 void fingerprint(byte *string, struct fingerprint *fp); /* Presumably computes the fingerprint of `string` and stores it in *fp -- TODO confirm against the definition; string termination and ownership of fp are not visible here */
66 /* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */
68 #define GET_TAGGED_CHAR(p,u) do { \
78 u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); \