* (c) 2001 Martin Mares <mj@ucw.cz>
*/
+#define CLAMP(x,min,max) ({ int _t=(x); (_t < (min)) ? (min) : (_t > (max)) ? (max) : _t; }) /* Clamp x (evaluated once, converted to int) to the range [min,max]; min and max may each be evaluated twice. Uses a GNU statement expression. */
+
/* Words */
#define MAX_WORD_LEN 64
/* Word types */
enum word_type {
+ WT_RESERVED, /* Reserved word type (first enumerator, so its value is 0) */
WT_TEXT, /* Ordinary text */
WT_EMPH, /* Emphasized text */
WT_SMALL, /* Small font */
WT_META, /* Various meta-information */
WT_ALT /* Alternate texts for graphical elements */
};
+
+/* String types */
+
+enum string_type {
+ ST_RESERVED, /* Reserved string type (first enumerator, so its value is 0) */
+ ST_URL, /* URL of the document */
+ ST_HOST, /* Host name */
+ ST_DOMAIN, /* Domain name */
+ ST_REF, /* URL reference */
+ ST_BACKREF, /* Back-reference (frame or redirect source) */
+};
+
+/* Index card attributes */
+
+struct card_attr {
+ u32 card; /* Reference to card description (either oid or filepos) */
+ u32 site_id; /* NOTE(review): presumably identifies the site the card belongs to -- confirm */
+ byte weight; /* NOTE(review): presumably a ranking weight of the card -- confirm */
+ byte flags; /* NOTE(review): presumably a bitwise OR of enum card_flag values -- confirm */
+ byte rfu[2]; /* NOTE(review): presumably reserved for future use / padding -- confirm */
+};
+
+enum card_flag { /* Each value is a distinct single bit (1, 2, 4, 8) */
+ CARD_FLAG_EMPTY = 1, /* Empty document (redirect, robot file etc.) [scanner] */
+ CARD_FLAG_ACCENTED = 2, /* Document contains accented characters [scanner] */
+ CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */
+ CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */
+};
+
+#define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bytes to the right. NOTE(review): as a shift count this is presumably a number of bits (i.e. positions in units of 32 bytes) -- confirm */
+
+/* String fingerprints */
+
+struct fingerprint {
+ byte hash[12]; /* 12-element array holding the fingerprint value */
+};
+
+void fingerprint(byte *string, struct fingerprint *fp); /* NOTE(review): presumably computes the fingerprint of string and stores it in *fp -- implementation not visible here, confirm */
+
+/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher): GET_TAGGED_CHAR(p,u) stores the value read at p into u and moves p forward. Both arguments are used as lvalues. NOTE(review): the ranges below assume *p yields an unsigned byte and u is a 32-bit unsigned variable -- confirm */
+
+#define GET_TAGGED_CHAR(p,u) do { \
+ u = *p; /* fetch the lead byte without advancing yet */ \
+ if (u >= 0xc0) /* lead byte 0xc0 and above: handed to GET_UTF8 (defined elsewhere; presumably decodes UTF-8 and advances p -- confirm) */ \
+ GET_UTF8(p,u); \
+ else if (u >= 0x80) /* 0x80..0xbf: treated as a tag */ \
+ { \
+ p++; /* step over the tag's first byte */ \
+ if (u >= 0xb0) /* 0xb0..0xbf: single-byte tag */ \
+ u += 0x80020000; /* yields 0x800200b0..0x800200bf */ \
+ else if (u >= 0xa0) /* 0xa0..0xaf: two-byte tag */ \
+ u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f); /* low 4 bits of the first byte, then low 6 bits of the second byte, which is consumed as well */ \
+ else /* 0x80..0x9f: single-byte tag */ \
+ u += 0x80000000; /* yields 0x80000080..0x8000009f */ \
+ } \
+ else /* below 0x80: u keeps the byte value, only p advances */ \
+ p++; \
+} while (0)