-enum word_type {
- WT_RESERVED, /* Reserved word type */
- WT_TEXT, /* Ordinary text */
- WT_EMPH, /* Emphasized text */
- WT_SMALL, /* Small font */
- WT_TITLE, /* Document title */
- WT_SMALL_HEADING, /* Heading */
- WT_BIG_HEADING, /* Larger heading */
- WT_KEYWORD, /* Explicitly marked keyword */
- WT_META, /* Various meta-information */
- WT_ALT /* Alternate texts for graphical elements */
-};
-
-#define WORD_TYPE_NAMES \
- T(WORD, ~0) \
- T(TEXT, 1 << WT_TEXT) \
- T(EMPH, 1 << WT_EMPH) \
- T(SMALL, 1 << WT_SMALL) \
- T(TITLE, 1 << WT_TITLE) \
- T(HDR, (1 << WT_SMALL_HEADING) | (1 << WT_BIG_HEADING)) \
- T(HDR1, 1 << WT_SMALL_HEADING) \
- T(HDR2, 1 << WT_BIG_HEADING) \
- T(KEYWD, 1 << WT_KEYWORD) \
- T(META, 1 << WT_META) \
- T(ALT, 1 << WT_ALT)
-
-/* String types */
-
-enum string_type {
- ST_RESERVED, /* Reserved string type */
- ST_URL, /* URL of the document */
- ST_HOST, /* Host name */
- ST_DOMAIN, /* Domain name */
- ST_REF, /* URL reference */
- ST_BACKREF, /* Back-reference (frame or redirect source) */
-};
-
-#define STRING_TYPE_NAMES \
- T(URL, 1 << ST_URL) \
- T(HOST, 1 << ST_HOST) \
- T(DOMAIN, 1 << ST_DOMAIN) \
- T(REF, 1 << ST_REF) \
- T(BACKREF, 1 << ST_BACKREF)
+/*
+ * Words
+ *
+ * MAX_WORD_LEN is the maximum length (measured in UTF-8 characters, excluding
+ * the terminating zero byte, if any) of any word that may appear in the
+ * indices or in the bucket file. Naturally, the same constant also bounds
+ * the number of UCS-2 characters in a word.
+ *
+ * Caveat: If you are upcasing/downcasing the word, its UTF-8 encoding can
+ * grow to at most twice its size, so you need to reserve 2*MAX_WORD_LEN bytes.
+ */