X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Findex.h;h=7a462942349cd93b1f351c36008dddc64c46d49b;hb=36fac2a8ffbf1275176e5eae0a9397288417bf0d;hp=8bfbe88d4d6862f58118de370d52f41d3d483553;hpb=7a84ff60630fb4211e60320843955ff2cf013ac2;p=libucw.git diff --git a/lib/index.h b/lib/index.h index 8bfbe88d..7a462942 100644 --- a/lib/index.h +++ b/lib/index.h @@ -1,7 +1,7 @@ /* * Sherlock: Data structures used in indices * - * (c) 2001--2002 Martin Mares + * (c) 2001--2003 Martin Mares */ #ifndef _SHERLOCK_INDEX_H @@ -11,9 +11,21 @@ #include SHERLOCK_CUSTOM #include "charset/unistream.h" -/* Words */ +/* + * Words and word complexes + * + * MAX_WORD_LEN is the maximum length (measured in UTF-8 characters, excluding + * the terminating zero byte if there's any) of any word which may appear in the + * indices or in the bucket file. Naturally, the same constant also bounds + * the number of UCS-2 characters in a word. + * + * Caveat: If you are upcasing/downcasing the word, the UTF-8 encoding can + * expand, although at most twice, so you need to reserve 2*MAX_WORD_LEN bytes. + * + * MAX_COMPLEX_LEN is the upper bound on number of words in any word complex. + */ -#define MAX_WORD_LEN 64 +#define MAX_WORD_LEN 64 /* a multiple of 4 */ #define MAX_COMPLEX_LEN 10 /* Word and string types are defined in lib/custom.h */ @@ -34,8 +46,12 @@ struct card_attr { CUSTOM_CARD_ATTRS /* Include all custom attributes */ byte weight; byte flags; +#ifdef CONFIG_LASTMOD byte age; /* Document age in pseudo-logarithmic units wrt. reference time */ - // byte rfu[1]; /* If no custom attributes are defined */ +#endif +#ifdef CONFIG_FILETYPE + byte type_flags; /* File type flags (see below) */ +#endif }; enum card_flag { @@ -44,9 +60,23 @@ enum card_flag { CARD_FLAG_DUP = 4, /* Removed as a duplicate [merger] */ CARD_FLAG_MERGED = 8, /* Destination of a merge [merger] */ CARD_FLAG_IMAGE = 16, /* Is an image object [scanner] */ + CARD_FLAG_FRAMESET = 32, /* Contains a frameset [scanner] */ }; -#define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bytes to the right */ +#define CARD_POS_SHIFT 5 /* Card positions are shifted this # of bits to the right */ + +/* + * We store document type and several other properties in card_attr->type_flags. + * Here we define only the basic structure, the details and also how to match the + * types are defined in custom.h. + * + * bits 7--5 file type: (0-3: text types, 4-7: other types, defined by custom.h) + * bits 4--0 type-dependent information, for text types it's document language code + */ + +#define CA_GET_FILE_TYPE(a) ((a)->type_flags >> 5) +#define CA_GET_FILE_INFO(a) ((a)->type_flags & 0x1f) +#define CA_GET_FILE_LANG(a) ((a)->type_flags & 0x80 ? 0 : CA_GET_FILE_INFO(a)) /* String fingerprints */