From: Martin Mares Date: Fri, 30 Mar 2001 19:38:45 +0000 (+0000) Subject: Added indexing of URL words (partially ported from our old alter ego). X-Git-Tag: holmes-import~1487 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=981de590927a038ab4b844d16de5a8706daabd1b;p=libucw.git Added indexing of URL words (partially ported from our old alter ego). Robert, please ignore word types present in WORD_TYPES_HIDDEN when searching for contexts -- URLs and other tricky stuff shouldn't show up. --- diff --git a/lib/index.h b/lib/index.h index 0fe46f6b..f7ad7c9a 100644 --- a/lib/index.h +++ b/lib/index.h @@ -21,13 +21,14 @@ enum word_type { WT_KEYWORD, /* Explicitly marked keyword */ WT_META, /* Various meta-information */ WT_ALT, /* Alternate texts for graphical elements */ + WT_URL, /* Word extracted from document URL */ WT_MAX }; /* Descriptive names used for user output */ #define WORD_TYPE_USER_NAMES \ "reserved", "text", "emph", "small", "title", "hdr1", "hdr2", "keywd", \ - "meta", "alt", "type10", "type11", "type12", "type13", "type14", "type15" + "meta", "alt", "url", "type11", "type12", "type13", "type14", "type15" /* Keywords for word type names */ #define WORD_TYPE_NAMES \ @@ -43,6 +44,9 @@ enum word_type { T(META, 1 << WT_META) \ T(ALT, 1 << WT_ALT) +/* These types are not shown in document contexts */ +#define WORD_TYPES_HIDDEN (1 << WT_URL) + /* String types */ enum string_type {