mj.ucw.cz Git - libucw.git/blobdiff - lib/index.h
As usual, stuff in lib/* is LGPL'ed.
[libucw.git] / lib / index.h
index 696a92191c3996011a5cf3774e47cdd9b8ad2ec7..8bfbe88d4d6862f58118de370d52f41d3d483553 100644 (file)
@@ -8,11 +8,13 @@
 #define _SHERLOCK_INDEX_H
 
 #include "lib/fastbuf.h"
+#include SHERLOCK_CUSTOM
 #include "charset/unistream.h"
 
 /* Words */
 
 #define MAX_WORD_LEN           64
+#define MAX_COMPLEX_LEN                10
 
 /* Word and string types are defined in lib/custom.h */
 
@@ -26,10 +28,10 @@ struct index_params {
 
 struct card_attr {
   u32 card;                            /* Reference to card description (either oid or filepos) */
+#ifdef CONFIG_SITES
   u32 site_id;
-#define INT_ATTR(t,i,o,k,g,p) t i;
-  CUSTOM_ATTRS                         /* Include all custom attributes */
-#undef INT_ATTR
+#endif
+  CUSTOM_CARD_ATTRS                    /* Include all custom attributes */
   byte weight;
   byte flags;
   byte age;                            /* Document age in pseudo-logarithmic units wrt. reference time */
@@ -86,6 +88,17 @@ fp_hash(struct fingerprint *fp)
     p++;                                                       \
 } while (0)
 
+#define SKIP_TAGGED_CHAR(p) do { /* p is modified in place */  \
+  if (*p >= 0x80 && *p < 0xc0) /* byte in 0x80..0xbf */        \
+    {                                                          \
+      uns u = *p++; /* consume that byte */                    \
+      if (u >= 0xa0 && u < 0xb0 && *p >= 0x80 && *p < 0xc0)    \
+       p++; /* also consume the second byte */                 \
+    }                                                          \
+  else                                                         \
+    UTF8_SKIP(p); /* otherwise defer to UTF8_SKIP */           \
+} while (0)
+
 static inline uns
 bget_tagged_char(struct fastbuf *f)
 {