]> mj.ucw.cz Git - libucw.git/blobdiff - lib/index.h
Forgot to commit this one during the "search by age" changes.
[libucw.git] / lib / index.h
index e4182c7c6ff34c5430b6301d420b5a4bb1f119f3..0585141245e440dd4b03e0dc75068956ee00abea 100644 (file)
@@ -1,26 +1,19 @@
 /*
- *     Sherlock Gatherer: Data structures used in indices
+ *     Sherlock: Data structures used in indices
  *
- *     (c) 2001 Martin Mares <mj@ucw.cz>
+ *     (c) 2001--2002 Martin Mares <mj@ucw.cz>
  */
 
 /* Words */
 
 #define MAX_WORD_LEN           64
 
-/* Word types */
-
-enum word_type {
-  WT_RESERVED,                         /* Reserved word type */
-  WT_TEXT,                             /* Ordinary text */
-  WT_EMPH,                             /* Emphasized text */
-  WT_SMALL,                            /* Small font */
-  WT_TITLE,                            /* Document title */
-  WT_SMALL_HEADING,                    /* Heading */
-  WT_BIG_HEADING,                      /* Larger heading */
-  WT_KEYWORD,                          /* Explicitly marked keyword */
-  WT_META,                             /* Various meta-information */
-  WT_ALT                               /* Alternate texts for graphical elements */
+/* Word and string types are defined in lib/custom.h */
+
+/* Global index parameters */
+
+struct index_params {
+  sh_time_t ref_time;                  /* Reference time (for document ages etc.) */
 };
 
 /* Index card attributes */
@@ -28,9 +21,13 @@ enum word_type {
 struct card_attr {
   u32 card;                            /* Reference to card description (either oid or filepos) */
   u32 site_id;
+#define INT_ATTR(t,i,o,k,g,p) t i;
+  CUSTOM_ATTRS                         /* Include all custom attributes */
+#undef INT_ATTR
   byte weight;
   byte flags;
-  byte rfu[2];
+  byte age;                            /* Document age in pseudo-logarithmic units wrt. reference time */
+  // byte rfu[1];                      /* If no custom attributes are defined */
 };
 
 enum card_flag {
@@ -40,6 +37,8 @@ enum card_flag {
   CARD_FLAG_MERGED = 8,                        /* Destination of a merge [merger] */
 };
 
+#define CARD_POS_SHIFT 5               /* Card positions are shifted this # of bits (not bytes) to the right -- TODO confirm against users */
+
 /* String fingerprints */
 
 struct fingerprint {
@@ -47,3 +46,59 @@ struct fingerprint {
 };
 
 void fingerprint(byte *string, struct fingerprint *fp);
+
+static inline u32                      /* Packs hash[0..3] into one 32-bit value, hash[0] being the most significant part */
+fp_hash(struct fingerprint *fp)
+{                                      /* Widen to u32 before shifting: an int-promoted element >= 0x80 shifted by 24 overflows int */
+  return ((u32) fp->hash[0] << 24) | ((u32) fp->hash[1] << 16) | ((u32) fp->hash[2] << 8) | (u32) fp->hash[3];
+}
+
+/* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */
+
+#define GET_TAGGED_CHAR(p,u) do {      /* p: input pointer (advanced), u: receives the value; both expand repeatedly, pass plain variables */ \
+  u = *p;                              /* Fetch the leading element without advancing yet */ \
+  if (u >= 0xc0)                       /* 0xc0 and above: handed over to GET_UTF8_CHAR (defined elsewhere) */ \
+    GET_UTF8_CHAR(p,u);                /* presumably decodes UTF-8 and advances p itself -- TODO confirm */ \
+  else if (u >= 0x80)                  /* 0x80..0xbf: a tag, mapped to 0x80000000 and higher */ \
+    {                                                          \
+      p++;                             /* Step over the leading element */ \
+      if (u >= 0xb0)                   /* 0xb0..0xbf: only 0xb0 itself is accepted */ \
+        {                                                      \
+         if (u != 0xb0)                                        \
+            ASSERT(0);                                         \
+         u += 0x80020000;              /* Result is 0x800200b0 */ \
+        }                                                      \
+      else if (u >= 0xa0)              /* 0xa0..0xaf: tag spanning two elements */ \
+        {                                                      \
+         ASSERT(*p >= 0x80 && *p <= 0xbf);     /* Second element must lie in 0x80..0xbf */ \
+         u = 0x80010000 + ((u & 0x0f) << 6) + (*p++ & 0x3f);   /* 4 bits from the first, 6 bits from the second */ \
+        }                                                      \
+      else                             /* 0x80..0x9f: single-element tag */ \
+       u += 0x80000000;                /* Result is 0x80000080..0x8000009f */ \
+    }                                                          \
+  else                                 /* Below 0x80: the value is used as it is */ \
+    p++;                                                       \
+} while (0)
+
+/* Conversion of document age from seconds to our internal units */
+
+static inline int                      /* Returns -1 or an age code in 0..255; the resolution gets coarser with age */
+convert_age(sh_time_t lastmod, sh_time_t reftime)
+{
+  sh_time_t hours, days, weeks, years;
+  if (lastmod > reftime)               /* modified after the reference time: no valid age */
+    return -1;
+  hours = (reftime - lastmod) / 3600;
+  if (hours < 48)                      /* codes 0..47: first 2 days, one code per hour */
+    return hours;
+  days = (hours - 48) / 24;
+  if (days < 64)                       /* codes 48..111: following 64 days, one code per day */
+    return 48 + days;
+  weeks = (days - 64) / 7;
+  if (weeks < 135)                     /* codes 112..246: following 135 weeks, one code per week */
+    return 112 + weeks;
+  years = (weeks - 135) / 52;
+  if (years < 8)                       /* codes 247..254: following 8 periods of 52 weeks each */
+    return 247 + years;
+  return 255;                          /* anything older than that */
+}