]> mj.ucw.cz Git - libucw.git/commitdiff
Experimental version of search tree construction...
authorPavel Charvat <pavel.charvat@netcentrum.cz>
Sun, 9 Apr 2006 15:33:51 +0000 (17:33 +0200)
committerPavel Charvat <pavel.charvat@netcentrum.cz>
Sun, 9 Apr 2006 15:33:51 +0000 (17:33 +0200)
images/image-idx.c
images/image-sig.c
images/images.h

index a755d5117cb84b96748e1c3979ff9c637d93f012..21024df39caf95125dfdba412dde1caa49efb354 100644 (file)
 static void
 generate_signatures(uns limit)
 {
-  struct fastbuf *cards = index_bopen("cards", O_RDONLY);
-  struct fastbuf *card_attrs = index_bopen("card-attrs", O_RDONLY);
-  struct fastbuf *signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
+  struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
+  struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
+  struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
   struct card_attr ca;
   struct image_signature sig;
   struct mempool *pool = mp_new(1 << 16);
   struct buck2obj_buf *bob = buck2obj_alloc();
-  oid_t oid = 0;
+  uns count = 0;
 
-  DBG("Generating signatures");
+  log(L_INFO, "Generating image signatures");
+  bputl(fb_signatures, 0);
+  compute_image_signature_prepare();
 
-  for (; bread(card_attrs, &ca, sizeof(ca)); oid++)
+  for (oid_t oid = 0; bread(fb_card_attrs, &ca, sizeof(ca)); oid++)
     if ((uns)((ca.type_flags >> 4) - 8) < 4)
       {
-        bsetpos(cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
-        uns buck_len = bgetl(cards)-(LIZARD_COMPRESS_HEADER-1);
-        uns buck_type = bgetc(cards) + BUCKET_TYPE_PLAIN;
+        bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
+        uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1);
+        uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN;
         mp_flush(pool);
-        struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, cards, NULL);
+        struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb_cards, NULL);
         struct oattr *attr;
         if (!obj)
           die("Failed to read card");
@@ -72,9 +74,13 @@ generate_signatures(uns limit)
            int err = compute_image_signature(thumb, thumb_len, &sig);
            if (!err)
              {
-               bputl(signatures, oid);
-               bwrite(signatures, &sig, sizeof(sig));
-                if (!--limit)
+               bwrite(fb_signatures, &oid, sizeof(oid));
+               bwrite(fb_signatures, &sig.vec, sizeof(struct image_vector));
+               bputc(fb_signatures, sig.len);
+               if (sig.len)
+                 bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region));
+               count++;
+               if (count >= limit)
                  break;
              }
            else
@@ -83,14 +89,195 @@ generate_signatures(uns limit)
            bb_done(&buf);
          }
       }
+  brewind(fb_signatures);
+  bputl(fb_signatures, count);
+  DBG("%d signatures written", count);
 
+  compute_image_signature_finish();
   buck2obj_free(bob);
   mp_delete(pool);
-  bclose(cards);
-  bclose(card_attrs);
-  bclose(signatures);
+  bclose(fb_cards);
+  bclose(fb_card_attrs);
+  bclose(fb_signatures);
 }
 
+struct signature_record {
+  oid_t oid;
+  struct image_vector vec;
+};
+
+#define ASORT_PREFIX(x) build_search_tree_##x
+#define ASORT_KEY_TYPE struct signature_record *
+#define ASORT_ELT(i) rec[i]
+#define ASORT_LT(x,y) x->vec.f[dim] < y->vec.f[dim]
+#define ASORT_EXTRA_ARGS , uns dim, struct signature_record **rec
+#include "lib/arraysort.h"
+
+#if 0
+#define DBG_KD(x...) DBG(x)
+#else
+#define DBG_KD(x...) do{}while(0)
+#endif
+
+static void
+build_search_tree(void)
+{
+  log(L_INFO, "Building KD-tree");
+
+  struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY);
+  struct image_tree tree;
+  tree.count = bgetl(fb_signatures);
+  ASSERT(tree.count < 0x80000000);
+  if (!tree.count)
+    {
+      /* FIXME */
+      bclose(fb_signatures);
+      die("There are no signatures");
+    }
+  else
+    {
+      DBG("Reading %d signatures", tree.count);
+      struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record));
+      struct signature_record **precords = xmalloc(tree.count * sizeof(void *));
+      for (uns i = 0; i < tree.count; i++)
+        {
+         bread(fb_signatures, &records[i].oid, sizeof(oid_t));
+         bread(fb_signatures, &records[i].vec, sizeof(struct image_vector));
+         uns len = bgetc(fb_signatures);
+         bskip(fb_signatures, len * sizeof(struct image_region));
+         precords[i] = records + i;
+         if (likely(i))
+           for (uns j = 0; j < IMAGE_VEC_K; j++)
+             {
+               tree.bbox.vec[0].f[j] = MIN(tree.bbox.vec[0].f[j], records[i].vec.f[j]);
+               tree.bbox.vec[1].f[j] = MAX(tree.bbox.vec[1].f[j], records[i].vec.f[j]);
+             }
+         else
+            tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec;
+       }
+      bclose(fb_signatures);
+      
+      for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++);
+      DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, 
+         stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
+      uns leaves_index = 1 << (tree.depth - 1);
+      tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node));
+      tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf));
+     
+      /* Initialize recursion */
+      struct stk {
+       struct image_bbox bbox;
+       uns index, count;
+       struct signature_record **start;
+      } stk_top[32], *stk = stk_top + 1;
+      stk->index = 1;
+      stk->start = precords;
+      stk->count = tree.count;
+      stk->bbox.vec[0] = tree.bbox.vec[0];
+      for (uns i = 0; i < IMAGE_VEC_K; i++)
+       stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i];
+      uns entry_index = 0;
+    
+      /* Main loop */
+      while (stk != stk_top)
+        {
+         DBG_KD("Main loop... depth=%d index=%d count=%d, start=%d, min=%s dif=%s",
+             stk - stk_top, stk->index, stk->count, stk->start - precords,
+             stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1));
+         ASSERT(stk->count);
+         
+         /* Create leaf node */
+         if (stk->index >= leaves_index || stk->count < 2)
+           {
+             tree.nodes[stk->index].val = IMAGE_NODE_LEAF | entry_index;
+             for (; stk->count--; stk->start++)
+               {
+                 struct image_leaf *leaf = &tree.leaves[entry_index++];
+                 struct signature_record *record = *stk->start;
+                 leaf->oid = record->oid;
+                 leaf->flags = 0;
+                 for (uns i = IMAGE_VEC_K; i--; )
+                   {
+                     uns bits = IMAGE_LEAF_BITS(i);
+                     leaf->flags <<= bits;
+                     if (stk->bbox.vec[1].f[i])
+                       {
+                         uns value =
+                           (record->vec.f[i] - stk->bbox.vec[0].f[i]) * 
+                           ((1 << bits) - 1) / stk->bbox.vec[1].f[i];
+                         ASSERT(value < (1 << bits));
+                         leaf->flags |= value;
+                       }
+                   }
+                 if (!stk->count)
+                   leaf->flags |= IMAGE_LEAF_LAST; 
+                 DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x", 
+                     leaf->oid, stk_print_image_vector(&record->vec), leaf->flags);
+               }
+             stk--;
+           }
+        
+         /* Create internal node */
+         else
+           {
+             /* Select dimension to splis */
+             uns dim = 0;
+             for (uns i = 1; i < IMAGE_VEC_K; i++)
+               if (stk->bbox.vec[1].f[i] > stk->bbox.vec[1].f[dim])
+                 dim = i;
+
+             /* Sort... FIXME: we only need the median */
+             build_search_tree_sort(stk->count, dim, stk->start);
+             
+             /* Split in the middle */
+             uns index = stk->index;
+             stk[1].index = stk[0].index * 2;
+             stk[0].index = stk[1].index + 1;
+             stk[1].count = stk[0].count >> 1;
+             stk[0].count -= stk[1].count;
+             stk[1].start = stk[0].start;
+             stk[0].start += stk[1].count;
+
+             /* Choose split value */
+             uns lval = stk->start[-1]->vec.f[dim];
+             uns rval = stk->start[0]->vec.f[dim];
+             uns pivot = stk->bbox.vec[0].f[dim] + (stk->bbox.vec[1].f[dim] >> 1);
+             if (pivot <= lval)
+               pivot = lval;
+             else if (pivot >= rval)
+               pivot = rval;
+
+             DBG_KD("Created internal node; dim=%d pivot=%d", dim, pivot);
+
+             /* Split the box */
+             stk[1].bbox = stk[0].bbox;
+              stk[1].bbox.vec[1].f[dim] = pivot - stk[0].bbox.vec[0].f[dim];
+             stk[0].bbox.vec[0].f[dim] += stk[1].bbox.vec[1].f[dim];
+             stk[0].bbox.vec[1].f[dim] -= stk[1].bbox.vec[1].f[dim];
+
+             /* Fill the node structure */
+             tree.nodes[index].val = dim + (pivot << 8);
+             stk++;
+           }
+       }
+
+      DBG("Tree constructed, saving...");
+
+      struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
+      bputl(fb_tree, tree.count);
+      bputl(fb_tree, tree.depth);
+      bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
+      bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
+      bclose(fb_tree);
+
+      xfree(tree.leaves);
+      xfree(tree.nodes);
+      xfree(precords);
+      xfree(records);
+    }
+}
+
+
 static char *shortopts = CF_SHORT_OPTS "";
 static struct option longopts[] =
 {
@@ -132,6 +319,7 @@ main(int argc UNUSED, char **argv)
     usage("Invalid usage");
 
   generate_signatures(~0U);
+  build_search_tree();
   
   return 0;
 }
index 43459341b4e8708f78e8c07528c2f08addf850a0..c2482cc7c3fed99bac0784310859ff98152746ff 100644 (file)
@@ -71,7 +71,7 @@ compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct
   /* Every 4x4 block (FIXME: deal with smaller blocks near the edges) */
   PixelPacket *p = pixels;
   for (uns block_y = 0; block_y < h; block_y++, p += width & 3 + width * 3)
-    for (uns block_x = 0; block_x < w; block_x++, p += 4 - 4 * width, block++)
+    for (uns block_x = 0; block_x < w; block_x++, p -= 4 * width - 4, block++)
       {
         int t[16], s[16], *tp = t;
        
@@ -159,54 +159,59 @@ compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct
       hh_sum += blocks[i].hh;
     }
 
-  sig->vec[0] = l_sum / blocks_count;
-  sig->vec[1] = u_sum / blocks_count;
-  sig->vec[2] = v_sum / blocks_count;
-  sig->vec[3] = lh_sum / blocks_count;
-  sig->vec[4] = hl_sum / blocks_count;
-  sig->vec[5] = hh_sum / blocks_count;
+  sig->vec.f[0] = l_sum / blocks_count;
+  sig->vec.f[1] = u_sum / blocks_count;
+  sig->vec.f[2] = v_sum / blocks_count;
+  sig->vec.f[3] = lh_sum / blocks_count;
+  sig->vec.f[4] = hl_sum / blocks_count;
+  sig->vec.f[5] = hh_sum / blocks_count;
+
+  sig->len = 0;
 
   xfree(blocks);
 
-  DBG("Resulting signature is (%d, %d, %d, %d, %d, %d)", sig->vec[0], sig->vec[1], sig->vec[2], sig->vec[3], sig->vec[4], sig->vec[5]);
+  DBG("Resulting signature is (%s)", stk_print_image_vector(&sig->vec));
 }
 
-int
-compute_image_signature(void *data, uns len, struct image_signature *sig)
+static ExceptionInfo exception;
+static QuantizeInfo quantize_info;
+static ImageInfo *image_info;
+
+void
+compute_image_signature_prepare(void)
 {
-  int retval = 0;
-  
-  InitializeMagick(NULL); /* FIXME: call only once */
-  ExceptionInfo exception;
+  InitializeMagick(NULL); 
   GetExceptionInfo(&exception);
-  ImageInfo *image_info = CloneImageInfo(NULL);
+  image_info = CloneImageInfo(NULL);
   image_info->subrange = 1;
+  GetQuantizeInfo(&quantize_info);
+  quantize_info.colorspace = RGBColorspace;
+}
+
+void
+compute_image_signature_finish(void)
+{
+  DestroyImageInfo(image_info);
+  DestroyExceptionInfo(&exception);
+  DestroyMagick();
+}
 
-  DBG("Decoding");
-  Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... most of the time :-/ */
+int
+compute_image_signature(void *data, uns len, struct image_signature *sig)
+{
+  Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... takes most of CPU time :-/ */
   if (!image)
     die("Invalid image format");
   if (image->columns < 4 || image->rows < 4)
     {
       DBG("Image too small (%dx%d)", (int)image->columns, (int)image->rows);
-      retval = -1;
-      goto exit;
+      DestroyImage(image);
+      return -1;
     }
-  
-  QuantizeInfo quantize_info;
-  GetQuantizeInfo(&quantize_info);
-  quantize_info.colorspace = RGBColorspace;
-  QuantizeImage(&quantize_info, image);
-
+  QuantizeImage(&quantize_info, image); /* Also slow... and propably not necessary... */
   PixelPacket *pixels = (PixelPacket *) AcquireImagePixels(image, 0, 0, image->columns, image->rows, &exception);
-
   compute_image_area_signature(pixels, image->columns, image->rows, sig);
-  
-exit:  
   DestroyImage(image);
-  DestroyImageInfo(image_info);
-  DestroyExceptionInfo(&exception);
-  DestroyMagick();
-  return retval;
+  return 0;
 }
 
index 2399c4f4fe7c4016a2d9136ab8cf619de5691d3b..49f10c5b340759fb90cac4575307c47b61402bb5 100644 (file)
@@ -1,40 +1,69 @@
-#ifndef _IMAGES_H
-#define _IMAGES_H
+#ifndef _IMAGES_IMAGES_H
+#define _IMAGES_IMAGES_H
 
+#include <alloca.h>
 #include <stdio.h>
 #include <magick/api.h>
 
-#define IMAGE_VEC_K            6
-#define IMAGE_VEC_LOG_K                3
+#define IMAGE_VEC_K    6
+#define IMAGE_REG_K    9
+#define IMAGE_REG_MAX  4
 
-typedef u32 image_vector[IMAGE_VEC_K];
-typedef u32 image_box[2][IMAGE_VEC_K];
+typedef u16 image_feature_t;   /* 8 or 16 bits precision */
 
+/* K-dimensional feature vector */
+struct image_vector {
+  image_feature_t f[IMAGE_VEC_K];
+};
+
+/* K-dimensional interval */
+struct image_bbox {
+  struct image_vector vec[2];
+};
+
+/* Fetures for image regions */
+struct image_region {
+  image_feature_t f[IMAGE_REG_K];
+};
+
+/* Image signature */
 struct image_signature {
-  image_vector vec; 
+  struct image_vector vec;     /* Combination of all regions... simplier signature */
+  image_feature_t len;         /* Number of regions */
+  struct image_region reg[IMAGE_REG_MAX];/* Feature vector for every region */
 };
 
+/* Similarity search tree... will be changed */
 struct image_tree {
-  uns count;
-  uns depth;
-  image_box box;
-  struct image_node *nodes;
-  struct image_entry *entries;
+  uns count;                   /* Number of images in the tree */
+  uns depth;                   /* Tree depth */
+  struct image_bbox bbox;      /* Bounding box containing all the */
+  struct image_node *nodes;    /* Internal nodes */
+  struct image_leaf *leaves;   /* Leaves */
 };
 
-#define IMAGE_NODE_LEAF                0x80000000
-#define IMAGE_NODE_DIM         ((1 << IMAGE_VEC_LOG_K) - 1)
-
+/* Internal node in the search tree */
+#define IMAGE_NODE_LEAF                0x80000000              /* Node contains pointer to leaves array */
+#define IMAGE_NODE_DIM         0xff                    /* Split dimension */
 struct image_node {
-  u32 value;
+  u32 val;
 };
 
-#define IMAGE_ENTRY_LAST       (1 << (sizeof(oid_t) * 8 - 1))
-
-struct image_entry {
+/* Leaves in the search tree */
+#define IMAGE_LEAF_LAST                0x80000000              /* Last entry in the list */
+#define IMAGE_LEAF_BITS(i)     (31 / IMAGE_VEC_K)      /* Number of bits for relative position in i-th dimension */
+struct image_leaf {
+  u32 flags;           /* Relative position in bbox and last node flag */ 
   oid_t oid;
 };
 
+#define stk_print_image_vector(v) ({ struct image_vector *_v = v; \
+    byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \
+    for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; })
+
+void compute_image_signature_prepare(void);
+void compute_image_signature_finish(void);
 int compute_image_signature(void *data, uns len, struct image_signature *sig);
 
 #endif
+