]> mj.ucw.cz Git - libucw.git/commitdiff
Simple image search... under construction
authorPavel Charvat <pavel.charvat@netcentrum.cz>
Sun, 9 Apr 2006 21:03:39 +0000 (23:03 +0200)
committerPavel Charvat <pavel.charvat@netcentrum.cz>
Sun, 9 Apr 2006 21:03:39 +0000 (23:03 +0200)
images/Makefile
images/image-idx.c
images/image-search.h [new file with mode: 0644]
images/image-sig.c
images/image-test.c [new file with mode: 0644]
images/images.c [new file with mode: 0644]
images/images.h

index 76a2d24512a8366ae64f43cd5ca548113bf38630..e19e8343a4aafcbace484e823de861e92c2e808d 100644 (file)
@@ -2,10 +2,11 @@
 
 DIRS+=images
 
-PROGS+=$(addprefix $(o)/images/,image-idx)
+PROGS+=$(addprefix $(o)/images/,image-idx image-test)
 
 $(o)/images/image-sig.o $(o)/images/image-sig.oo: CFLAGS+=-I/usr/include/GraphicsMagick
 $(o)/images/image-idx.o $(o)/images/image-idx.oo: CFLAGS+=-I/usr/include/GraphicsMagick
-$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
+$(o)/images/image-idx: $(o)/images/images.o $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
 $(o)/images/image-idx: LIBS+=-lGraphicsMagick
+$(o)/images/image-test: $(o)/images/images.o $(o)/images/image-test.o $(LIBSH)
 
index 21024df39caf95125dfdba412dde1caa49efb354..074b60bceecf2bfebe12ee5f5350781fab2329ab 100644 (file)
@@ -28,7 +28,7 @@
 #include <string.h>
 
 /* This should happen in gatherer or scanner */
-static void
+UNUSED static void
 generate_signatures(uns limit)
 {
   struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
@@ -101,6 +101,25 @@ generate_signatures(uns limit)
   bclose(fb_signatures);
 }
 
+UNUSED static void
+generate_random_signatures(uns count)
+{
+  log(L_INFO, "Generating %d random signatures", count);
+  struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
+  bputl(fb_signatures, count);
+  for (uns i = 0; i < count; i++)
+    {
+      oid_t oid = i;
+      struct image_vector vec;
+      for (uns j = 0; j < IMAGE_VEC_K; j++)
+       vec.f[j] = random_max(256); 
+      bwrite(fb_signatures, &oid, sizeof(oid));
+      bwrite(fb_signatures, &vec, sizeof(vec));
+      bputc(fb_signatures, 0);
+    }
+  bclose(fb_signatures); 
+}
+
 struct signature_record {
   oid_t oid;
   struct image_vector vec;
@@ -161,8 +180,8 @@ build_search_tree(void)
       DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, 
          stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
       uns leaves_index = 1 << (tree.depth - 1);
-      tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node));
-      tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf));
+      tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node));
+      tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf));
      
       /* Initialize recursion */
       struct stk {
@@ -205,7 +224,7 @@ build_search_tree(void)
                          uns value =
                            (record->vec.f[i] - stk->bbox.vec[0].f[i]) * 
                            ((1 << bits) - 1) / stk->bbox.vec[1].f[i];
-                         ASSERT(value < (1 << bits));
+                         ASSERT(value < (uns)(1 << bits));
                          leaf->flags |= value;
                        }
                    }
@@ -266,6 +285,7 @@ build_search_tree(void)
       struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
       bputl(fb_tree, tree.count);
       bputl(fb_tree, tree.depth);
+      bwrite(fb_tree, &tree.bbox, sizeof(struct image_bbox));
       bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
       bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
       bclose(fb_tree);
@@ -319,6 +339,7 @@ main(int argc UNUSED, char **argv)
     usage("Invalid usage");
 
   generate_signatures(~0U);
+  //generate_random_signatures(1000000);
   build_search_tree();
   
   return 0;
diff --git a/images/image-search.h b/images/image-search.h
new file mode 100644 (file)
index 0000000..25d3417
--- /dev/null
@@ -0,0 +1,181 @@
+#include "lib/heap.h"
+
+#define IMAGE_SEARCH_DIST_UNLIMITED    (~0U)
+
+/* FIXME: support full length of oid_t, currently must be <2^31 */
+#define IMAGE_SEARCH_ITEM_TYPE         0x80000000U
+struct image_search_item {
+  u32 dist;
+  u32 index;
+  struct image_bbox bbox;
+};
+
+#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist)
+
+struct image_search {
+  struct image_tree *tree;
+  struct image_node *nodes;
+  struct image_leaf *leaves;
+  struct image_vector query;
+  struct image_search_item *buf;
+  u32 *heap;
+  uns count, visited, size, max_dist;
+};
+
+#define SQR(x) ((x)*(x))
+
+static void
+image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist)
+{
+  // FIXME: empty tree
+  is->tree = tree;
+  is->nodes = tree->nodes;
+  is->leaves = tree->leaves;
+  is->query = *query;
+  is->max_dist = max_dist;
+  is->size = 0x1000;
+  is->buf = xmalloc((is->size + 1) * sizeof(struct image_search_item));
+  is->heap = xmalloc((is->size + 1) * sizeof(u32));
+  is->visited = is->count = 1;
+  is->heap[1] = 1;
+  struct image_search_item *item = is->buf + 1;
+  item->index = 1;
+  item->bbox = tree->bbox;
+  item->dist = 0;
+  for (uns i = 0; i < IMAGE_VEC_K; i++)
+    {
+      if (query->f[i] < item->bbox.vec[0].f[i])
+       item->dist += SQR(item->bbox.vec[0].f[i] - query->f[i]);
+      else if (query->f[i] > item->bbox.vec[1].f[i])
+       item->dist += SQR(query->f[i] - item->bbox.vec[0].f[i]);
+      else
+        {
+         item->dist = 0;
+         break;
+       }
+    }
+}
+
+static void
+image_search_done(struct image_search *is)
+{
+  xfree(is->buf);
+  xfree(is->heap);
+}
+
+static void
+image_search_grow_slow(struct image_search *is)
+{
+  is->size *= 2;
+  is->buf = xrealloc(is->buf, (is->size + 1) * sizeof(struct image_search_item));
+  is->heap = xrealloc(is->heap, (is->size + 1) * sizeof(u32));
+}
+
+static inline struct image_search_item *
+image_search_grow(struct image_search *is)
+{
+  if (is->count == is->visited)
+  {
+    if (is->count == is->size)
+      image_search_grow_slow(is);
+    is->visited++;
+    is->heap[is->visited] = is->visited;
+  }
+  return is->buf + is->heap[++is->count];
+}
+
+static inline uns
+image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct image_leaf *leaf)
+{
+  uns dist = 0;
+  uns flags = leaf->flags; 
+  for (uns i = 0; i < IMAGE_VEC_K; i++)
+    {
+      uns bits = IMAGE_LEAF_BITS(i);
+      uns mask = (1 << bits) - 1;
+      uns value = flags & mask;
+      flags >>= bits;
+      int dif = bbox->vec[0].f[i] + (bbox->vec[1].f[i] - bbox->vec[0].f[i]) * value / ((1 << bits) - 1) - is->query.f[i];
+      dist += dif * dif;
+    }
+  return dist;
+}
+
+static int
+image_search_next(struct image_search *is, oid_t *oid, uns *dist)
+{
+  while (likely(is->count))
+    {
+      struct image_search_item *item = is->buf + is->heap[1];
+      DBG("Main loop... dist=%d count=%d visited=%d size=%d index=0x%08x bbox=[(%s),(%s)]", 
+         item->dist, is->count, is->visited, is->size, item->index, 
+         stk_print_image_vector(&item->bbox.vec[0]), stk_print_image_vector(&item->bbox.vec[1]));
+      if (unlikely(item->dist > is->max_dist))
+        {
+         DBG("Maximum distance reached");
+         return 0;
+       }
+      
+      /* Expand leaf */
+      if (item->index & IMAGE_SEARCH_ITEM_TYPE)
+        {
+         *oid = item->index & ~IMAGE_SEARCH_ITEM_TYPE;
+         *dist = item->dist;
+         DBG("Found item %d at distance %d", *oid, *dist);
+         HEAP_DELMIN(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+         return 1;
+       }
+      
+      /* Expand node with leaves */
+      else if (is->nodes[item->index].val & IMAGE_NODE_LEAF)
+        {
+         DBG("Expanding node to list of leaves");
+         struct image_leaf *leaf = is->leaves + (is->nodes[item->index].val & ~IMAGE_NODE_LEAF);
+         item->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+         item->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+         HEAP_INCREASE(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP, 1);
+         while (!((leaf++)->flags & IMAGE_LEAF_LAST))
+           {
+             struct image_search_item *nitem = image_search_grow(is);
+             nitem->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+             nitem->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+             HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+           }
+       }
+
+      /* Expand internal node */
+      else
+        {
+         DBG("Expanding internal node");
+         struct image_search_item *nitem = image_search_grow(is);
+         uns dim = is->nodes[item->index].val & IMAGE_NODE_DIM;
+         uns pivot = is->nodes[item->index].val >> 8;
+         item->index *= 2;
+         nitem->bbox = item->bbox;
+         nitem->dist = item->dist;
+         uns query = is->query.f[dim];
+         int dif = query - pivot;
+         if (dif > 0)
+           {
+             nitem->index = item->index++;
+             item->bbox.vec[0].f[dim] = pivot;
+             nitem->bbox.vec[1].f[dim] = pivot;
+             if (query > item->bbox.vec[1].f[dim])
+               nitem->dist -= SQR(query - item->bbox.vec[1].f[dim]);
+           }
+         else
+           {
+             nitem->index = item->index + 1;
+             item->bbox.vec[1].f[dim] = pivot;
+             nitem->bbox.vec[0].f[dim] = pivot;
+             if (query < item->bbox.vec[0].f[dim])
+               nitem->dist -= SQR(item->bbox.vec[0].f[dim] - query);
+           }
+         nitem->dist += SQR(dif);
+         HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+       }
+    }
+  DBG("Heap is empty");
+  return 0;
+}
+
index c2482cc7c3fed99bac0784310859ff98152746ff..ebce51972a24f61b0e7e42784c2b36d9f25aad93 100644 (file)
@@ -5,6 +5,8 @@
 #include "lib/fastbuf.h"
 #include "images/images.h"
 
+#include <magick/api.h>
+
 /*
  * Color spaces
  * 
diff --git a/images/image-test.c b/images/image-test.c
new file mode 100644 (file)
index 0000000..6ea0e23
--- /dev/null
@@ -0,0 +1,85 @@
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "images/images.h"
+#include "images/image-search.h"
+#include "sherlock/index.h"
+#include "lib/mempool.h"
+#include "sherlock/object.h"
+#include "sherlock/lizard-fb.h"
+#include "sherlock/lizard-fb.h"
+#include <fcntl.h>
+#include <stdio.h>
+
+#define BEST_CNT 20
+
+int
+main(int argc, char **argv)
+{
+  struct image_vector query;
+  if (argc != IMAGE_VEC_K + 1)
+    die("Invalid number of arguments");
+
+  for (uns i = 0; i < IMAGE_VEC_K; i++)
+    {
+      uns v;
+      if (sscanf(argv[i + 1], "%d", &v) != 1)
+       die("Invalid numeric format");
+      query.f[i] = v;
+    }
+  
+  
+  struct image_search is;
+  oid_t best[BEST_CNT];
+  uns dist[BEST_CNT];
+  uns cardpos[BEST_CNT];
+  uns best_n = 0;
+  
+  image_tree_init();
+  log(L_INFO, "Executing query (%s)", stk_print_image_vector(&query));
+  image_search_init(&is, &image_tree, &query, IMAGE_SEARCH_DIST_UNLIMITED);
+  for (uns i = 0; i < BEST_CNT; i++)
+    {
+      if (!image_search_next(&is, best + i, dist + i))
+        {
+          log(L_INFO, "No more images");
+          break;
+        }
+      log(L_INFO, "*** Found %d. best image with oid=%d", i + 1, best[i]);
+      best_n++;
+    }
+  image_search_done(&is);
+  image_tree_done();
+
+  log(L_INFO, "Resolving URLs");
+  struct mempool *pool = mp_new(1 << 16);
+  struct buck2obj_buf *bob = buck2obj_alloc();
+  struct fastbuf *fb = bopen("index/card-attrs", O_RDONLY, 1 << 10);
+  for (uns i = 0; i < best_n; i++)
+    {
+      bsetpos(fb, best[i] * sizeof(struct card_attr));
+      struct card_attr ca;
+      bread(fb, &ca, sizeof(ca));
+      cardpos[i] = ca.card;
+    }
+  bclose(fb);
+  fb = bopen("index/cards", O_RDONLY, 1 << 14);
+  for (uns i = 0; i < best_n; i++)
+    {
+      bsetpos(fb, (sh_off_t)cardpos[i] << CARD_POS_SHIFT);
+      uns buck_len = bgetl(fb) - (LIZARD_COMPRESS_HEADER - 1);
+      uns buck_type = bgetc(fb) + BUCKET_TYPE_PLAIN;
+      mp_flush(pool);
+      struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb, NULL);
+
+      printf("%2d. match: dist=%-8d oid=%-8d url=%s\n", i + 1, dist[i], best[i],
+         obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
+    }
+  bclose(fb);
+  buck2obj_free(bob);
+  mp_delete(pool);
+     
+  return 0;
+}
+
diff --git a/images/images.c b/images/images.c
new file mode 100644 (file)
index 0000000..28178c5
--- /dev/null
@@ -0,0 +1,37 @@
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "images/images.h"
+#include "sherlock/index.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+
+struct image_tree image_tree;
+
+void
+image_tree_init(void)
+{
+  DBG("Initializing image search structures");
+  struct fastbuf *fb = bopen("index/image-tree", O_RDONLY, 1 << 16); /* FIXME: filename hack */
+  image_tree.count = bgetl(fb);
+  image_tree.depth = bgetl(fb);
+  ASSERT(image_tree.count < 0x80000000 && image_tree.depth > 0 && image_tree.depth < 30);
+  image_tree.nodes = xmalloc((1 << image_tree.depth) * sizeof(struct image_node));
+  image_tree.leaves = xmalloc(image_tree.count * sizeof(struct image_leaf));
+  bread(fb, &image_tree.bbox, sizeof(struct image_bbox));
+  bread(fb, image_tree.nodes + 1, ((1 << image_tree.depth) - 1) * sizeof(struct image_node));
+  bread(fb, image_tree.leaves, image_tree.count * sizeof(struct image_leaf));
+  DBG("Search tree with depth %d and %d leaves loaded", image_tree.depth, image_tree.count);
+  bclose(fb);
+}
+
+void
+image_tree_done(void)
+{
+  DBG("Freeing image search structures");
+  xfree(image_tree.nodes);
+  xfree(image_tree.leaves);
+}
+
index 49f10c5b340759fb90cac4575307c47b61402bb5..763006d9d4cc200158d192a6810f82bfda412480 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <alloca.h>
 #include <stdio.h>
-#include <magick/api.h>
 
 #define IMAGE_VEC_K    6
 #define IMAGE_REG_K    9
@@ -61,6 +60,11 @@ struct image_leaf {
     byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \
     for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; })
 
+extern struct image_tree image_tree;
+
+void image_tree_init(void);
+void image_tree_done(void);
+
 void compute_image_signature_prepare(void);
 void compute_image_signature_finish(void);
 int compute_image_signature(void *data, uns len, struct image_signature *sig);