DIRS+=images
-PROGS+=$(addprefix $(o)/images/,image-idx)
+PROGS+=$(addprefix $(o)/images/,image-idx image-test)
$(o)/images/image-sig.o $(o)/images/image-sig.oo: CFLAGS+=-I/usr/include/GraphicsMagick
$(o)/images/image-idx.o $(o)/images/image-idx.oo: CFLAGS+=-I/usr/include/GraphicsMagick
-$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
+$(o)/images/image-idx: $(o)/images/images.o $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
$(o)/images/image-idx: LIBS+=-lGraphicsMagick
+$(o)/images/image-test: $(o)/images/images.o $(o)/images/image-test.o $(LIBSH)
#include <string.h>
/* This should happen in gatherer or scanner */
-static void
+UNUSED static void
generate_signatures(uns limit)
{
struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
bclose(fb_signatures);
}
+UNUSED static void
+generate_random_signatures(uns count)
+{
+ log(L_INFO, "Generating %d random signatures", count);
+ struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
+ bputl(fb_signatures, count);
+ for (uns i = 0; i < count; i++)
+ {
+ oid_t oid = i;
+ struct image_vector vec;
+ for (uns j = 0; j < IMAGE_VEC_K; j++)
+ vec.f[j] = random_max(256);
+ bwrite(fb_signatures, &oid, sizeof(oid));
+ bwrite(fb_signatures, &vec, sizeof(vec));
+ bputc(fb_signatures, 0);
+ }
+ bclose(fb_signatures);
+}
+
struct signature_record {
oid_t oid;
struct image_vector vec;
DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth,
stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
uns leaves_index = 1 << (tree.depth - 1);
- tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node));
- tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf));
+ tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node));
+ tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf));
/* Initialize recursion */
struct stk {
uns value =
(record->vec.f[i] - stk->bbox.vec[0].f[i]) *
((1 << bits) - 1) / stk->bbox.vec[1].f[i];
- ASSERT(value < (1 << bits));
+ ASSERT(value < (uns)(1 << bits));
leaf->flags |= value;
}
}
struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
bputl(fb_tree, tree.count);
bputl(fb_tree, tree.depth);
+ bwrite(fb_tree, &tree.bbox, sizeof(struct image_bbox));
bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
bclose(fb_tree);
usage("Invalid usage");
generate_signatures(~0U);
+ //generate_random_signatures(1000000);
build_search_tree();
return 0;
--- /dev/null
+#include "lib/heap.h"
+
+#define IMAGE_SEARCH_DIST_UNLIMITED (~0U)
+
+/* FIXME: support full length of oid_t, currently must be <2^31 */
+#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U
+struct image_search_item {
+ u32 dist;
+ u32 index;
+ struct image_bbox bbox;
+};
+
+#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist)
+
+struct image_search {
+ struct image_tree *tree;
+ struct image_node *nodes;
+ struct image_leaf *leaves;
+ struct image_vector query;
+ struct image_search_item *buf;
+ u32 *heap;
+ uns count, visited, size, max_dist;
+};
+
+#define SQR(x) ((x)*(x))
+
+static void
+image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist)
+{
+ // FIXME: empty tree
+ is->tree = tree;
+ is->nodes = tree->nodes;
+ is->leaves = tree->leaves;
+ is->query = *query;
+ is->max_dist = max_dist;
+ is->size = 0x1000;
+ is->buf = xmalloc((is->size + 1) * sizeof(struct image_search_item));
+ is->heap = xmalloc((is->size + 1) * sizeof(u32));
+ is->visited = is->count = 1;
+ is->heap[1] = 1;
+ struct image_search_item *item = is->buf + 1;
+ item->index = 1;
+ item->bbox = tree->bbox;
+ item->dist = 0;
+ for (uns i = 0; i < IMAGE_VEC_K; i++)
+ {
+ if (query->f[i] < item->bbox.vec[0].f[i])
+ item->dist += SQR(item->bbox.vec[0].f[i] - query->f[i]);
+ else if (query->f[i] > item->bbox.vec[1].f[i])
+ item->dist += SQR(query->f[i] - item->bbox.vec[0].f[i]);
+ else
+ {
+ item->dist = 0;
+ break;
+ }
+ }
+}
+
+static void
+image_search_done(struct image_search *is)
+{
+ xfree(is->buf);
+ xfree(is->heap);
+}
+
+static void
+image_search_grow_slow(struct image_search *is)
+{
+ is->size *= 2;
+ is->buf = xrealloc(is->buf, (is->size + 1) * sizeof(struct image_search_item));
+ is->heap = xrealloc(is->heap, (is->size + 1) * sizeof(u32));
+}
+
+static inline struct image_search_item *
+image_search_grow(struct image_search *is)
+{
+ if (is->count == is->visited)
+ {
+ if (is->count == is->size)
+ image_search_grow_slow(is);
+ is->visited++;
+ is->heap[is->visited] = is->visited;
+ }
+ return is->buf + is->heap[++is->count];
+}
+
+static inline uns
+image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct image_leaf *leaf)
+{
+ uns dist = 0;
+ uns flags = leaf->flags;
+ for (uns i = 0; i < IMAGE_VEC_K; i++)
+ {
+ uns bits = IMAGE_LEAF_BITS(i);
+ uns mask = (1 << bits) - 1;
+ uns value = flags & mask;
+ flags >>= bits;
+ int dif = bbox->vec[0].f[i] + (bbox->vec[1].f[i] - bbox->vec[0].f[i]) * value / ((1 << bits) - 1) - is->query.f[i];
+ dist += dif * dif;
+ }
+ return dist;
+}
+
+static int
+image_search_next(struct image_search *is, oid_t *oid, uns *dist)
+{
+ while (likely(is->count))
+ {
+ struct image_search_item *item = is->buf + is->heap[1];
+ DBG("Main loop... dist=%d count=%d visited=%d size=%d index=0x%08x bbox=[(%s),(%s)]",
+ item->dist, is->count, is->visited, is->size, item->index,
+ stk_print_image_vector(&item->bbox.vec[0]), stk_print_image_vector(&item->bbox.vec[1]));
+ if (unlikely(item->dist > is->max_dist))
+ {
+ DBG("Maximum distance reached");
+ return 0;
+ }
+
+ /* Expand leaf */
+ if (item->index & IMAGE_SEARCH_ITEM_TYPE)
+ {
+ *oid = item->index & ~IMAGE_SEARCH_ITEM_TYPE;
+ *dist = item->dist;
+ DBG("Found item %d at distance %d", *oid, *dist);
+ HEAP_DELMIN(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ return 1;
+ }
+
+ /* Expand node with leaves */
+ else if (is->nodes[item->index].val & IMAGE_NODE_LEAF)
+ {
+ DBG("Expanding node to list of leaves");
+ struct image_leaf *leaf = is->leaves + (is->nodes[item->index].val & ~IMAGE_NODE_LEAF);
+ item->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+ item->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+ HEAP_INCREASE(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP, 1);
+ while (!((leaf++)->flags & IMAGE_LEAF_LAST))
+ {
+ struct image_search_item *nitem = image_search_grow(is);
+ nitem->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+ nitem->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+ HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ }
+ }
+
+ /* Expand internal node */
+ else
+ {
+ DBG("Expanding internal node");
+ struct image_search_item *nitem = image_search_grow(is);
+ uns dim = is->nodes[item->index].val & IMAGE_NODE_DIM;
+ uns pivot = is->nodes[item->index].val >> 8;
+ item->index *= 2;
+ nitem->bbox = item->bbox;
+ nitem->dist = item->dist;
+ uns query = is->query.f[dim];
+ int dif = query - pivot;
+ if (dif > 0)
+ {
+ nitem->index = item->index++;
+ item->bbox.vec[0].f[dim] = pivot;
+ nitem->bbox.vec[1].f[dim] = pivot;
+ if (query > item->bbox.vec[1].f[dim])
+ nitem->dist -= SQR(query - item->bbox.vec[1].f[dim]);
+ }
+ else
+ {
+ nitem->index = item->index + 1;
+ item->bbox.vec[1].f[dim] = pivot;
+ nitem->bbox.vec[0].f[dim] = pivot;
+ if (query < item->bbox.vec[0].f[dim])
+ nitem->dist -= SQR(item->bbox.vec[0].f[dim] - query);
+ }
+ nitem->dist += SQR(dif);
+ HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ }
+ }
+ DBG("Heap is empty");
+ return 0;
+}
+
#include "lib/fastbuf.h"
#include "images/images.h"
+#include <magick/api.h>
+
/*
* Color spaces
*
--- /dev/null
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "images/images.h"
+#include "images/image-search.h"
+#include "sherlock/index.h"
+#include "lib/mempool.h"
+#include "sherlock/object.h"
+#include "sherlock/lizard-fb.h"
+#include "sherlock/lizard-fb.h"
+#include <fcntl.h>
+#include <stdio.h>
+
+#define BEST_CNT 20
+
+int
+main(int argc, char **argv)
+{
+ struct image_vector query;
+ if (argc != IMAGE_VEC_K + 1)
+ die("Invalid number of arguments");
+
+ for (uns i = 0; i < IMAGE_VEC_K; i++)
+ {
+ uns v;
+ if (sscanf(argv[i + 1], "%d", &v) != 1)
+ die("Invalid numeric format");
+ query.f[i] = v;
+ }
+
+
+ struct image_search is;
+ oid_t best[BEST_CNT];
+ uns dist[BEST_CNT];
+ uns cardpos[BEST_CNT];
+ uns best_n = 0;
+
+ image_tree_init();
+ log(L_INFO, "Executing query (%s)", stk_print_image_vector(&query));
+ image_search_init(&is, &image_tree, &query, IMAGE_SEARCH_DIST_UNLIMITED);
+ for (uns i = 0; i < BEST_CNT; i++)
+ {
+ if (!image_search_next(&is, best + i, dist + i))
+ {
+ log(L_INFO, "No more images");
+ break;
+ }
+ log(L_INFO, "*** Found %d. best image with oid=%d", i + 1, best[i]);
+ best_n++;
+ }
+ image_search_done(&is);
+ image_tree_done();
+
+ log(L_INFO, "Resolving URLs");
+ struct mempool *pool = mp_new(1 << 16);
+ struct buck2obj_buf *bob = buck2obj_alloc();
+ struct fastbuf *fb = bopen("index/card-attrs", O_RDONLY, 1 << 10);
+ for (uns i = 0; i < best_n; i++)
+ {
+ bsetpos(fb, best[i] * sizeof(struct card_attr));
+ struct card_attr ca;
+ bread(fb, &ca, sizeof(ca));
+ cardpos[i] = ca.card;
+ }
+ bclose(fb);
+ fb = bopen("index/cards", O_RDONLY, 1 << 14);
+ for (uns i = 0; i < best_n; i++)
+ {
+ bsetpos(fb, (sh_off_t)cardpos[i] << CARD_POS_SHIFT);
+ uns buck_len = bgetl(fb) - (LIZARD_COMPRESS_HEADER - 1);
+ uns buck_type = bgetc(fb) + BUCKET_TYPE_PLAIN;
+ mp_flush(pool);
+ struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb, NULL);
+
+ printf("%2d. match: dist=%-8d oid=%-8d url=%s\n", i + 1, dist[i], best[i],
+ obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
+ }
+ bclose(fb);
+ buck2obj_free(bob);
+ mp_delete(pool);
+
+ return 0;
+}
+
--- /dev/null
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/fastbuf.h"
+#include "images/images.h"
+#include "sherlock/index.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+
+struct image_tree image_tree;
+
+void
+image_tree_init(void)
+{
+ DBG("Initializing image search structures");
+ struct fastbuf *fb = bopen("index/image-tree", O_RDONLY, 1 << 16); /* FIXME: filename hack */
+ image_tree.count = bgetl(fb);
+ image_tree.depth = bgetl(fb);
+ ASSERT(image_tree.count < 0x80000000 && image_tree.depth > 0 && image_tree.depth < 30);
+ image_tree.nodes = xmalloc((1 << image_tree.depth) * sizeof(struct image_node));
+ image_tree.leaves = xmalloc(image_tree.count * sizeof(struct image_leaf));
+ bread(fb, &image_tree.bbox, sizeof(struct image_bbox));
+ bread(fb, image_tree.nodes + 1, ((1 << image_tree.depth) - 1) * sizeof(struct image_node));
+ bread(fb, image_tree.leaves, image_tree.count * sizeof(struct image_leaf));
+ DBG("Search tree with depth %d and %d leaves loaded", image_tree.depth, image_tree.count);
+ bclose(fb);
+}
+
+void
+image_tree_done(void)
+{
+ DBG("Freeing image search structures");
+ xfree(image_tree.nodes);
+ xfree(image_tree.leaves);
+}
+
#include <alloca.h>
#include <stdio.h>
-#include <magick/api.h>
#define IMAGE_VEC_K 6
#define IMAGE_REG_K 9
byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \
for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; })
+extern struct image_tree image_tree;
+
+void image_tree_init(void);
+void image_tree_done(void);
+
void compute_image_signature_prepare(void);
void compute_image_signature_finish(void);
int compute_image_signature(void *data, uns len, struct image_signature *sig);