From cf1fb1b7fa95f06ff3f3d6017506d5ec4f53f470 Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Sun, 9 Apr 2006 23:03:39 +0200 Subject: [PATCH] Simple image search... under construction --- images/Makefile | 5 +- images/image-idx.c | 29 ++++++- images/image-search.h | 181 ++++++++++++++++++++++++++++++++++++++++++ images/image-sig.c | 2 + images/image-test.c | 85 ++++++++++++++++++++ images/images.c | 37 +++++++++ images/images.h | 6 +- 7 files changed, 338 insertions(+), 7 deletions(-) create mode 100644 images/image-search.h create mode 100644 images/image-test.c create mode 100644 images/images.c diff --git a/images/Makefile b/images/Makefile index 76a2d245..e19e8343 100644 --- a/images/Makefile +++ b/images/Makefile @@ -2,10 +2,11 @@ DIRS+=images -PROGS+=$(addprefix $(o)/images/,image-idx) +PROGS+=$(addprefix $(o)/images/,image-idx image-test) $(o)/images/image-sig.o $(o)/images/image-sig.oo: CFLAGS+=-I/usr/include/GraphicsMagick $(o)/images/image-idx.o $(o)/images/image-idx.oo: CFLAGS+=-I/usr/include/GraphicsMagick -$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET) +$(o)/images/image-idx: $(o)/images/images.o $(o)/images/image-idx.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET) $(o)/images/image-idx: LIBS+=-lGraphicsMagick +$(o)/images/image-test: $(o)/images/images.o $(o)/images/image-test.o $(LIBSH) diff --git a/images/image-idx.c b/images/image-idx.c index 21024df3..074b60bc 100644 --- a/images/image-idx.c +++ b/images/image-idx.c @@ -28,7 +28,7 @@ #include /* This should happen in gatherer or scanner */ -static void +UNUSED static void generate_signatures(uns limit) { struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY); @@ -101,6 +101,25 @@ generate_signatures(uns limit) bclose(fb_signatures); } +UNUSED static void +generate_random_signatures(uns count) +{ + log(L_INFO, "Generating %d random signatures", count); + struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC); + bputl(fb_signatures, count); + for (uns i = 0; i < count; i++) + { + oid_t oid = i; + struct image_vector vec; + for (uns j = 0; j < IMAGE_VEC_K; j++) + vec.f[j] = random_max(256); + bwrite(fb_signatures, &oid, sizeof(oid)); + bwrite(fb_signatures, &vec, sizeof(vec)); + bputc(fb_signatures, 0); + } + bclose(fb_signatures); +} + struct signature_record { oid_t oid; struct image_vector vec; @@ -161,8 +180,8 @@ build_search_tree(void) DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1)); uns leaves_index = 1 << (tree.depth - 1); - tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node)); - tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf)); + tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node)); + tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf)); /* Initialize recursion */ struct stk { @@ -205,7 +224,7 @@ build_search_tree(void) uns value = (record->vec.f[i] - stk->bbox.vec[0].f[i]) * ((1 << bits) - 1) / stk->bbox.vec[1].f[i]; - ASSERT(value < (1 << bits)); + ASSERT(value < (uns)(1 << bits)); leaf->flags |= value; } } @@ -266,6 +285,7 @@ build_search_tree(void) struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC); bputl(fb_tree, tree.count); bputl(fb_tree, tree.depth); + bwrite(fb_tree, &tree.bbox, sizeof(struct image_bbox)); bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node)); bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf)); bclose(fb_tree); @@ -319,6 +339,7 @@ main(int argc UNUSED, char **argv) usage("Invalid usage"); generate_signatures(~0U); + //generate_random_signatures(1000000); build_search_tree(); return 0; diff --git a/images/image-search.h b/images/image-search.h new file mode 100644 index 00000000..25d34171 --- /dev/null +++ b/images/image-search.h @@ -0,0 +1,181 @@ +#include "lib/heap.h" + +#define IMAGE_SEARCH_DIST_UNLIMITED (~0U) + +/* FIXME: support full length of oid_t, currently must be <2^31 */ +#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U +struct image_search_item { + u32 dist; + u32 index; + struct image_bbox bbox; +}; + +#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist) + +struct image_search { + struct image_tree *tree; + struct image_node *nodes; + struct image_leaf *leaves; + struct image_vector query; + struct image_search_item *buf; + u32 *heap; + uns count, visited, size, max_dist; +}; + +#define SQR(x) ((x)*(x)) + +static void +image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist) +{ + // FIXME: empty tree + is->tree = tree; + is->nodes = tree->nodes; + is->leaves = tree->leaves; + is->query = *query; + is->max_dist = max_dist; + is->size = 0x1000; + is->buf = xmalloc((is->size + 1) * sizeof(struct image_search_item)); + is->heap = xmalloc((is->size + 1) * sizeof(u32)); + is->visited = is->count = 1; + is->heap[1] = 1; + struct image_search_item *item = is->buf + 1; + item->index = 1; + item->bbox = tree->bbox; + item->dist = 0; + for (uns i = 0; i < IMAGE_VEC_K; i++) + { + if (query->f[i] < item->bbox.vec[0].f[i]) + item->dist += SQR(item->bbox.vec[0].f[i] - query->f[i]); + else if (query->f[i] > item->bbox.vec[1].f[i]) + item->dist += SQR(query->f[i] - item->bbox.vec[0].f[i]); + else + { + item->dist = 0; + break; + } + } +} + +static void +image_search_done(struct image_search *is) +{ + xfree(is->buf); + xfree(is->heap); +} + +static void +image_search_grow_slow(struct image_search *is) +{ + is->size *= 2; + is->buf = xrealloc(is->buf, (is->size + 1) * sizeof(struct image_search_item)); + is->heap = xrealloc(is->heap, (is->size + 1) * sizeof(u32)); +} + +static inline struct image_search_item * +image_search_grow(struct image_search *is) +{ + if (is->count == is->visited) + { + if (is->count == is->size) + image_search_grow_slow(is); + is->visited++; + is->heap[is->visited] = is->visited; + } + return is->buf + is->heap[++is->count]; +} + +static inline uns +image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct image_leaf *leaf) +{ + uns dist = 0; + uns flags = leaf->flags; + for (uns i = 0; i < IMAGE_VEC_K; i++) + { + uns bits = IMAGE_LEAF_BITS(i); + uns mask = (1 << bits) - 1; + uns value = flags & mask; + flags >>= bits; + int dif = bbox->vec[0].f[i] + (bbox->vec[1].f[i] - bbox->vec[0].f[i]) * value / ((1 << bits) - 1) - is->query.f[i]; + dist += dif * dif; + } + return dist; +} + +static int +image_search_next(struct image_search *is, oid_t *oid, uns *dist) +{ + while (likely(is->count)) + { + struct image_search_item *item = is->buf + is->heap[1]; + DBG("Main loop... dist=%d count=%d visited=%d size=%d index=0x%08x bbox=[(%s),(%s)]", + item->dist, is->count, is->visited, is->size, item->index, + stk_print_image_vector(&item->bbox.vec[0]), stk_print_image_vector(&item->bbox.vec[1])); + if (unlikely(item->dist > is->max_dist)) + { + DBG("Maximum distance reached"); + return 0; + } + + /* Expand leaf */ + if (item->index & IMAGE_SEARCH_ITEM_TYPE) + { + *oid = item->index & ~IMAGE_SEARCH_ITEM_TYPE; + *dist = item->dist; + DBG("Found item %d at distance %d", *oid, *dist); + HEAP_DELMIN(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP); + return 1; + } + + /* Expand node with leaves */ + else if (is->nodes[item->index].val & IMAGE_NODE_LEAF) + { + DBG("Expanding node to list of leaves"); + struct image_leaf *leaf = is->leaves + (is->nodes[item->index].val & ~IMAGE_NODE_LEAF); + item->dist = image_search_leaf_dist(is, &item->bbox, leaf); + item->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid; + HEAP_INCREASE(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP, 1); + while (!((leaf++)->flags & IMAGE_LEAF_LAST)) + { + struct image_search_item *nitem = image_search_grow(is); + nitem->dist = image_search_leaf_dist(is, &item->bbox, leaf); + nitem->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid; + HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP); + } + } + + /* Expand internal node */ + else + { + DBG("Expanding internal node"); + struct image_search_item *nitem = image_search_grow(is); + uns dim = is->nodes[item->index].val & IMAGE_NODE_DIM; + uns pivot = is->nodes[item->index].val >> 8; + item->index *= 2; + nitem->bbox = item->bbox; + nitem->dist = item->dist; + uns query = is->query.f[dim]; + int dif = query - pivot; + if (dif > 0) + { + nitem->index = item->index++; + item->bbox.vec[0].f[dim] = pivot; + nitem->bbox.vec[1].f[dim] = pivot; + if (query > item->bbox.vec[1].f[dim]) + nitem->dist -= SQR(query - item->bbox.vec[1].f[dim]); + } + else + { + nitem->index = item->index + 1; + item->bbox.vec[1].f[dim] = pivot; + nitem->bbox.vec[0].f[dim] = pivot; + if (query < item->bbox.vec[0].f[dim]) + nitem->dist -= SQR(item->bbox.vec[0].f[dim] - query); + } + nitem->dist += SQR(dif); + HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP); + } + } + DBG("Heap is empty"); + return 0; +} + diff --git a/images/image-sig.c b/images/image-sig.c index c2482cc7..ebce5197 100644 --- a/images/image-sig.c +++ b/images/image-sig.c @@ -5,6 +5,8 @@ #include "lib/fastbuf.h" #include "images/images.h" +#include + /* * Color spaces * diff --git a/images/image-test.c b/images/image-test.c new file mode 100644 index 00000000..6ea0e236 --- /dev/null +++ b/images/image-test.c @@ -0,0 +1,85 @@ +#define LOCAL_DEBUG + +#include "sherlock/sherlock.h" +#include "lib/fastbuf.h" +#include "images/images.h" +#include "images/image-search.h" +#include "sherlock/index.h" +#include "lib/mempool.h" +#include "sherlock/object.h" +#include "sherlock/lizard-fb.h" +#include "sherlock/lizard-fb.h" +#include +#include + +#define BEST_CNT 20 + +int +main(int argc, char **argv) +{ + struct image_vector query; + if (argc != IMAGE_VEC_K + 1) + die("Invalid number of arguments"); + + for (uns i = 0; i < IMAGE_VEC_K; i++) + { + uns v; + if (sscanf(argv[i + 1], "%d", &v) != 1) + die("Invalid numeric format"); + query.f[i] = v; + } + + + struct image_search is; + oid_t best[BEST_CNT]; + uns dist[BEST_CNT]; + uns cardpos[BEST_CNT]; + uns best_n = 0; + + image_tree_init(); + log(L_INFO, "Executing query (%s)", stk_print_image_vector(&query)); + image_search_init(&is, &image_tree, &query, IMAGE_SEARCH_DIST_UNLIMITED); + for (uns i = 0; i < BEST_CNT; i++) + { + if (!image_search_next(&is, best + i, dist + i)) + { + log(L_INFO, "No more images"); + break; + } + log(L_INFO, "*** Found %d. best image with oid=%d", i + 1, best[i]); + best_n++; + } + image_search_done(&is); + image_tree_done(); + + log(L_INFO, "Resolving URLs"); + struct mempool *pool = mp_new(1 << 16); + struct buck2obj_buf *bob = buck2obj_alloc(); + struct fastbuf *fb = bopen("index/card-attrs", O_RDONLY, 1 << 10); + for (uns i = 0; i < best_n; i++) + { + bsetpos(fb, best[i] * sizeof(struct card_attr)); + struct card_attr ca; + bread(fb, &ca, sizeof(ca)); + cardpos[i] = ca.card; + } + bclose(fb); + fb = bopen("index/cards", O_RDONLY, 1 << 14); + for (uns i = 0; i < best_n; i++) + { + bsetpos(fb, (sh_off_t)cardpos[i] << CARD_POS_SHIFT); + uns buck_len = bgetl(fb) - (LIZARD_COMPRESS_HEADER - 1); + uns buck_type = bgetc(fb) + BUCKET_TYPE_PLAIN; + mp_flush(pool); + struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb, NULL); + + printf("%2d. match: dist=%-8d oid=%-8d url=%s\n", i + 1, dist[i], best[i], + obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U')); + } + bclose(fb); + buck2obj_free(bob); + mp_delete(pool); + + return 0; +} + diff --git a/images/images.c b/images/images.c new file mode 100644 index 00000000..28178c56 --- /dev/null +++ b/images/images.c @@ -0,0 +1,37 @@ +#define LOCAL_DEBUG + +#include "sherlock/sherlock.h" +#include "lib/fastbuf.h" +#include "images/images.h" +#include "sherlock/index.h" + +#include +#include + +struct image_tree image_tree; + +void +image_tree_init(void) +{ + DBG("Initializing image search structures"); + struct fastbuf *fb = bopen("index/image-tree", O_RDONLY, 1 << 16); /* FIXME: filename hack */ + image_tree.count = bgetl(fb); + image_tree.depth = bgetl(fb); + ASSERT(image_tree.count < 0x80000000 && image_tree.depth > 0 && image_tree.depth < 30); + image_tree.nodes = xmalloc((1 << image_tree.depth) * sizeof(struct image_node)); + image_tree.leaves = xmalloc(image_tree.count * sizeof(struct image_leaf)); + bread(fb, &image_tree.bbox, sizeof(struct image_bbox)); + bread(fb, image_tree.nodes + 1, ((1 << image_tree.depth) - 1) * sizeof(struct image_node)); + bread(fb, image_tree.leaves, image_tree.count * sizeof(struct image_leaf)); + DBG("Search tree with depth %d and %d leaves loaded", image_tree.depth, image_tree.count); + bclose(fb); +} + +void +image_tree_done(void) +{ + DBG("Freeing image search structures"); + xfree(image_tree.nodes); + xfree(image_tree.leaves); +} + diff --git a/images/images.h b/images/images.h index 49f10c5b..763006d9 100644 --- a/images/images.h +++ b/images/images.h @@ -3,7 +3,6 @@ #include #include -#include #define IMAGE_VEC_K 6 #define IMAGE_REG_K 9 @@ -61,6 +60,11 @@ struct image_leaf { byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \ for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; }) +extern struct image_tree image_tree; + +void image_tree_init(void); +void image_tree_done(void); + void compute_image_signature_prepare(void); void compute_image_signature_finish(void); int compute_image_signature(void *data, uns len, struct image_signature *sig); -- 2.39.2