static void
generate_signatures(uns limit)
{
- struct fastbuf *cards = index_bopen("cards", O_RDONLY);
- struct fastbuf *card_attrs = index_bopen("card-attrs", O_RDONLY);
- struct fastbuf *signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
+ struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
+ struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
+ struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
struct card_attr ca;
struct image_signature sig;
struct mempool *pool = mp_new(1 << 16);
struct buck2obj_buf *bob = buck2obj_alloc();
- oid_t oid = 0;
+ uns count = 0;
- DBG("Generating signatures");
+ log(L_INFO, "Generating image signatures");
+ bputl(fb_signatures, 0);
+ compute_image_signature_prepare();
- for (; bread(card_attrs, &ca, sizeof(ca)); oid++)
+ for (oid_t oid = 0; bread(fb_card_attrs, &ca, sizeof(ca)); oid++)
if ((uns)((ca.type_flags >> 4) - 8) < 4)
{
- bsetpos(cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
- uns buck_len = bgetl(cards)-(LIZARD_COMPRESS_HEADER-1);
- uns buck_type = bgetc(cards) + BUCKET_TYPE_PLAIN;
+ bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
+ uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1);
+ uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN;
mp_flush(pool);
- struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, cards, NULL);
+ struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb_cards, NULL);
struct oattr *attr;
if (!obj)
die("Failed to read card");
int err = compute_image_signature(thumb, thumb_len, &sig);
if (!err)
{
- bputl(signatures, oid);
- bwrite(signatures, &sig, sizeof(sig));
- if (!--limit)
+ bwrite(fb_signatures, &oid, sizeof(oid));
+ bwrite(fb_signatures, &sig.vec, sizeof(struct image_vector));
+ bputc(fb_signatures, sig.len);
+ if (sig.len)
+ bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region));
+ count++;
+ if (count >= limit)
break;
}
else
bb_done(&buf);
}
}
+ brewind(fb_signatures);
+ bputl(fb_signatures, count);
+ DBG("%d signatures written", count);
+ compute_image_signature_finish();
buck2obj_free(bob);
mp_delete(pool);
- bclose(cards);
- bclose(card_attrs);
- bclose(signatures);
+ bclose(fb_cards);
+ bclose(fb_card_attrs);
+ bclose(fb_signatures);
}
+struct signature_record {
+ oid_t oid;
+ struct image_vector vec;
+};
+
+#define ASORT_PREFIX(x) build_search_tree_##x
+#define ASORT_KEY_TYPE struct signature_record *
+#define ASORT_ELT(i) rec[i]
+#define ASORT_LT(x,y) x->vec.f[dim] < y->vec.f[dim]
+#define ASORT_EXTRA_ARGS , uns dim, struct signature_record **rec
+#include "lib/arraysort.h"
+
+#if 0
+#define DBG_KD(x...) DBG(x)
+#else
+#define DBG_KD(x...) do{}while(0)
+#endif
+
+static void
+build_search_tree(void)
+{
+ log(L_INFO, "Building KD-tree");
+
+ struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY);
+ struct image_tree tree;
+ tree.count = bgetl(fb_signatures);
+ ASSERT(tree.count < 0x80000000);
+ if (!tree.count)
+ {
+ /* FIXME */
+ bclose(fb_signatures);
+ die("There are no signatures");
+ }
+ else
+ {
+ DBG("Reading %d signatures", tree.count);
+ struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record));
+ struct signature_record **precords = xmalloc(tree.count * sizeof(void *));
+ for (uns i = 0; i < tree.count; i++)
+ {
+ bread(fb_signatures, &records[i].oid, sizeof(oid_t));
+ bread(fb_signatures, &records[i].vec, sizeof(struct image_vector));
+ uns len = bgetc(fb_signatures);
+ bskip(fb_signatures, len * sizeof(struct image_region));
+ precords[i] = records + i;
+ if (likely(i))
+ for (uns j = 0; j < IMAGE_VEC_K; j++)
+ {
+ tree.bbox.vec[0].f[j] = MIN(tree.bbox.vec[0].f[j], records[i].vec.f[j]);
+ tree.bbox.vec[1].f[j] = MAX(tree.bbox.vec[1].f[j], records[i].vec.f[j]);
+ }
+ else
+ tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec;
+ }
+ bclose(fb_signatures);
+
+ for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++);
+ DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth,
+ stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
+ uns leaves_index = 1 << (tree.depth - 1);
+ tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node));
+ tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf));
+
+ /* Initialize recursion */
+ struct stk {
+ struct image_bbox bbox;
+ uns index, count;
+ struct signature_record **start;
+ } stk_top[32], *stk = stk_top + 1;
+ stk->index = 1;
+ stk->start = precords;
+ stk->count = tree.count;
+ stk->bbox.vec[0] = tree.bbox.vec[0];
+ for (uns i = 0; i < IMAGE_VEC_K; i++)
+ stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i];
+ uns entry_index = 0;
+
+ /* Main loop */
+ while (stk != stk_top)
+ {
+ DBG_KD("Main loop... depth=%d index=%d count=%d, start=%d, min=%s dif=%s",
+ stk - stk_top, stk->index, stk->count, stk->start - precords,
+ stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1));
+ ASSERT(stk->count);
+
+ /* Create leaf node */
+ if (stk->index >= leaves_index || stk->count < 2)
+ {
+ tree.nodes[stk->index].val = IMAGE_NODE_LEAF | entry_index;
+ for (; stk->count--; stk->start++)
+ {
+ struct image_leaf *leaf = &tree.leaves[entry_index++];
+ struct signature_record *record = *stk->start;
+ leaf->oid = record->oid;
+ leaf->flags = 0;
+ for (uns i = IMAGE_VEC_K; i--; )
+ {
+ uns bits = IMAGE_LEAF_BITS(i);
+ leaf->flags <<= bits;
+ if (stk->bbox.vec[1].f[i])
+ {
+ uns value =
+ (record->vec.f[i] - stk->bbox.vec[0].f[i]) *
+ ((1 << bits) - 1) / stk->bbox.vec[1].f[i];
+ ASSERT(value < (1 << bits));
+ leaf->flags |= value;
+ }
+ }
+ if (!stk->count)
+ leaf->flags |= IMAGE_LEAF_LAST;
+ DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x",
+ leaf->oid, stk_print_image_vector(&record->vec), leaf->flags);
+ }
+ stk--;
+ }
+
+ /* Create internal node */
+ else
+ {
+ /* Select dimension to splis */
+ uns dim = 0;
+ for (uns i = 1; i < IMAGE_VEC_K; i++)
+ if (stk->bbox.vec[1].f[i] > stk->bbox.vec[1].f[dim])
+ dim = i;
+
+ /* Sort... FIXME: we only need the median */
+ build_search_tree_sort(stk->count, dim, stk->start);
+
+ /* Split in the middle */
+ uns index = stk->index;
+ stk[1].index = stk[0].index * 2;
+ stk[0].index = stk[1].index + 1;
+ stk[1].count = stk[0].count >> 1;
+ stk[0].count -= stk[1].count;
+ stk[1].start = stk[0].start;
+ stk[0].start += stk[1].count;
+
+ /* Choose split value */
+ uns lval = stk->start[-1]->vec.f[dim];
+ uns rval = stk->start[0]->vec.f[dim];
+ uns pivot = stk->bbox.vec[0].f[dim] + (stk->bbox.vec[1].f[dim] >> 1);
+ if (pivot <= lval)
+ pivot = lval;
+ else if (pivot >= rval)
+ pivot = rval;
+
+ DBG_KD("Created internal node; dim=%d pivot=%d", dim, pivot);
+
+ /* Split the box */
+ stk[1].bbox = stk[0].bbox;
+ stk[1].bbox.vec[1].f[dim] = pivot - stk[0].bbox.vec[0].f[dim];
+ stk[0].bbox.vec[0].f[dim] += stk[1].bbox.vec[1].f[dim];
+ stk[0].bbox.vec[1].f[dim] -= stk[1].bbox.vec[1].f[dim];
+
+ /* Fill the node structure */
+ tree.nodes[index].val = dim + (pivot << 8);
+ stk++;
+ }
+ }
+
+ DBG("Tree constructed, saving...");
+
+ struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
+ bputl(fb_tree, tree.count);
+ bputl(fb_tree, tree.depth);
+ bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
+ bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
+ bclose(fb_tree);
+
+ xfree(tree.leaves);
+ xfree(tree.nodes);
+ xfree(precords);
+ xfree(records);
+ }
+}
+
+
static char *shortopts = CF_SHORT_OPTS "";
static struct option longopts[] =
{
usage("Invalid usage");
generate_signatures(~0U);
+ build_search_tree();
return 0;
}
/* Every 4x4 block (FIXME: deal with smaller blocks near the edges) */
PixelPacket *p = pixels;
for (uns block_y = 0; block_y < h; block_y++, p += width & 3 + width * 3)
- for (uns block_x = 0; block_x < w; block_x++, p += 4 - 4 * width, block++)
+ for (uns block_x = 0; block_x < w; block_x++, p -= 4 * width - 4, block++)
{
int t[16], s[16], *tp = t;
hh_sum += blocks[i].hh;
}
- sig->vec[0] = l_sum / blocks_count;
- sig->vec[1] = u_sum / blocks_count;
- sig->vec[2] = v_sum / blocks_count;
- sig->vec[3] = lh_sum / blocks_count;
- sig->vec[4] = hl_sum / blocks_count;
- sig->vec[5] = hh_sum / blocks_count;
+ sig->vec.f[0] = l_sum / blocks_count;
+ sig->vec.f[1] = u_sum / blocks_count;
+ sig->vec.f[2] = v_sum / blocks_count;
+ sig->vec.f[3] = lh_sum / blocks_count;
+ sig->vec.f[4] = hl_sum / blocks_count;
+ sig->vec.f[5] = hh_sum / blocks_count;
+
+ sig->len = 0;
xfree(blocks);
- DBG("Resulting signature is (%d, %d, %d, %d, %d, %d)", sig->vec[0], sig->vec[1], sig->vec[2], sig->vec[3], sig->vec[4], sig->vec[5]);
+ DBG("Resulting signature is (%s)", stk_print_image_vector(&sig->vec));
}
-int
-compute_image_signature(void *data, uns len, struct image_signature *sig)
+static ExceptionInfo exception;
+static QuantizeInfo quantize_info;
+static ImageInfo *image_info;
+
+void
+compute_image_signature_prepare(void)
{
- int retval = 0;
-
- InitializeMagick(NULL); /* FIXME: call only once */
- ExceptionInfo exception;
+ InitializeMagick(NULL);
GetExceptionInfo(&exception);
- ImageInfo *image_info = CloneImageInfo(NULL);
+ image_info = CloneImageInfo(NULL);
image_info->subrange = 1;
+ GetQuantizeInfo(&quantize_info);
+ quantize_info.colorspace = RGBColorspace;
+}
+
+void
+compute_image_signature_finish(void)
+{
+ DestroyImageInfo(image_info);
+ DestroyExceptionInfo(&exception);
+ DestroyMagick();
+}
- DBG("Decoding");
- Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... most of the time :-/ */
+int
+compute_image_signature(void *data, uns len, struct image_signature *sig)
+{
+ Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... takes most of CPU time :-/ */
if (!image)
die("Invalid image format");
if (image->columns < 4 || image->rows < 4)
{
DBG("Image too small (%dx%d)", (int)image->columns, (int)image->rows);
- retval = -1;
- goto exit;
+ DestroyImage(image);
+ return -1;
}
-
- QuantizeInfo quantize_info;
- GetQuantizeInfo(&quantize_info);
- quantize_info.colorspace = RGBColorspace;
- QuantizeImage(&quantize_info, image);
-
+ QuantizeImage(&quantize_info, image); /* Also slow... and propably not necessary... */
PixelPacket *pixels = (PixelPacket *) AcquireImagePixels(image, 0, 0, image->columns, image->rows, &exception);
-
compute_image_area_signature(pixels, image->columns, image->rows, sig);
-
-exit:
DestroyImage(image);
- DestroyImageInfo(image_info);
- DestroyExceptionInfo(&exception);
- DestroyMagick();
- return retval;
+ return 0;
}
-#ifndef _IMAGES_H
-#define _IMAGES_H
+#ifndef _IMAGES_IMAGES_H
+#define _IMAGES_IMAGES_H
+#include <alloca.h>
#include <stdio.h>
#include <magick/api.h>
-#define IMAGE_VEC_K 6
-#define IMAGE_VEC_LOG_K 3
+#define IMAGE_VEC_K 6
+#define IMAGE_REG_K 9
+#define IMAGE_REG_MAX 4
-typedef u32 image_vector[IMAGE_VEC_K];
-typedef u32 image_box[2][IMAGE_VEC_K];
+typedef u16 image_feature_t; /* 8 or 16 bits precision */
+/* K-dimensional feature vector */
+struct image_vector {
+ image_feature_t f[IMAGE_VEC_K];
+};
+
+/* K-dimensional interval */
+struct image_bbox {
+ struct image_vector vec[2];
+};
+
+/* Fetures for image regions */
+struct image_region {
+ image_feature_t f[IMAGE_REG_K];
+};
+
+/* Image signature */
struct image_signature {
- image_vector vec;
+ struct image_vector vec; /* Combination of all regions... simplier signature */
+ image_feature_t len; /* Number of regions */
+ struct image_region reg[IMAGE_REG_MAX];/* Feature vector for every region */
};
+/* Similarity search tree... will be changed */
struct image_tree {
- uns count;
- uns depth;
- image_box box;
- struct image_node *nodes;
- struct image_entry *entries;
+ uns count; /* Number of images in the tree */
+ uns depth; /* Tree depth */
+ struct image_bbox bbox; /* Bounding box containing all the */
+ struct image_node *nodes; /* Internal nodes */
+ struct image_leaf *leaves; /* Leaves */
};
-#define IMAGE_NODE_LEAF 0x80000000
-#define IMAGE_NODE_DIM ((1 << IMAGE_VEC_LOG_K) - 1)
-
+/* Internal node in the search tree */
+#define IMAGE_NODE_LEAF 0x80000000 /* Node contains pointer to leaves array */
+#define IMAGE_NODE_DIM 0xff /* Split dimension */
struct image_node {
- u32 value;
+ u32 val;
};
-#define IMAGE_ENTRY_LAST (1 << (sizeof(oid_t) * 8 - 1))
-
-struct image_entry {
+/* Leaves in the search tree */
+#define IMAGE_LEAF_LAST 0x80000000 /* Last entry in the list */
+#define IMAGE_LEAF_BITS(i) (31 / IMAGE_VEC_K) /* Number of bits for relative position in i-th dimension */
+struct image_leaf {
+ u32 flags; /* Relative position in bbox and last node flag */
oid_t oid;
};
+#define stk_print_image_vector(v) ({ struct image_vector *_v = v; \
+ byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \
+ for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; })
+
+void compute_image_signature_prepare(void);
+void compute_image_signature_finish(void);
int compute_image_signature(void *data, uns len, struct image_signature *sig);
#endif
+