From a0d62822d4c0ff65522bf5a80a8a8610f32b2cfc Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Sun, 9 Apr 2006 17:33:51 +0200 Subject: [PATCH] Experimental version of search tree construction... --- images/image-idx.c | 220 +++++++++++++++++++++++++++++++++++++++++---- images/image-sig.c | 69 +++++++------- images/images.h | 67 ++++++++++---- 3 files changed, 289 insertions(+), 67 deletions(-) diff --git a/images/image-idx.c b/images/image-idx.c index a755d511..21024df3 100644 --- a/images/image-idx.c +++ b/images/image-idx.c @@ -31,25 +31,27 @@ static void generate_signatures(uns limit) { - struct fastbuf *cards = index_bopen("cards", O_RDONLY); - struct fastbuf *card_attrs = index_bopen("card-attrs", O_RDONLY); - struct fastbuf *signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC); + struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY); + struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY); + struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC); struct card_attr ca; struct image_signature sig; struct mempool *pool = mp_new(1 << 16); struct buck2obj_buf *bob = buck2obj_alloc(); - oid_t oid = 0; + uns count = 0; - DBG("Generating signatures"); + log(L_INFO, "Generating image signatures"); + bputl(fb_signatures, 0); + compute_image_signature_prepare(); - for (; bread(card_attrs, &ca, sizeof(ca)); oid++) + for (oid_t oid = 0; bread(fb_card_attrs, &ca, sizeof(ca)); oid++) if ((uns)((ca.type_flags >> 4) - 8) < 4) { - bsetpos(cards, (sh_off_t)ca.card << CARD_POS_SHIFT); - uns buck_len = bgetl(cards)-(LIZARD_COMPRESS_HEADER-1); - uns buck_type = bgetc(cards) + BUCKET_TYPE_PLAIN; + bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT); + uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1); + uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN; mp_flush(pool); - struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, cards, NULL); + struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb_cards, NULL); struct oattr *attr; if (!obj) die("Failed to read card"); @@ -72,9 +74,13 @@ generate_signatures(uns limit) int err = compute_image_signature(thumb, thumb_len, &sig); if (!err) { - bputl(signatures, oid); - bwrite(signatures, &sig, sizeof(sig)); - if (!--limit) + bwrite(fb_signatures, &oid, sizeof(oid)); + bwrite(fb_signatures, &sig.vec, sizeof(struct image_vector)); + bputc(fb_signatures, sig.len); + if (sig.len) + bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region)); + count++; + if (count >= limit) break; } else @@ -83,14 +89,195 @@ generate_signatures(uns limit) bb_done(&buf); } } + brewind(fb_signatures); + bputl(fb_signatures, count); + DBG("%d signatures written", count); + compute_image_signature_finish(); buck2obj_free(bob); mp_delete(pool); - bclose(cards); - bclose(card_attrs); - bclose(signatures); + bclose(fb_cards); + bclose(fb_card_attrs); + bclose(fb_signatures); } +struct signature_record { + oid_t oid; + struct image_vector vec; +}; + +#define ASORT_PREFIX(x) build_search_tree_##x +#define ASORT_KEY_TYPE struct signature_record * +#define ASORT_ELT(i) rec[i] +#define ASORT_LT(x,y) x->vec.f[dim] < y->vec.f[dim] +#define ASORT_EXTRA_ARGS , uns dim, struct signature_record **rec +#include "lib/arraysort.h" + +#if 0 +#define DBG_KD(x...) DBG(x) +#else +#define DBG_KD(x...) do{}while(0) +#endif + +static void +build_search_tree(void) +{ + log(L_INFO, "Building KD-tree"); + + struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY); + struct image_tree tree; + tree.count = bgetl(fb_signatures); + ASSERT(tree.count < 0x80000000); + if (!tree.count) + { + /* FIXME */ + bclose(fb_signatures); + die("There are no signatures"); + } + else + { + DBG("Reading %d signatures", tree.count); + struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record)); + struct signature_record **precords = xmalloc(tree.count * sizeof(void *)); + for (uns i = 0; i < tree.count; i++) + { + bread(fb_signatures, &records[i].oid, sizeof(oid_t)); + bread(fb_signatures, &records[i].vec, sizeof(struct image_vector)); + uns len = bgetc(fb_signatures); + bskip(fb_signatures, len * sizeof(struct image_region)); + precords[i] = records + i; + if (likely(i)) + for (uns j = 0; j < IMAGE_VEC_K; j++) + { + tree.bbox.vec[0].f[j] = MIN(tree.bbox.vec[0].f[j], records[i].vec.f[j]); + tree.bbox.vec[1].f[j] = MAX(tree.bbox.vec[1].f[j], records[i].vec.f[j]); + } + else + tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec; + } + bclose(fb_signatures); + + for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++); + DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, + stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1)); + uns leaves_index = 1 << (tree.depth - 1); + tree.nodes = xmalloc((1 << tree.depth) * sizeof(struct image_node)); + tree.leaves = xmalloc(tree.count * sizeof(struct image_leaf)); + + /* Initialize recursion */ + struct stk { + struct image_bbox bbox; + uns index, count; + struct signature_record **start; + } stk_top[32], *stk = stk_top + 1; + stk->index = 1; + stk->start = precords; + stk->count = tree.count; + stk->bbox.vec[0] = tree.bbox.vec[0]; + for (uns i = 0; i < IMAGE_VEC_K; i++) + stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i]; + uns entry_index = 0; + + /* Main loop */ + while (stk != stk_top) + { + DBG_KD("Main loop... depth=%d index=%d count=%d, start=%d, min=%s dif=%s", + stk - stk_top, stk->index, stk->count, stk->start - precords, + stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1)); + ASSERT(stk->count); + + /* Create leaf node */ + if (stk->index >= leaves_index || stk->count < 2) + { + tree.nodes[stk->index].val = IMAGE_NODE_LEAF | entry_index; + for (; stk->count--; stk->start++) + { + struct image_leaf *leaf = &tree.leaves[entry_index++]; + struct signature_record *record = *stk->start; + leaf->oid = record->oid; + leaf->flags = 0; + for (uns i = IMAGE_VEC_K; i--; ) + { + uns bits = IMAGE_LEAF_BITS(i); + leaf->flags <<= bits; + if (stk->bbox.vec[1].f[i]) + { + uns value = + (record->vec.f[i] - stk->bbox.vec[0].f[i]) * + ((1 << bits) - 1) / stk->bbox.vec[1].f[i]; + ASSERT(value < (1 << bits)); + leaf->flags |= value; + } + } + if (!stk->count) + leaf->flags |= IMAGE_LEAF_LAST; + DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x", + leaf->oid, stk_print_image_vector(&record->vec), leaf->flags); + } + stk--; + } + + /* Create internal node */ + else + { + /* Select dimension to splis */ + uns dim = 0; + for (uns i = 1; i < IMAGE_VEC_K; i++) + if (stk->bbox.vec[1].f[i] > stk->bbox.vec[1].f[dim]) + dim = i; + + /* Sort... FIXME: we only need the median */ + build_search_tree_sort(stk->count, dim, stk->start); + + /* Split in the middle */ + uns index = stk->index; + stk[1].index = stk[0].index * 2; + stk[0].index = stk[1].index + 1; + stk[1].count = stk[0].count >> 1; + stk[0].count -= stk[1].count; + stk[1].start = stk[0].start; + stk[0].start += stk[1].count; + + /* Choose split value */ + uns lval = stk->start[-1]->vec.f[dim]; + uns rval = stk->start[0]->vec.f[dim]; + uns pivot = stk->bbox.vec[0].f[dim] + (stk->bbox.vec[1].f[dim] >> 1); + if (pivot <= lval) + pivot = lval; + else if (pivot >= rval) + pivot = rval; + + DBG_KD("Created internal node; dim=%d pivot=%d", dim, pivot); + + /* Split the box */ + stk[1].bbox = stk[0].bbox; + stk[1].bbox.vec[1].f[dim] = pivot - stk[0].bbox.vec[0].f[dim]; + stk[0].bbox.vec[0].f[dim] += stk[1].bbox.vec[1].f[dim]; + stk[0].bbox.vec[1].f[dim] -= stk[1].bbox.vec[1].f[dim]; + + /* Fill the node structure */ + tree.nodes[index].val = dim + (pivot << 8); + stk++; + } + } + + DBG("Tree constructed, saving..."); + + struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC); + bputl(fb_tree, tree.count); + bputl(fb_tree, tree.depth); + bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node)); + bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf)); + bclose(fb_tree); + + xfree(tree.leaves); + xfree(tree.nodes); + xfree(precords); + xfree(records); + } +} + + static char *shortopts = CF_SHORT_OPTS ""; static struct option longopts[] = { @@ -132,6 +319,7 @@ main(int argc UNUSED, char **argv) usage("Invalid usage"); generate_signatures(~0U); + build_search_tree(); return 0; } diff --git a/images/image-sig.c b/images/image-sig.c index 43459341..c2482cc7 100644 --- a/images/image-sig.c +++ b/images/image-sig.c @@ -71,7 +71,7 @@ compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct /* Every 4x4 block (FIXME: deal with smaller blocks near the edges) */ PixelPacket *p = pixels; for (uns block_y = 0; block_y < h; block_y++, p += width & 3 + width * 3) - for (uns block_x = 0; block_x < w; block_x++, p += 4 - 4 * width, block++) + for (uns block_x = 0; block_x < w; block_x++, p -= 4 * width - 4, block++) { int t[16], s[16], *tp = t; @@ -159,54 +159,59 @@ compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct hh_sum += blocks[i].hh; } - sig->vec[0] = l_sum / blocks_count; - sig->vec[1] = u_sum / blocks_count; - sig->vec[2] = v_sum / blocks_count; - sig->vec[3] = lh_sum / blocks_count; - sig->vec[4] = hl_sum / blocks_count; - sig->vec[5] = hh_sum / blocks_count; + sig->vec.f[0] = l_sum / blocks_count; + sig->vec.f[1] = u_sum / blocks_count; + sig->vec.f[2] = v_sum / blocks_count; + sig->vec.f[3] = lh_sum / blocks_count; + sig->vec.f[4] = hl_sum / blocks_count; + sig->vec.f[5] = hh_sum / blocks_count; + + sig->len = 0; xfree(blocks); - DBG("Resulting signature is (%d, %d, %d, %d, %d, %d)", sig->vec[0], sig->vec[1], sig->vec[2], sig->vec[3], sig->vec[4], sig->vec[5]); + DBG("Resulting signature is (%s)", stk_print_image_vector(&sig->vec)); } -int -compute_image_signature(void *data, uns len, struct image_signature *sig) +static ExceptionInfo exception; +static QuantizeInfo quantize_info; +static ImageInfo *image_info; + +void +compute_image_signature_prepare(void) { - int retval = 0; - - InitializeMagick(NULL); /* FIXME: call only once */ - ExceptionInfo exception; + InitializeMagick(NULL); GetExceptionInfo(&exception); - ImageInfo *image_info = CloneImageInfo(NULL); + image_info = CloneImageInfo(NULL); image_info->subrange = 1; + GetQuantizeInfo(&quantize_info); + quantize_info.colorspace = RGBColorspace; +} + +void +compute_image_signature_finish(void) +{ + DestroyImageInfo(image_info); + DestroyExceptionInfo(&exception); + DestroyMagick(); +} - DBG("Decoding"); - Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... most of the time :-/ */ +int +compute_image_signature(void *data, uns len, struct image_signature *sig) +{ + Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... takes most of CPU time :-/ */ if (!image) die("Invalid image format"); if (image->columns < 4 || image->rows < 4) { DBG("Image too small (%dx%d)", (int)image->columns, (int)image->rows); - retval = -1; - goto exit; + DestroyImage(image); + return -1; } - - QuantizeInfo quantize_info; - GetQuantizeInfo(&quantize_info); - quantize_info.colorspace = RGBColorspace; - QuantizeImage(&quantize_info, image); - + QuantizeImage(&quantize_info, image); /* Also slow... and propably not necessary... */ PixelPacket *pixels = (PixelPacket *) AcquireImagePixels(image, 0, 0, image->columns, image->rows, &exception); - compute_image_area_signature(pixels, image->columns, image->rows, sig); - -exit: DestroyImage(image); - DestroyImageInfo(image_info); - DestroyExceptionInfo(&exception); - DestroyMagick(); - return retval; + return 0; } diff --git a/images/images.h b/images/images.h index 2399c4f4..49f10c5b 100644 --- a/images/images.h +++ b/images/images.h @@ -1,40 +1,69 @@ -#ifndef _IMAGES_H -#define _IMAGES_H +#ifndef _IMAGES_IMAGES_H +#define _IMAGES_IMAGES_H +#include #include #include -#define IMAGE_VEC_K 6 -#define IMAGE_VEC_LOG_K 3 +#define IMAGE_VEC_K 6 +#define IMAGE_REG_K 9 +#define IMAGE_REG_MAX 4 -typedef u32 image_vector[IMAGE_VEC_K]; -typedef u32 image_box[2][IMAGE_VEC_K]; +typedef u16 image_feature_t; /* 8 or 16 bits precision */ +/* K-dimensional feature vector */ +struct image_vector { + image_feature_t f[IMAGE_VEC_K]; +}; + +/* K-dimensional interval */ +struct image_bbox { + struct image_vector vec[2]; +}; + +/* Fetures for image regions */ +struct image_region { + image_feature_t f[IMAGE_REG_K]; +}; + +/* Image signature */ struct image_signature { - image_vector vec; + struct image_vector vec; /* Combination of all regions... simplier signature */ + image_feature_t len; /* Number of regions */ + struct image_region reg[IMAGE_REG_MAX];/* Feature vector for every region */ }; +/* Similarity search tree... will be changed */ struct image_tree { - uns count; - uns depth; - image_box box; - struct image_node *nodes; - struct image_entry *entries; + uns count; /* Number of images in the tree */ + uns depth; /* Tree depth */ + struct image_bbox bbox; /* Bounding box containing all the */ + struct image_node *nodes; /* Internal nodes */ + struct image_leaf *leaves; /* Leaves */ }; -#define IMAGE_NODE_LEAF 0x80000000 -#define IMAGE_NODE_DIM ((1 << IMAGE_VEC_LOG_K) - 1) - +/* Internal node in the search tree */ +#define IMAGE_NODE_LEAF 0x80000000 /* Node contains pointer to leaves array */ +#define IMAGE_NODE_DIM 0xff /* Split dimension */ struct image_node { - u32 value; + u32 val; }; -#define IMAGE_ENTRY_LAST (1 << (sizeof(oid_t) * 8 - 1)) - -struct image_entry { +/* Leaves in the search tree */ +#define IMAGE_LEAF_LAST 0x80000000 /* Last entry in the list */ +#define IMAGE_LEAF_BITS(i) (31 / IMAGE_VEC_K) /* Number of bits for relative position in i-th dimension */ +struct image_leaf { + u32 flags; /* Relative position in bbox and last node flag */ oid_t oid; }; +#define stk_print_image_vector(v) ({ struct image_vector *_v = v; \ + byte *_s = (byte *) alloca(IMAGE_VEC_K * 6), *_p = _s + sprintf(_s, "%d", _v->f[0]); \ + for (uns _i = 1; _i < IMAGE_VEC_K; _i++) _p += sprintf(_p, " %d", _v->f[_i]); _s; }) + +void compute_image_signature_prepare(void); +void compute_image_signature_finish(void); int compute_image_signature(void *data, uns len, struct image_signature *sig); #endif + -- 2.39.5