From 6b5e227f5965a1d63cef2e1391cbebb25fa2bdd7 Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Tue, 2 May 2006 08:56:56 +0200 Subject: [PATCH] - parts of duplicates search, still very slow and full of insects - signature vector converted from u16 to byte - several improvements and fixes... and new bugs of course :-) --- images/Makefile | 9 +- images/color.h | 3 + images/{image-dup.c => dup-cmp.c} | 13 +- images/{image-dup.h => dup-cmp.h} | 5 +- images/image-idx.c | 358 ++++++++++++++++++++++++--- images/image-obj.c | 8 +- images/image-sig.c | 75 +----- images/image-sig.h | 2 +- images/image-test.c | 3 +- images/images.h | 25 ++ images/{image-search.h => kd-tree.c} | 36 +-- images/kd-tree.h | 28 +++ 12 files changed, 433 insertions(+), 132 deletions(-) rename images/{image-dup.c => dup-cmp.c} (96%) rename images/{image-dup.h => dup-cmp.h} (86%) rename images/{image-search.h => kd-tree.c} (90%) create mode 100644 images/kd-tree.h diff --git a/images/Makefile b/images/Makefile index 71e4c79b..b35ab2ec 100644 --- a/images/Makefile +++ b/images/Makefile @@ -7,10 +7,15 @@ PROGS+=$(addprefix $(o)/images/,image-idx image-test decomp) $(o)/images/image-sig.o $(o)/images/image-sig.oo: CFLAGS+=-I/usr/include/GraphicsMagick $(o)/images/image-idx.o $(o)/images/image-idx.oo: CFLAGS+=-I/usr/include/GraphicsMagick $(o)/images/image-obj.o $(o)/images/image-obj.oo: CFLAGS+=-I/usr/include/GraphicsMagick -$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/images/image-obj.o $(o)/images/image-dup.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET) +$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/images/image-obj.o $(o)/images/dup-cmp.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(o)/images/kd-tree.o $(o)/images/color.o $(LIBSH) $(LIBLANG) $(LIBCHARSET) $(o)/images/image-idx: LIBS+=-lGraphicsMagick -ljpeg -lpng -$(o)/images/image-test: $(o)/images/image-test.o $(LIBSH) +$(o)/images/image-test: $(o)/images/image-test.o $(o)/images/kd-tree.o $(LIBSH) +$(o)/images/color-t: LIBS+=-lm + +TESTS+=$(addprefix $(o)/images/,color.test) + +$(o)/images/color.test: $(o)/images/color-t # By :;DF $(o)/images/block_info.o $(o)/images/block_info.oo: CFLAGS+=-I/usr/include/GraphicsMagick diff --git a/images/color.h b/images/color.h index 28daa822..59acd32d 100644 --- a/images/color.h +++ b/images/color.h @@ -15,6 +15,9 @@ * - SIMD should help to speed up conversion of large arrays * - maybe try to generate a long switch in color_conv_pixel() * with optimized entries instead of access to interpolation table + * - most of multiplications in srgb_to_luv_pixels can be replaced + * with tables lookup... tests shows almost the speed for random + * input and cca 40% gain when input colors fit in CPU chache */ #ifndef _IMAGES_COLOR_H diff --git a/images/image-dup.c b/images/dup-cmp.c similarity index 96% rename from images/image-dup.c rename to images/dup-cmp.c index 688ee50b..6a249ec3 100644 --- a/images/image-dup.c +++ b/images/dup-cmp.c @@ -6,6 +6,7 @@ * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. * + * * FIXME: * - many possible optimization * - compare normalized pictures (brightness, ...) @@ -21,11 +22,11 @@ #include "sherlock/sherlock.h" #include "lib/mempool.h" #include "images/images.h" -#include "images/image-dup.h" +#include "images/dup-cmp.h" -static uns image_dup_scale_min_size = 8; +static uns image_dup_scale_min_size = 16; static uns image_dup_ratio_threshold = 140; -static uns image_dup_error_threshold = 2000; +static uns image_dup_error_threshold = 50; static inline byte * image_dup_block(struct image_dup *dup, uns col, uns row) @@ -50,9 +51,9 @@ image_dup_init(struct image_dup *dup, struct image *image, struct mempool *pool) dup->image = image; dup->width = image->width; dup->height = image->height; - for (dup->cols = 0; (uns)(1 << dup->cols) < image->width; dup->cols++); - for (dup->rows = 0; (uns)(1 << dup->rows) < image->height; dup->rows++); - dup->buf = mp_alloc(pool, 12 << (dup->cols + dup->rows)); + for (dup->cols = 0; (uns)(2 << dup->cols) < image->width; dup->cols++); + for (dup->rows = 0; (uns)(2 << dup->rows) < image->height; dup->rows++); + dup->buf = mp_alloc(pool, dup->buf_size = (12 << (dup->cols + dup->rows))); dup->line = 6 << dup->cols; dup->flags = 0; if (image->width >= image_dup_scale_min_size && image->height >= image_dup_scale_min_size) diff --git a/images/image-dup.h b/images/dup-cmp.h similarity index 86% rename from images/image-dup.h rename to images/dup-cmp.h index 524db4e1..d730f0e6 100644 --- a/images/image-dup.h +++ b/images/dup-cmp.h @@ -1,11 +1,12 @@ -#ifndef _IMAGES_IMAGE_DUP_H -#define _IMAGES_IMAGE_DUP_H +#ifndef _IMAGES_DUP_CMP_H +#define _IMAGES_DUP_CMP_H struct image; struct image_dup { struct image *image; byte *buf; + uns buf_size; uns flags; uns cols; uns rows; diff --git a/images/image-idx.c b/images/image-idx.c index 8868dfe3..d98947c0 100644 --- a/images/image-idx.c +++ b/images/image-idx.c @@ -1,4 +1,4 @@ -#define LOCAL_DEBUG +#undef LOCAL_DEBUG #include "sherlock/sherlock.h" #include "lib/mempool.h" @@ -20,22 +20,29 @@ #include "lang/lang.h" #include "lib/base224.h" #include "lib/bbuf.h" +#include "lib/clists.h" #include "images/images.h" #include "images/image-obj.h" #include "images/image-sig.h" -#include "images/image-dup.h" +#include "images/dup-cmp.h" +#include "images/kd-tree.h" +#include "images/color.h" #include #include #include +static struct fastbuf *fb_cards; +static struct fastbuf *fb_card_attrs; +static struct buck2obj_buf *buck2obj; + /* This should happen in gatherer or scanner */ static void generate_signatures(uns limit) { - struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY); - struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY); + fb_cards = index_bopen("cards", O_RDONLY); + fb_card_attrs = index_bopen("card-attrs", O_RDONLY); struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC); struct card_attr ca; struct image_signature sig; @@ -43,7 +50,10 @@ generate_signatures(uns limit) struct buck2obj_buf *bob = buck2obj_alloc(); uns count = 0; - log(L_INFO, "Generating image signatures"); + if (limit == ~0U) + log(L_INFO, "Generating image signatures"); + else + log(L_INFO, "Generating at most %d image signatures", limit); bputl(fb_signatures, 0); imo_decompress_thumbnails_init(); @@ -60,8 +70,10 @@ generate_signatures(uns limit) die("Failed to read card"); if (attr = obj_find_attr(obj, 'N')) { +#ifdef LOCAL_DEBUG byte *url = obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'); DBG("Reading oid=%d url=%s", oid, url); +#endif struct image_obj imo; imo_init(&imo, pool, obj); if (imo_decompress_thumbnail(&imo)) @@ -74,6 +86,8 @@ generate_signatures(uns limit) if (sig.len) bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region)); count++; + if (count % 10000 == 0) + log(L_DEBUG, "... passed %d images", count); if (count >= limit) break; } @@ -114,13 +128,16 @@ struct signature_record { #define DBG_KD(x...) do{}while(0) #endif +static struct image_tree tree; +static struct signature_record *records; +static struct signature_record **precords; + static void -build_search_tree(void) +build_kd_tree(void) { log(L_INFO, "Building KD-tree"); struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY); - struct image_tree tree; tree.count = bgetl(fb_signatures); ASSERT(tree.count < 0x80000000); if (!tree.count) @@ -132,8 +149,8 @@ build_search_tree(void) else { DBG("Reading %d signatures", tree.count); - struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record)); - struct signature_record **precords = xmalloc(tree.count * sizeof(void *)); + records = xmalloc(tree.count * sizeof(struct signature_record)); + precords = xmalloc(tree.count * sizeof(void *)); for (uns i = 0; i < tree.count; i++) { bread(fb_signatures, &records[i].oid, sizeof(oid_t)); @@ -151,14 +168,14 @@ build_search_tree(void) tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec; } bclose(fb_signatures); - + for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++); - DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, + DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1)); uns leaves_index = 1 << (tree.depth - 1); tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node)); tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf)); - + /* Initialize recursion */ struct stk { struct image_bbox bbox; @@ -172,7 +189,7 @@ build_search_tree(void) for (uns i = 0; i < IMAGE_VEC_K; i++) stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i]; uns entry_index = 0; - + /* Main loop */ while (stk != stk_top) { @@ -180,7 +197,7 @@ build_search_tree(void) stk - stk_top, stk->index, stk->count, stk->start - precords, stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1)); ASSERT(stk->count); - + /* Create leaf node */ if (stk->index >= leaves_index || stk->count < 2) { @@ -198,20 +215,20 @@ build_search_tree(void) if (stk->bbox.vec[1].f[i]) { uns value = - (record->vec.f[i] - stk->bbox.vec[0].f[i]) * + (record->vec.f[i] - stk->bbox.vec[0].f[i]) * ((1 << bits) - 1) / stk->bbox.vec[1].f[i]; ASSERT(value < (uns)(1 << bits)); leaf->flags |= value; } } if (!stk->count) - leaf->flags |= IMAGE_LEAF_LAST; - DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x", + leaf->flags |= IMAGE_LEAF_LAST; + DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x", leaf->oid, stk_print_image_vector(&record->vec), leaf->flags); } stk--; } - + /* Create internal node */ else { @@ -223,7 +240,7 @@ build_search_tree(void) /* Sort... FIXME: we only need the median */ build_search_tree_sort(stk->count, dim, stk->start); - + /* Split in the middle */ uns index = stk->index; stk[1].index = stk[0].index * 2; @@ -266,13 +283,295 @@ build_search_tree(void) bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf)); bclose(fb_tree); - xfree(tree.leaves); - xfree(tree.nodes); - xfree(precords); - xfree(records); + //xfree(tree.leaves); + //xfree(tree.nodes); + //xfree(precords); + //xfree(records); + } +} + +/*********************************************************************************/ + +static uns pass1_buf_size = 400 << 20; +static uns pass1_max_count = 100000; +static uns pass1_search_dist = 40; +static uns pass1_search_count = 500; + +static struct mempool *pass1_pool; +static byte *pass1_buf_start; +static byte *pass1_buf_pos; +static uns pass1_buf_free; +static uns pass1_buf_used; +static clist pass1_buf_list; +static clist pass1_lru_list; +static u64 pass1_lookups; +static u64 pass1_reads; +static u64 pass1_pairs; +static u64 pass1_dups; +static u64 pass1_shrinks; + +struct pass1_node { + cnode lru_node; + cnode buf_node; + uns buf_size; + byte *buf; + oid_t oid; + byte *url; + struct image image; + struct image_dup dup; +}; + +#define HASH_PREFIX(x) pass1_hash_##x +#define HASH_NODE struct pass1_node +#define HASH_KEY_ATOMIC oid +#define HASH_WANT_CLEANUP +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_WANT_REMOVE +#include "lib/hashtable.h" + +static inline void +pass1_buf_init(void) +{ + //DBG("pass1_buf_init()"); + pass1_buf_free = pass1_buf_size; + pass1_buf_start = pass1_buf_pos = xmalloc(pass1_buf_size); + pass1_buf_used = 0; +} + +static inline void +pass1_buf_cleanup(void) +{ + //DBG("pass1_buf_cleanup()"); + xfree(pass1_buf_start); +} + +static void +pass1_node_free(struct pass1_node *node) +{ + //DBG("pass1_node_free(%d)", (uns)node->oid); + if (node->buf_size) + { + pass1_buf_used -= node->buf_size; + clist_remove(&node->buf_node); + } + clist_remove(&node->lru_node); + pass1_hash_remove(node); +} + +static inline void +pass1_node_free_lru(void) +{ + ASSERT(!clist_empty(&pass1_lru_list)); + pass1_node_free(SKIP_BACK(struct pass1_node, lru_node, clist_head(&pass1_lru_list))); +} + +static inline void +pass1_node_after_move(struct pass1_node *node, addr_int_t move) +{ + //DBG("pass1_node_after_mode(%d, %d)", (uns)node->oid, (uns)move); + /* adjust internal pointers */ +#define MOVE(x) x = (byte *)(x) - move + MOVE(node->url); + MOVE(node->image.pixels); + MOVE(node->dup.buf); +#undef MOVE +} + +static inline void +pass1_buf_shrink(void) +{ + DBG("pass1_buf_shrink()"); + pass1_shrinks++; + pass1_buf_free = pass1_buf_size; + pass1_buf_pos = pass1_buf_start; + CLIST_FOR_EACH(void *, p, pass1_buf_list) + { + struct pass1_node *node = SKIP_BACK(struct pass1_node, buf_node, p); + if (node->buf != pass1_buf_pos) + { + memmove(pass1_buf_pos, node->buf, node->buf_size); + pass1_node_after_move(node, node->buf - pass1_buf_pos); + node->buf = pass1_buf_pos; + } + pass1_buf_pos += node->buf_size; + pass1_buf_free -= node->buf_size; } } +static void * +pass1_buf_alloc(uns size) +{ + //DBG("pass1_buf_alloc(%d)", size); + + /* if there is not enough free space at the end of the buffer */ + if (size > pass1_buf_free) + { + /* free some lru nodes */ + //DBG("freeing lru nodes"); + while (size > pass1_buf_size - pass1_buf_used || pass1_buf_used * 2 > pass1_buf_size) + { + if (unlikely(clist_empty(&pass1_lru_list))) // FIXME + die("Buffer too small"); + pass1_node_free_lru(); + } + + pass1_buf_shrink(); + } + + /* final allocation */ + void *result = pass1_buf_pos; + pass1_buf_pos += size; + pass1_buf_free -= size; + pass1_buf_used += size; + return result; +} + +static struct pass1_node * +pass1_node_new(oid_t oid) +{ + DBG("pass1_node_new(%d)", (uns)oid); + if (pass1_hash_table.hash_count == pass1_max_count) + pass1_node_free_lru(); + struct pass1_node *node = pass1_hash_new(oid); + mp_flush(pass1_pool); + pass1_reads++; + + /* read object */ + struct card_attr ca; + bsetpos(fb_card_attrs, (sh_off_t)oid * sizeof(ca)); /* FIXME: these seeks can be easily removed */ + bread(fb_card_attrs, &ca, sizeof(ca)); + + bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT); /* FIXME: maybe a presort should handle these random seeks */ + uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1); + uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN; + struct odes *obj = obj_read_bucket(buck2obj, pass1_pool, buck_type, buck_len, fb_cards, NULL); + if (unlikely(!obj)) + die("Failed to read card"); + byte *url = obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'); + uns url_len = strlen(url); + + /* decompress thumbnail */ + struct image_obj imo; + imo_init(&imo, pass1_pool, obj); + if (unlikely(!imo_decompress_thumbnail(&imo))) + die("Cannot decompress thumbnail"); + node->image = imo.thumb; + + /* create duplicates comparision object */ + image_dup_init(&node->dup, &node->image, pass1_pool); + + /* copy data */ + //DBG("loaded image %s s=%d d=%d", url, node->image.size, node->dup.buf_size); + node->buf_size = node->image.size + node->dup.buf_size + url_len + 1; + if (node->buf_size) + { + byte *buf = node->buf = pass1_buf_alloc(node->buf_size); + clist_add_tail(&pass1_buf_list, &node->buf_node); +#define COPY(ptr, size) ({ void *_p=buf; uns _size=(size); buf+=_size; memcpy(_p,(ptr),_size); _p; }) + node->url = COPY(url, url_len + 1); + node->image.pixels = COPY(node->image.pixels, node->image.size); + node->dup.buf = COPY(node->dup.buf, node->dup.buf_size); +#undef COPY + } + + /* add to lru list */ + return node; +} + +static inline struct pass1_node * +pass1_node_lock(oid_t oid) +{ + DBG("pass1_node_lock(%d)", (uns)oid); + pass1_lookups++; + struct pass1_node *node = pass1_hash_find(oid); + if (node) + { + clist_remove(&node->lru_node); + return node; + } + else + return pass1_node_new(oid); +} + +static inline void +pass1_node_unlock(struct pass1_node *node) +{ + //DBG("pass1_node_unlock(%d)", (uns)node->oid); + clist_add_tail(&pass1_lru_list, &node->lru_node); +} + +static void +pass1(void) +{ + log(L_INFO, "Looking for duplicates"); + ASSERT(tree.nodes); + + /* initialization */ + pass1_lookups = pass1_reads = pass1_pairs = pass1_dups = pass1_shrinks = 0; + fb_cards = bopen("index/cards", O_RDONLY, 10000); // FIXME + fb_card_attrs = bopen("index/card-attrs", O_RDONLY, sizeof(struct card_attr)); // FIXME + buck2obj = buck2obj_alloc(); + imo_decompress_thumbnails_init(); + clist_init(&pass1_lru_list); + clist_init(&pass1_buf_list); + pass1_hash_init(); + pass1_buf_init(); + pass1_pool = mp_new(1 << 20); + + /* main loop */ + for (uns i = 0; i < tree.count; ) + { + /* lookup next image */ + oid_t oid = tree.leaves[i].oid; + struct pass1_node *node = pass1_node_lock(oid); + + /* compare with all near images */ + struct image_search search; + image_search_init(&search, &tree, &precords[i]->vec, pass1_search_dist); + /* FIXME: can be faster than general search in KD-tree */ + oid_t oid2; + uns dist; + for (uns j = 0; j < pass1_search_count && image_search_next(&search, &oid2, &dist); j++) + { + if (oid < oid2) + { + struct pass1_node *node2 = pass1_node_lock(oid2); + DBG("comparing %d and %d", oid, oid2); + if (image_dup_compare(&node->dup, &node2->dup, IMAGE_DUP_TRANS_ALL)) + { + pass1_dups++; + log(L_DEBUG, "*** Found duplicates oid1=0x%x oid=0x%x", (uns)node->oid, (uns)node2->oid); + log(L_DEBUG, " %s", node->url); + log(L_DEBUG, " %s", node2->url); + } + pass1_pairs++; + pass1_node_unlock(node2); + } + } + image_search_done(&search); + pass1_node_unlock(node); + i++; + if (i % 1000 == 0) + log(L_DEBUG, "... passed %d images", i); + } + + /* clean up */ + pass1_hash_cleanup(); + pass1_buf_cleanup(); + mp_delete(pass1_pool); + bclose(fb_cards); + bclose(fb_card_attrs); + buck2obj_free(buck2obj); + imo_decompress_thumbnails_done(); + + /* print statistics */ + log(L_INFO, "%d count, %Ld lookups, %Ld reads, %Ld pairs, %Ld dups, %Ld shrinks", tree.count, + (long long int)pass1_lookups, (long long int)pass1_reads, + (long long int)pass1_pairs, (long long int)pass1_dups, (long long int)pass1_shrinks); +} + +/*********************************************************************************/ static char *shortopts = CF_SHORT_OPTS ""; static struct option longopts[] = @@ -298,12 +597,12 @@ usage(byte *msg) exit(1); } - + int main(int argc UNUSED, char **argv) { int opt; - + log_init(argv[0]); while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0) switch (opt) @@ -314,8 +613,11 @@ main(int argc UNUSED, char **argv) if (optind != argc) usage("Invalid usage"); - generate_signatures(~0U); - build_search_tree(); - + srgb_to_luv_init(); + + generate_signatures(20000); + build_kd_tree(); + pass1(); + return 0; } diff --git a/images/image-obj.c b/images/image-obj.c index 06753ca6..58230487 100644 --- a/images/image-obj.c +++ b/images/image-obj.c @@ -150,11 +150,11 @@ libpng_decompress_thumbnail(struct image_obj *imo) case PNG_COLOR_TYPE_GRAY: imo->thumb.flags |= IMAGE_GRAYSCALE; png_set_gray_to_rgb(png_ptr); - png_set_strip_alpha(png_ptr); break; case PNG_COLOR_TYPE_GRAY_ALPHA: imo->thumb.flags |= IMAGE_GRAYSCALE; png_set_gray_to_rgb(png_ptr); + png_set_strip_alpha(png_ptr); break; case PNG_COLOR_TYPE_RGB: break; @@ -169,7 +169,7 @@ libpng_decompress_thumbnail(struct image_obj *imo) /* Read image data */ DBG("Reading image data"); - byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, width * height * 3); + byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = width * height * 3); png_bytep rows[height]; for (uns i = 0; i < height; i++, pixels += width * 3) rows[i] = (png_bytep)pixels; @@ -289,7 +289,7 @@ libjpeg_decompress_thumbnail(struct image_obj *imo) jpeg_start_decompress(&cinfo); ASSERT(imo->thumb.width == cinfo.output_width && imo->thumb.height == cinfo.output_height); ASSERT(sizeof(JSAMPLE) == 1); - byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, cinfo.output_width * cinfo.output_height * 3); + byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = cinfo.output_width * cinfo.output_height * 3); if (cinfo.out_color_space == JCS_RGB) { /* Read RGB pixels */ uns size = cinfo.output_width * 3; @@ -369,7 +369,7 @@ magick_decompress_thumbnail(struct image_obj *imo) PixelPacket *pixels = (PixelPacket *)AcquireImagePixels(image, 0, 0, image->columns, image->rows, &magick_exception); ASSERT(pixels); uns size = image->columns * image->rows; - byte *p = imo->thumb.pixels = mp_alloc(imo->pool, size * 3); + byte *p = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = size * 3); for (uns i = 0; i < size; i++) { p[0] = pixels->red >> (QuantumDepth - 8); diff --git a/images/image-sig.c b/images/image-sig.c index fab1de39..b22b4e44 100644 --- a/images/image-sig.c +++ b/images/image-sig.c @@ -1,4 +1,4 @@ -#define LOCAL_DEBUG +#undef LOCAL_DEBUG #include "sherlock/sherlock.h" #include "lib/math.h" @@ -6,56 +6,9 @@ #include "images/images.h" #include "images/image-obj.h" #include "images/image-sig.h" +#include "images/color.h" #include -#include - -/* - * Color spaces - * - * http://www.tecgraf.puc-rio.br/~mgattass/color/ColorIndex.html - * - */ - -#define REF_WHITE_X 0.96422 -#define REF_WHITE_Y 1. -#define REF_WHITE_Z 0.82521 - -/* sRGB to XYZ */ -static void -srgb_to_xyz_slow(double srgb[3], double xyz[3]) -{ - double a[3]; - for (uns i = 0; i < 3; i++) - if (srgb[i] > 0.04045) - a[i] = pow((srgb[i] + 0.055) * (1 / 1.055), 2.4); - else - a[i] = srgb[i] * (1 / 12.92); - xyz[0] = 0.412424 * a[0] + 0.357579 * a[1] + 0.180464 * a[2]; - xyz[1] = 0.212656 * a[0] + 0.715158 * a[1] + 0.072186 * a[2]; - xyz[2] = 0.019332 * a[0] + 0.119193 * a[1] + 0.950444 * a[2]; -} - -/* XYZ to CIE-Luv */ -static void -xyz_to_luv_slow(double xyz[3], double luv[3]) -{ - double sum = xyz[0] + 15 * xyz[1] + 3 * xyz[2]; - if (sum < 0.000001) - luv[0] = luv[1] = luv[2] = 0; - else - { - double var_u = 4 * xyz[0] / sum; - double var_v = 9 * xyz[1] / sum; - if (xyz[1] > 0.008856) - luv[0] = 116 * pow(xyz[1], 1 / 3.) - 16; - else - luv[0] = (116 * 7.787) * xyz[1]; - luv[1] = luv[0] * (13 * (var_u - 4 * REF_WHITE_X / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z))); - luv[2] = luv[0] * (13 * (var_v - 9 * REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z))); - /* intervals [0..100], [-134..220], [-140..122] */ - } -} struct block { uns l, u, v; /* average Luv coefficients */ @@ -97,20 +50,16 @@ compute_image_signature(struct image *image, struct image_signature *sig) for (uns y = 0; y < 4; y++, p += 3 * (width - 4)) for (uns x = 0; x < 4; x++, p += 3) { - double rgb[3], luv[3], xyz[3]; - rgb[0] = p[0] / 255.; - rgb[1] = p[1] / 255.; - rgb[2] = p[2] / 255.; - srgb_to_xyz_slow(rgb, xyz); - xyz_to_luv_slow(xyz, luv); + byte luv[3]; + srgb_to_luv_pixel(luv, p); l_sum += *tp++ = luv[0]; - u_sum += luv[1] + 150; - v_sum += luv[2] + 150; + u_sum += luv[1]; + v_sum += luv[2]; } - block->l = l_sum; - block->u = u_sum; - block->v = v_sum; + block->l = (l_sum >> 4); + block->u = (u_sum >> 4); + block->v = (v_sum >> 4); /* Apply Daubechies wavelet transformation * FIXME: @@ -149,9 +98,9 @@ compute_image_signature(struct image *image, struct image_signature *sig) } /* Extract energies in LH, HL and HH bands */ - block->lh = sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]); - block->hl = sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]); - block->hh = sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]); + block->lh = CLAMP((int)(sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]) / 16), 0, 255); + block->hl = CLAMP((int)(sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]) / 16), 0, 255); + block->hh = CLAMP((int)(sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]) / 16), 0, 255); } /* FIXME: simple average is for testing pusposes only */ diff --git a/images/image-sig.h b/images/image-sig.h index 96e2e25b..3532262a 100644 --- a/images/image-sig.h +++ b/images/image-sig.h @@ -7,7 +7,7 @@ #define IMAGE_REG_K 9 #define IMAGE_REG_MAX 4 -typedef u16 image_feature_t; /* 8 or 16 bits precision */ +typedef byte image_feature_t; /* 8 or 16 bits precision */ /* K-dimensional feature vector */ struct image_vector { diff --git a/images/image-test.c b/images/image-test.c index 69820fd4..9ff9df9e 100644 --- a/images/image-test.c +++ b/images/image-test.c @@ -4,7 +4,7 @@ #include "lib/fastbuf.h" #include "images/images.h" #include "images/image-sig.h" -#include "images/image-search.h" +#include "images/kd-tree.h" #include "sherlock/index.h" #include "lib/mempool.h" #include "sherlock/object.h" @@ -19,6 +19,7 @@ #include #include +#include #define BEST_CNT 30 diff --git a/images/images.h b/images/images.h index 4f2bd2aa..61d09bf0 100644 --- a/images/images.h +++ b/images/images.h @@ -9,8 +9,33 @@ struct image { uns flags; /* enum image_flag */ uns width; /* number of columns */ uns height; /* number of rows */ + uns size; /* buffer size in bytes */ byte *pixels; /* RGB */ }; +enum image_format { + IMAGE_FORMAT_UNDEFINED = 0, + IMAGE_FORMAT_JPEG, + IMAGE_FORMAT_PNG, + IMAGE_FORMAT_GIF +}; + +struct image_info { + uns width; + uns height; + enum image_format format; + union { + struct { + } jpeg; + struct { + } png; + struct { + } gif; + }; +}; + +int read_image_header(struct image_info *info); +int read_image_data(struct image_info *info); + #endif diff --git a/images/image-search.h b/images/kd-tree.c similarity index 90% rename from images/image-search.h rename to images/kd-tree.c index 4907545e..f482854d 100644 --- a/images/image-search.h +++ b/images/kd-tree.c @@ -1,31 +1,17 @@ -#include "lib/heap.h" -#include - -#define IMAGE_SEARCH_DIST_UNLIMITED (~0U) +#undef LOCAL_DEBUG -/* FIXME: support full length of oid_t, currently must be <2^31 */ -#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U -struct image_search_item { - u32 dist; - u32 index; - struct image_bbox bbox; -}; - -#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist) +#include "sherlock/sherlock.h" +#include "lib/heap.h" +#include "images/images.h" +#include "images/image-sig.h" +#include "images/kd-tree.h" -struct image_search { - struct image_tree *tree; - struct image_node *nodes; - struct image_leaf *leaves; - struct image_vector query; - struct image_search_item *buf; - u32 *heap; - uns count, visited, size, max_dist; -}; +#include #define SQR(x) ((x)*(x)) +#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist) -static void +void image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist) { // FIXME: empty tree @@ -57,7 +43,7 @@ image_search_init(struct image_search *is, struct image_tree *tree, struct image } } -static void +void image_search_done(struct image_search *is) { xfree(is->buf); @@ -102,7 +88,7 @@ image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct return dist; } -static int +int image_search_next(struct image_search *is, oid_t *oid, uns *dist) { while (likely(is->count)) diff --git a/images/kd-tree.h b/images/kd-tree.h new file mode 100644 index 00000000..1cd1095b --- /dev/null +++ b/images/kd-tree.h @@ -0,0 +1,28 @@ +#ifndef _IMAGES_KD_TREE_H +#define _IMAGES_KD_TREE_H + +#define IMAGE_SEARCH_DIST_UNLIMITED (~0U) + +/* FIXME: support full length of oid_t, currently must be <2^31 */ +#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U +struct image_search_item { + u32 dist; + u32 index; + struct image_bbox bbox; +}; + +struct image_search { + struct image_tree *tree; + struct image_node *nodes; + struct image_leaf *leaves; + struct image_vector query; + struct image_search_item *buf; + u32 *heap; + uns count, visited, size, max_dist; +}; + +void image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist); +void image_search_done(struct image_search *is); +int image_search_next(struct image_search *is, oid_t *oid, uns *dist); + +#endif -- 2.39.5