$(o)/images/image-sig.o $(o)/images/image-sig.oo: CFLAGS+=-I/usr/include/GraphicsMagick
$(o)/images/image-idx.o $(o)/images/image-idx.oo: CFLAGS+=-I/usr/include/GraphicsMagick
$(o)/images/image-obj.o $(o)/images/image-obj.oo: CFLAGS+=-I/usr/include/GraphicsMagick
-$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/images/image-obj.o $(o)/images/image-dup.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
+$(o)/images/image-idx: $(o)/images/image-idx.o $(o)/images/image-obj.o $(o)/images/dup-cmp.o $(o)/indexer/iconfig.o $(o)/images/image-sig.o $(o)/images/kd-tree.o $(o)/images/color.o $(LIBSH) $(LIBLANG) $(LIBCHARSET)
$(o)/images/image-idx: LIBS+=-lGraphicsMagick -ljpeg -lpng
-$(o)/images/image-test: $(o)/images/image-test.o $(LIBSH)
+$(o)/images/image-test: $(o)/images/image-test.o $(o)/images/kd-tree.o $(LIBSH)
+$(o)/images/color-t: LIBS+=-lm
+
+TESTS+=$(addprefix $(o)/images/,color.test)
+
+$(o)/images/color.test: $(o)/images/color-t
# By :;DF
$(o)/images/block_info.o $(o)/images/block_info.oo: CFLAGS+=-I/usr/include/GraphicsMagick
* - SIMD should help to speed up conversion of large arrays
* - maybe try to generate a long switch in color_conv_pixel()
* with optimized entries instead of access to interpolation table
+ * - most of the multiplications in srgb_to_luv_pixels can be replaced
+ * with table lookups... tests show almost the same speed for random
+ * input and circa 40% gain when input colors fit in the CPU cache
*/
#ifndef _IMAGES_COLOR_H
--- /dev/null
+/*
+ * Image Library -- Duplicates Comparison
+ *
+ * (c) 2006 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
+ *
+ * FIXME:
+ * - many possible optimizations
+ * - compare normalized pictures (brightness, ...)
+ * - better image scale... now it can completely miss some rows/cols of pixels
+ * - maybe better/slower last step
+ * - different thresholds for various transformations
+ * - do not test all transformations for symmetric pictures
+ * - ... secret ideas :-)
+ */
+
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/mempool.h"
+#include "images/images.h"
+#include "images/dup-cmp.h"
+
+static uns image_dup_scale_min_size = 16;
+static uns image_dup_ratio_threshold = 140;
+static uns image_dup_error_threshold = 50;
+
+/*
+ * Return the address, inside dup->buf, of the block that holds the image
+ * reduced to (1 << col) x (1 << row) pixels (3 bytes per pixel).
+ * Both indices must lie within the levels computed by image_dup_init().
+ */
+static inline byte *
+image_dup_block(struct image_dup *dup, uns col, uns row)
+{
+  ASSERT(col <= dup->cols && row <= dup->rows);
+  byte *level_base = dup->buf + (dup->line << row);
+  return level_base + (3 << (row + col));
+}
+
+/* Store the per-channel mean (rounded down) of two 3-byte pixels into dest. */
+static inline void
+pixels_average(byte *dest, byte *src1, byte *src2)
+{
+  for (uns channel = 0; channel < 3; channel++)
+    {
+      uns sum = (uns)src1[channel] + (uns)src2[channel];
+      dest[channel] = sum >> 1;
+    }
+}
+
+/*
+ * Fill `dup` with the data needed to compare `image` against other images.
+ *
+ * The function remembers the image and its dimensions, allocates dup->buf
+ * from `pool` and fills it with reduced copies of the image: one block of
+ * (1 << col) x (1 << row) pixels for every col <= dup->cols and
+ * row <= dup->rows (see image_dup_block()).  IMAGE_DUP_FLAG_SCALE is set
+ * when both dimensions reach image_dup_scale_min_size.
+ *
+ * The image must have non-zero width and height.
+ */
+void
+image_dup_init(struct image_dup *dup, struct image *image, struct mempool *pool)
+{
+ ASSERT(image->width && image->height);
+
+ dup->image = image;
+ dup->width = image->width;
+ dup->height = image->height;
+ /* Smallest levels such that (2 << cols) >= width and (2 << rows) >= height */
+ for (dup->cols = 0; (uns)(2 << dup->cols) < image->width; dup->cols++);
+ for (dup->rows = 0; (uns)(2 << dup->rows) < image->height; dup->rows++);
+ dup->buf = mp_alloc(pool, dup->buf_size = (12 << (dup->cols + dup->rows)));
+ dup->line = 6 << dup->cols;
+ dup->flags = 0;
+ if (image->width >= image_dup_scale_min_size && image->height >= image_dup_scale_min_size)
+ dup->flags |= IMAGE_DUP_FLAG_SCALE;
+
+ /* Scale original image to right bottom block */
+ {
+ byte *d = image_dup_block(dup, dup->cols, dup->rows);
+ uns width = 1 << dup->cols;
+ uns height = 1 << dup->rows;
+ uns line_size = 3 * image->width;
+ uns src_y = 0;
+ for (uns y = 0; y < height; y++)
+ {
+ /* Source line index is (y * image->height) >> rows; no interpolation is done */
+ byte *line = image->pixels + line_size * (src_y >> dup->rows);
+ uns src_x = 0;
+ for (uns x = 0; x < width; x++)
+ {
+ /* Source column index is (x * image->width) >> cols */
+ byte *s = line + 3 * (src_x >> dup->cols);
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d += 3;
+ src_x += image->width;
+ }
+ src_y += image->height;
+ }
+ }
+
+ /* Complete bottom row */
+ /* Each block (i, rows) is block (i + 1, rows) with horizontally adjacent pixel pairs averaged */
+ for (uns i = dup->cols; i--; )
+ {
+ byte *d = image_dup_block(dup, i, dup->rows);
+ byte *s = image_dup_block(dup, i + 1, dup->rows);
+ for (uns y = 0; y < (uns)(1 << dup->rows); y++)
+ for (uns x = 0; x < (uns)(1 << i); x++)
+ {
+ pixels_average(d, s, s + 3);
+ d += 3;
+ s += 6;
+ }
+ }
+
+ /* Complete remaining blocks */
+ /* Each block (i, j) is block (i, j + 1) with vertically adjacent pixel pairs averaged */
+ for (uns i = 0; i <= dup->cols; i++)
+ {
+ uns line_size = (3 << i);
+ for (uns j = dup->rows; j--; )
+ {
+ byte *d = image_dup_block(dup, i, j);
+ byte *s = image_dup_block(dup, i, j + 1);
+ for (uns y = 0; y < (uns)(1 << j); y++)
+ {
+ for (uns x = 0; x < (uns)(1 << i); x++)
+ {
+ pixels_average(d, s, s + line_size);
+ d += 3;
+ s += 3;
+ }
+ /* Skip the second source line of the pair that was just averaged */
+ s += line_size;
+ }
+ }
+ }
+}
+
+/* Squared difference of two sample values. */
+static inline uns
+err (int a, int b)
+{
+  int diff = a - b;
+  return diff * diff;
+}
+
+/*
+ * Sum of squared differences between the bytes in [pos1, end1) and the
+ * equally long run of bytes starting at pos2.
+ */
+static inline uns
+err_sum(byte *pos1, byte *end1, byte *pos2)
+{
+  uns total = 0;
+  for (; pos1 != end1; pos1++, pos2++)
+    total += err(*pos1, *pos2);
+  return total;
+}
+
+/*
+ * Sum of squared per-channel differences between the pixels in [pos1, end1),
+ * read sequentially, and pixels of a second image read in a transformed
+ * order: pos2 advances by add1 bytes after every pixel and by an extra
+ * add2 bytes after every `width` pixels.
+ */
+static inline uns
+err_sum_transformed(byte *pos1, byte *end1, byte *pos2, uns width, int add1, int add2)
+{
+  DBG("err_sum_transformed(): %p %p %p %d %d %d", pos1, end1, pos2, width, add1, add2);
+  uns total = 0;
+  while (pos1 != end1)
+    {
+      uns remaining = width;
+      while (remaining--)
+        {
+          for (uns channel = 0; channel < 3; channel++)
+            total += err(pos1[channel], pos2[channel]);
+          pos1 += 3;
+          pos2 += add1;
+        }
+      pos2 += add2;
+    }
+  return total;
+}
+
+/*
+ * Return non-zero when the aspect ratios width1:height1 and width2:height2
+ * are close enough: neither cross product may exceed the other one
+ * multiplied by image_dup_ratio_threshold / 32.
+ */
+static inline int
+aspect_ratio_test(uns width1, uns height1, uns width2, uns height2)
+{
+  uns r1 = width1 * height2;
+  uns r2 = height1 * width2;
+  if (r1 > ((r2 * image_dup_ratio_threshold) >> 5))
+    return 0;
+  if (r2 > ((r1 * image_dup_ratio_threshold) >> 5))
+    return 0;
+  return 1;
+}
+
+/*
+ * Compare the (0, 0) blocks of both images, i.e. their single-pixel
+ * reductions.  Returns non-zero when the summed squared difference of the
+ * three channels does not exceed image_dup_error_threshold.
+ */
+static inline int
+average_compare(struct image_dup *dup1, struct image_dup *dup2)
+{
+  byte *avg1 = image_dup_block(dup1, 0, 0);
+  byte *avg2 = image_dup_block(dup2, 0, 0);
+  uns total = 0;
+  for (uns channel = 0; channel < 3; channel++)
+    total += err(avg1[channel], avg2[channel]);
+  return total <= image_dup_error_threshold;
+}
+
+/*
+ * Compare block (col, row) of dup1 with a block of dup2 read in the order
+ * given by transformation `trans`:
+ *   0    same order
+ *   1    every line read backwards
+ *   2    lines taken in reverse order
+ *   3    both of the above
+ *   4-7  the transposed block (row, col) of dup2, walked along its columns
+ *        in the four possible directions
+ * Returns non-zero when the squared error summed over all channels and
+ * divided by the number of pixels does not exceed image_dup_error_threshold.
+ *
+ * Every (add1, add2, start offset) triple below must keep pos2 inside block2:
+ * after one line of block1 (1 << col pixels) the walk through block2 has to
+ * land on the start of the next line/column to be read.
+ */
+static int
+blocks_compare(struct image_dup *dup1, struct image_dup *dup2, uns col, uns row, uns trans)
+{
+  DBG("blocks_compare(): col=%d row=%d trans=%d", col, row, trans);
+  byte *block1 = image_dup_block(dup1, col, row);
+  byte *block2 = (trans < 4) ? image_dup_block(dup2, col, row) : image_dup_block(dup2, row, col);
+  int size = 3 << (col + row);	/* bytes in either block */
+  int line = 3 << col;		/* bytes per line of block2 for trans < 4 */
+  int tline = 3 << row;		/* bytes per line of the transposed block2 (trans >= 4) */
+  int add1, add2;
+  switch (trans)
+    {
+      case 0: ;
+        uns plain_err = (err_sum(block1, block1 + size, block2) >> (col + row));
+        DBG("average error=%d", plain_err);
+        return plain_err <= image_dup_error_threshold;
+      case 1:
+        add1 = -3;
+        add2 = 2 * line;
+        block2 += line - 3;
+        break;
+      case 2:
+        /* One pixel forward is 3 bytes, not 1 */
+        add1 = 3;
+        add2 = -2 * line;
+        block2 += size - line;
+        break;
+      case 3:
+        add1 = -3;
+        add2 = 0;
+        block2 += size - 3;
+        break;
+      /* Transposed cases: one step down a column of block2 is one of ITS lines (3 << row bytes) */
+      case 4:
+        add1 = tline;
+        add2 = -size + 3;
+        break;
+      case 5:
+        add1 = -tline;
+        add2 = size + 3;
+        block2 += size - tline;
+        break;
+      case 6:
+        add1 = tline;
+        add2 = -size - 3;
+        block2 += tline - 3;
+        break;
+      case 7:
+        add1 = -tline;
+        add2 = size - 3;
+        block2 += size - 3;
+        break;
+      default:
+        ASSERT(0);
+    }
+  uns avg_err = (err_sum_transformed(block1, block1 + size, block2, (1 << col), add1, add2) >> (col + row));
+  DBG("average error=%d", avg_err);
+  return avg_err <= image_dup_error_threshold;
+}
+
+/*
+ * Compare the full-resolution pixels of both images under transformation
+ * `trans` (same numbering as in blocks_compare()).  The caller must ensure
+ * that image 2 has the same dimensions as image 1 for trans < 4 and the
+ * swapped dimensions for trans >= 4.
+ * Returns non-zero when the squared error summed over all channels and
+ * divided by the number of pixels does not exceed image_dup_error_threshold.
+ */
+static int
+same_size_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans)
+{
+  byte *block1 = dup1->image->pixels;
+  byte *block2 = dup2->image->pixels;
+  DBG("same_size_compare(): trans=%d", trans);
+  uns pixels = dup1->width * dup1->height;
+  int size = 3 * pixels;		/* bytes in either image */
+  /* Line stride of image 2; equals 3 * dup1->width for trans < 4 and 3 * dup1->height for trans >= 4 */
+  int line2 = 3 * dup2->width;
+  int add1, add2;
+  switch (trans)
+    {
+      case 0: ;
+        uns plain_err = (err_sum(block1, block1 + size, block2) / pixels);
+        DBG("average error=%d", plain_err);
+        return plain_err <= image_dup_error_threshold;
+      case 1:
+        add1 = -3;
+        add2 = 2 * line2;
+        block2 += line2 - 3;
+        break;
+      case 2:
+        /* One pixel forward is 3 bytes, not 1 */
+        add1 = 3;
+        add2 = -2 * line2;
+        block2 += size - line2;
+        break;
+      case 3:
+        add1 = -3;
+        add2 = 0;
+        block2 += size - 3;
+        break;
+      /* Transposed cases: stepping along a column of image 2 uses image 2's own line stride */
+      case 4:
+        add1 = line2;
+        add2 = -size + 3;
+        break;
+      case 5:
+        add1 = -line2;
+        add2 = size + 3;
+        block2 += size - line2;
+        break;
+      case 6:
+        add1 = line2;
+        add2 = -size - 3;
+        block2 += line2 - 3;
+        break;
+      case 7:
+        add1 = -line2;
+        add2 = size - 3;
+        block2 += size - 3;
+        break;
+      default:
+        ASSERT(0);
+    }
+  uns avg_err = (err_sum_transformed(block1, block1 + size, block2, dup1->width, add1, add2) / pixels);
+  DBG("average error=%d", avg_err);
+  return avg_err <= image_dup_error_threshold;
+}
+
+/*
+ * Decide whether two prepared images are duplicates of each other.
+ *
+ * `trans` is a bitmask of transformations to try: bit t enables
+ * transformation t as understood by blocks_compare().  Bits 0-3 compare the
+ * images in the same orientation, bits 4-7 compare image 1 with the
+ * transposed image 2.
+ *
+ * Returns 1 as soon as one enabled transformation passes all tests,
+ * 0 otherwise.
+ */
+int
+image_dup_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans)
+{
+ /* Cheap rejection on the single-pixel averages */
+ if (!average_compare(dup1, dup2))
+ return 0;
+ /* Drop the groups of transformations whose geometry cannot match */
+ if ((dup1->flags & dup2->flags) & IMAGE_DUP_FLAG_SCALE)
+ {
+ DBG("Scale support");
+ if (!aspect_ratio_test(dup1->width, dup1->height, dup2->width, dup2->height))
+ trans &= 0xf0;
+ if (!aspect_ratio_test(dup1->width, dup1->height, dup2->height, dup2->width))
+ trans &= 0x0f;
+ }
+ else
+ {
+ /* At least one image lacks IMAGE_DUP_FLAG_SCALE: require exactly equal (or swapped) dimensions */
+ DBG("No scale support");
+ if (!(dup1->width == dup2->width && dup1->height == dup2->height))
+ trans &= 0xf0;
+ if (!(dup1->width == dup2->height && dup1->height == dup2->width))
+ trans &= 0x0f;
+ }
+ if (!trans)
+ return 0;
+ if (trans & 0x0f)
+ {
+ uns cols = MIN(dup1->cols, dup2->cols);
+ uns rows = MIN(dup1->rows, dup2->rows);
+ for (uns t = 0; t < 4; t++)
+ if (trans & (1 << t))
+ {
+ DBG("Testing trans %d", t);
+ /* Walk from the smallest blocks up to block (cols, rows), which is reached at i == 0 */
+ for (uns i = MAX(cols, rows); i--; )
+ {
+ uns col = MAX(0, (int)(cols - i));
+ uns row = MAX(0, (int)(rows - i));
+ if (!blocks_compare(dup1, dup2, col, row, t))
+ break;
+ /* All blocks matched; images of identical size must also pass the full-resolution test */
+ if (!i &&
+ (dup1->width != dup2->width || dup1->height != dup2->height ||
+ same_size_compare(dup1, dup2, t)))
+ return 1;
+ }
+ }
+ }
+ if (trans & 0xf0)
+ {
+ /* Transposed comparison: columns of image 1 are paired with rows of image 2 */
+ uns cols = MIN(dup1->cols, dup2->rows);
+ uns rows = MIN(dup1->rows, dup2->cols);
+ for (uns t = 4; t < 8; t++)
+ if (trans & (1 << t))
+ {
+ DBG("Testing trans %d", t);
+ /* Same walk as above, from the smallest blocks up to block (cols, rows) */
+ for (uns i = MAX(cols, rows); i--; )
+ {
+ uns col = MAX(0, (int)(cols - i));
+ uns row = MAX(0, (int)(rows - i));
+ if (!blocks_compare(dup1, dup2, col, row, t))
+ break;
+ /* All blocks matched; images with exactly swapped dimensions must also pass the full-resolution test */
+ if (!i &&
+ (dup1->width != dup2->height || dup1->height != dup2->width ||
+ same_size_compare(dup1, dup2, t)) )
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
--- /dev/null
+#ifndef _IMAGES_DUP_CMP_H
+#define _IMAGES_DUP_CMP_H
+
+struct image;
+
+/* Per-image data for duplicate detection, filled in by image_dup_init(). */
+struct image_dup {
+ struct image *image;	/* the image this structure was initialized from */
+ byte *buf;		/* buffer holding all reduced copies (blocks) of the image */
+ uns buf_size;		/* size of buf in bytes */
+ uns flags;		/* IMAGE_DUP_FLAG_xxx */
+ uns cols;		/* highest horizontal block level; the widest block has (1 << cols) pixels per line */
+ uns rows;		/* highest vertical block level; the tallest block has (1 << rows) lines */
+ uns line;		/* set to 6 << cols; used for locating blocks inside buf */
+ uns width;		/* width of the original image */
+ uns height;		/* height of the original image */
+};
+
+#define IMAGE_DUP_FLAG_SCALE 0x1
+
+#define IMAGE_DUP_TRANS_ID 0x01
+#define IMAGE_DUP_TRANS_ALL 0xff
+
+void image_dup_init(struct image_dup *dup, struct image *image, struct mempool *pool);
+int image_dup_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans);
+
+#endif
+++ /dev/null
-/*
- * Image Library -- Duplicates Comparison
- *
- * (c) 2006 Pavel Charvat <pchar@ucw.cz>
- *
- * This software may be freely distributed and used according to the terms
- * of the GNU Lesser General Public License.
- *
- * FIXME:
- * - many possible optimization
- * - compare normalized pictures (brightness, ...)
- * - better image scale... now it can completely miss some rows/cols of pixels
- * - maybe better/slower last step
- * - different thresholds for various transformations
- * - do not test all transformations for symetric pictures
- * - ... secret ideas :-)
- */
-
-#undef LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "lib/mempool.h"
-#include "images/images.h"
-#include "images/image-dup.h"
-
-static uns image_dup_scale_min_size = 8;
-static uns image_dup_ratio_threshold = 140;
-static uns image_dup_error_threshold = 2000;
-
-static inline byte *
-image_dup_block(struct image_dup *dup, uns col, uns row)
-{
- ASSERT(col <= dup->cols && row <= dup->rows);
- return dup->buf + (dup->line << row) + (3 << (row + col));
-}
-
-static inline void
-pixels_average(byte *dest, byte *src1, byte *src2)
-{
- dest[0] = ((uns)src1[0] + (uns)src2[0]) >> 1;
- dest[1] = ((uns)src1[1] + (uns)src2[1]) >> 1;
- dest[2] = ((uns)src1[2] + (uns)src2[2]) >> 1;
-}
-
-void
-image_dup_init(struct image_dup *dup, struct image *image, struct mempool *pool)
-{
- ASSERT(image->width && image->height);
-
- dup->image = image;
- dup->width = image->width;
- dup->height = image->height;
- for (dup->cols = 0; (uns)(1 << dup->cols) < image->width; dup->cols++);
- for (dup->rows = 0; (uns)(1 << dup->rows) < image->height; dup->rows++);
- dup->buf = mp_alloc(pool, 12 << (dup->cols + dup->rows));
- dup->line = 6 << dup->cols;
- dup->flags = 0;
- if (image->width >= image_dup_scale_min_size && image->height >= image_dup_scale_min_size)
- dup->flags |= IMAGE_DUP_FLAG_SCALE;
-
- /* Scale original image to right bottom block */
- {
- byte *d = image_dup_block(dup, dup->cols, dup->rows);
- uns width = 1 << dup->cols;
- uns height = 1 << dup->rows;
- uns line_size = 3 * image->width;
- uns src_y = 0;
- for (uns y = 0; y < height; y++)
- {
- byte *line = image->pixels + line_size * (src_y >> dup->rows);
- uns src_x = 0;
- for (uns x = 0; x < width; x++)
- {
- byte *s = line + 3 * (src_x >> dup->cols);
- d[0] = s[0];
- d[1] = s[1];
- d[2] = s[2];
- d += 3;
- src_x += image->width;
- }
- src_y += image->height;
- }
- }
-
- /* Complete bottom row */
- for (uns i = dup->cols; i--; )
- {
- byte *d = image_dup_block(dup, i, dup->rows);
- byte *s = image_dup_block(dup, i + 1, dup->rows);
- for (uns y = 0; y < (uns)(1 << dup->rows); y++)
- for (uns x = 0; x < (uns)(1 << i); x++)
- {
- pixels_average(d, s, s + 3);
- d += 3;
- s += 6;
- }
- }
-
- /* Complete remaining blocks */
- for (uns i = 0; i <= dup->cols; i++)
- {
- uns line_size = (3 << i);
- for (uns j = dup->rows; j--; )
- {
- byte *d = image_dup_block(dup, i, j);
- byte *s = image_dup_block(dup, i, j + 1);
- for (uns y = 0; y < (uns)(1 << j); y++)
- {
- for (uns x = 0; x < (uns)(1 << i); x++)
- {
- pixels_average(d, s, s + line_size);
- d += 3;
- s += 3;
- }
- s += line_size;
- }
- }
- }
-}
-
-static inline uns
-err (int a, int b)
-{
- a -= b;
- return a * a;
-}
-
-static inline uns
-err_sum(byte *pos1, byte *end1, byte *pos2)
-{
- uns e = 0;
- while (pos1 != end1)
- e += err(*pos1++, *pos2++);
- return e;
-}
-
-static inline uns
-err_sum_transformed(byte *pos1, byte *end1, byte *pos2, uns width, int add1, int add2)
-{
- DBG("err_sum_transformed(): %p %p %p %d %d %d", pos1, end1, pos2, width, add1, add2);
- uns e = 0;
- while (pos1 != end1)
- {
- for (uns i = 0; i < width; i++, pos2 += add1)
- {
- e += err(pos1[0], pos2[0]);
- e += err(pos1[1], pos2[1]);
- e += err(pos1[2], pos2[2]);
- pos1 += 3;
- }
- pos2 += add2;
- }
- return e;
-}
-
-static inline int
-aspect_ratio_test(uns width1, uns height1, uns width2, uns height2)
-{
- uns r1 = width1 * height2;
- uns r2 = height1 * width2;
- return
- r1 <= ((r2 * image_dup_ratio_threshold) >> 5) &&
- r2 <= ((r1 * image_dup_ratio_threshold) >> 5);
-}
-
-static inline int
-average_compare(struct image_dup *dup1, struct image_dup *dup2)
-{
- byte *block1 = image_dup_block(dup1, 0, 0);
- byte *block2 = image_dup_block(dup2, 0, 0);
- uns e =
- err(block1[0], block2[0]) +
- err(block1[1], block2[1]) +
- err(block1[2], block2[2]);
- return e <= image_dup_error_threshold;
-}
-
-static int
-blocks_compare(struct image_dup *dup1, struct image_dup *dup2, uns col, uns row, uns trans)
-{
- DBG("blocks_compare(): col=%d row=%d trans=%d", col, row, trans);
- byte *block1 = image_dup_block(dup1, col, row);
- byte *block2 = (trans < 4) ? image_dup_block(dup2, col, row) : image_dup_block(dup2, row, col);
- int add1, add2;
- switch (trans)
- {
- case 0: ;
- uns err = (err_sum(block1, block1 + (3 << (col + row)), block2) >> (col + row));
- DBG("average error=%d", err);
- return err <= image_dup_error_threshold;
- case 1:
- add1 = -3;
- add2 = 6 << col;
- block2 += (3 << col) - 3;
- break;
- case 2:
- add1 = 1;
- add2 = -(6 << col);
- block2 += (3 << (col + row)) - (3 << col);
- break;
- case 3:
- add1 = -3;
- add2 = 0;
- block2 += (3 << (col + row)) - 3;
- break;
- case 4:
- add1 = (3 << col);
- add2 = -(3 << (col + row)) + 3;
- break;
- case 5:
- add1 = -(3 << col);
- add2 = (3 << (col + row)) + 3;
- block2 += (3 << (col + row)) - (3 << col);
- break;
- case 6:
- add1 = (3 << col);
- add2 = -(3 << (col + row)) - 3;
- block2 += (3 << col) - 3;
- break;
- case 7:
- add1 = -(3 << col);
- add2 = (3 << (col + row)) - 3;
- block2 += (3 << (col + row)) - 3;
- break;
- default:
- ASSERT(0);
- }
- uns err = (err_sum_transformed(block1, block1 + (3 << (col + row)), block2, (1 << col), add1, add2) >> (col + row));
- DBG("average error=%d", err);
- return err <= image_dup_error_threshold;
-}
-
-static int
-same_size_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans)
-{
- byte *block1 = dup1->image->pixels;
- byte *block2 = dup2->image->pixels;
- DBG("same_size_compare(): trans=%d", trans);
- int add1, add2;
- switch (trans)
- {
- case 0: ;
- uns err = (err_sum(block1, block1 + 3 * dup1->width * dup1->height, block2) / (dup1->width * dup1->height));
- DBG("average error=%d", err);
- return err <= image_dup_error_threshold;
- case 1:
- add1 = -3;
- add2 = 6 * dup1->width;
- block2 += 3 * (dup1->width - 1);
- break;
- case 2:
- add1 = 1;
- add2 = -6 * dup1->width;
- block2 += 3 * dup1->width * (dup1->height - 1);
- break;
- case 3:
- add1 = -3;
- add2 = 0;
- block2 += 3 * (dup1->width * dup1->height - 1);
- break;
- case 4:
- add1 = 3 * dup1->width;
- add2 = -3 * (dup1->width * dup1->height - 1);
- break;
- case 5:
- add1 = -3 * dup1->width;
- add2 = 3 * (dup1->width * dup1->height + 1);
- block2 += 3 * dup1->width * (dup1->height - 1);
- break;
- case 6:
- add1 = 3 * dup1->width;
- add2 = -3 * (dup1->width * dup1->height + 1);
- block2 += 3 * (dup1->width - 1);
- break;
- case 7:
- add1 = -3 * dup1->width;
- add2 = 3 * (dup1->width * dup1->height - 1);
- block2 += 3 * (dup1->width * dup1->height - 1);
- break;
- default:
- ASSERT(0);
- }
- uns err = (err_sum_transformed(block1, block1 + 3 * dup1->width * dup1->height, block2, dup1->width, add1, add2) / (dup1->width * dup1->height));
- DBG("average error=%d", err);
- return err <= image_dup_error_threshold;
-}
-
-int
-image_dup_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans)
-{
- if (!average_compare(dup1, dup2))
- return 0;
- if ((dup1->flags & dup2->flags) & IMAGE_DUP_FLAG_SCALE)
- {
- DBG("Scale support");
- if (!aspect_ratio_test(dup1->width, dup1->height, dup2->width, dup2->height))
- trans &= 0xf0;
- if (!aspect_ratio_test(dup1->width, dup1->height, dup2->height, dup2->width))
- trans &= 0x0f;
- }
- else
- {
- DBG("No scale support");
- if (!(dup1->width == dup2->width && dup1->height == dup2->height))
- trans &= 0xf0;
- if (!(dup1->width == dup2->height && dup1->height == dup2->width))
- trans &= 0x0f;
- }
- if (!trans)
- return 0;
- if (trans & 0x0f)
- {
- uns cols = MIN(dup1->cols, dup2->cols);
- uns rows = MIN(dup1->rows, dup2->rows);
- for (uns t = 0; t < 4; t++)
- if (trans & (1 << t))
- {
- DBG("Testing trans %d", t);
- for (uns i = MAX(cols, rows); i--; )
- {
- uns col = MAX(0, (int)(cols - i));
- uns row = MAX(0, (int)(rows - i));
- if (!blocks_compare(dup1, dup2, col, row, t))
- break;
- if (!i &&
- (dup1->width != dup2->width || dup1->height != dup2->height ||
- same_size_compare(dup1, dup2, t)))
- return 1;
- }
- }
- }
- if (trans & 0xf0)
- {
- uns cols = MIN(dup1->cols, dup2->rows);
- uns rows = MIN(dup1->rows, dup2->cols);
- for (uns t = 4; t < 8; t++)
- if (trans & (1 << t))
- {
- DBG("Testing trans %d", t);
- for (uns i = MAX(cols, rows); i--; )
- {
- uns col = MAX(0, (int)(cols - i));
- uns row = MAX(0, (int)(rows - i));
- if (!blocks_compare(dup1, dup2, col, row, t))
- break;
- if (!i &&
- (dup1->width != dup2->height || dup1->height != dup2->width ||
- same_size_compare(dup1, dup2, t)) )
- return 1;
- }
- }
- }
- return 0;
-}
+++ /dev/null
-#ifndef _IMAGES_IMAGE_DUP_H
-#define _IMAGES_IMAGE_DUP_H
-
-struct image;
-
-struct image_dup {
- struct image *image;
- byte *buf;
- uns flags;
- uns cols;
- uns rows;
- uns line;
- uns width;
- uns height;
-};
-
-#define IMAGE_DUP_FLAG_SCALE 0x1
-
-#define IMAGE_DUP_TRANS_ID 0x01
-#define IMAGE_DUP_TRANS_ALL 0xff
-
-void image_dup_init(struct image_dup *dup, struct image *image, struct mempool *pool);
-int image_dup_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans);
-
-#endif
-#define LOCAL_DEBUG
+#undef LOCAL_DEBUG
#include "sherlock/sherlock.h"
#include "lib/mempool.h"
#include "lang/lang.h"
#include "lib/base224.h"
#include "lib/bbuf.h"
+#include "lib/clists.h"
#include "images/images.h"
#include "images/image-obj.h"
#include "images/image-sig.h"
-#include "images/image-dup.h"
+#include "images/dup-cmp.h"
+#include "images/kd-tree.h"
+#include "images/color.h"
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
+static struct fastbuf *fb_cards;
+static struct fastbuf *fb_card_attrs;
+static struct buck2obj_buf *buck2obj;
+
/* This should happen in gatherer or scanner */
static void
generate_signatures(uns limit)
{
- struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
- struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
+ fb_cards = index_bopen("cards", O_RDONLY);
+ fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
struct card_attr ca;
struct image_signature sig;
struct buck2obj_buf *bob = buck2obj_alloc();
uns count = 0;
- log(L_INFO, "Generating image signatures");
+ if (limit == ~0U)
+ log(L_INFO, "Generating image signatures");
+ else
+ log(L_INFO, "Generating at most %d image signatures", limit);
bputl(fb_signatures, 0);
imo_decompress_thumbnails_init();
die("Failed to read card");
if (attr = obj_find_attr(obj, 'N'))
{
+#ifdef LOCAL_DEBUG
byte *url = obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U');
DBG("Reading oid=%d url=%s", oid, url);
+#endif
struct image_obj imo;
imo_init(&imo, pool, obj);
if (imo_decompress_thumbnail(&imo))
if (sig.len)
bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region));
count++;
+ if (count % 10000 == 0)
+ log(L_DEBUG, "... passed %d images", count);
if (count >= limit)
break;
}
#define DBG_KD(x...) do{}while(0)
#endif
+static struct image_tree tree;
+static struct signature_record *records;
+static struct signature_record **precords;
+
static void
-build_search_tree(void)
+build_kd_tree(void)
{
log(L_INFO, "Building KD-tree");
struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY);
- struct image_tree tree;
tree.count = bgetl(fb_signatures);
ASSERT(tree.count < 0x80000000);
if (!tree.count)
else
{
DBG("Reading %d signatures", tree.count);
- struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record));
- struct signature_record **precords = xmalloc(tree.count * sizeof(void *));
+ records = xmalloc(tree.count * sizeof(struct signature_record));
+ precords = xmalloc(tree.count * sizeof(void *));
for (uns i = 0; i < tree.count; i++)
{
bread(fb_signatures, &records[i].oid, sizeof(oid_t));
tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec;
}
bclose(fb_signatures);
-
+
for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++);
- DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth,
+ DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth,
stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
uns leaves_index = 1 << (tree.depth - 1);
tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node));
tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf));
-
+
/* Initialize recursion */
struct stk {
struct image_bbox bbox;
for (uns i = 0; i < IMAGE_VEC_K; i++)
stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i];
uns entry_index = 0;
-
+
/* Main loop */
while (stk != stk_top)
{
stk - stk_top, stk->index, stk->count, stk->start - precords,
stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1));
ASSERT(stk->count);
-
+
/* Create leaf node */
if (stk->index >= leaves_index || stk->count < 2)
{
if (stk->bbox.vec[1].f[i])
{
uns value =
- (record->vec.f[i] - stk->bbox.vec[0].f[i]) *
+ (record->vec.f[i] - stk->bbox.vec[0].f[i]) *
((1 << bits) - 1) / stk->bbox.vec[1].f[i];
ASSERT(value < (uns)(1 << bits));
leaf->flags |= value;
}
}
if (!stk->count)
- leaf->flags |= IMAGE_LEAF_LAST;
- DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x",
+ leaf->flags |= IMAGE_LEAF_LAST;
+ DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x",
leaf->oid, stk_print_image_vector(&record->vec), leaf->flags);
}
stk--;
}
-
+
/* Create internal node */
else
{
/* Sort... FIXME: we only need the median */
build_search_tree_sort(stk->count, dim, stk->start);
-
+
/* Split in the middle */
uns index = stk->index;
stk[1].index = stk[0].index * 2;
bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
bclose(fb_tree);
- xfree(tree.leaves);
- xfree(tree.nodes);
- xfree(precords);
- xfree(records);
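+ /*
+ * tree.leaves, tree.nodes, precords and records are intentionally not
+ * freed here (no xfree): pass1() still reads tree.leaves, tree.nodes
+ * and precords, and precords presumably points into records.
+ */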
+ }
+}
+
+/*********************************************************************************/
+
+static uns pass1_buf_size = 400 << 20;
+static uns pass1_max_count = 100000;
+static uns pass1_search_dist = 40;
+static uns pass1_search_count = 500;
+
+static struct mempool *pass1_pool;
+static byte *pass1_buf_start;
+static byte *pass1_buf_pos;
+static uns pass1_buf_free;
+static uns pass1_buf_used;
+static clist pass1_buf_list;
+static clist pass1_lru_list;
+static u64 pass1_lookups;
+static u64 pass1_reads;
+static u64 pass1_pairs;
+static u64 pass1_dups;
+static u64 pass1_shrinks;
+
+/* One cached image of pass 1; nodes live in the pass1_hash table keyed by oid. */
+struct pass1_node {
+ cnode lru_node;		/* link in pass1_lru_list while the node is unlocked */
+ cnode buf_node;		/* link in pass1_buf_list, used only if buf_size is non-zero */
+ uns buf_size;			/* number of bytes this node occupies in the pass 1 buffer */
+ byte *buf;			/* start of this node's data in the pass 1 buffer */
+ oid_t oid;			/* hash key */
+ byte *url;			/* zero-terminated URL, stored inside buf */
+ struct image image;		/* thumbnail; image.pixels points inside buf */
+ struct image_dup dup;		/* duplicate-comparison data; dup.buf points inside buf */
+};
+
+#define HASH_PREFIX(x) pass1_hash_##x
+#define HASH_NODE struct pass1_node
+#define HASH_KEY_ATOMIC oid
+#define HASH_WANT_CLEANUP
+#define HASH_WANT_FIND
+#define HASH_WANT_NEW
+#define HASH_WANT_REMOVE
+#include "lib/hashtable.h"
+
+/* Allocate the pass 1 buffer of pass1_buf_size bytes and mark it as empty. */
+static inline void
+pass1_buf_init(void)
+{
+  //DBG("pass1_buf_init()");
+  byte *mem = xmalloc(pass1_buf_size);
+  pass1_buf_start = mem;
+  pass1_buf_pos = mem;
+  pass1_buf_free = pass1_buf_size;
+  pass1_buf_used = 0;
+}
+
+/* Release the memory obtained by pass1_buf_init(). */
+static inline void
+pass1_buf_cleanup(void)
+{
+ //DBG("pass1_buf_cleanup()");
+ xfree(pass1_buf_start);
+}
+
+/*
+ * Drop a node from the cache: unlink it from the LRU list and, if it owns
+ * buffer space, from the buffer list, then remove it from the hash table.
+ * Only the used-bytes counter is updated here; the released space becomes
+ * allocatable again after the next pass1_buf_shrink().
+ */
+static void
+pass1_node_free(struct pass1_node *node)
+{
+  //DBG("pass1_node_free(%d)", (uns)node->oid);
+  clist_remove(&node->lru_node);
+  if (node->buf_size)
+    {
+      clist_remove(&node->buf_node);
+      pass1_buf_used -= node->buf_size;
+    }
+  pass1_hash_remove(node);
+}
+
+/*
+ * Free the node at the head of the LRU list, i.e. the one unlocked the
+ * longest time ago.  The list must not be empty.
+ */
+static inline void
+pass1_node_free_lru(void)
+{
+  ASSERT(!clist_empty(&pass1_lru_list));
+  void *head = clist_head(&pass1_lru_list);
+  struct pass1_node *victim = SKIP_BACK(struct pass1_node, lru_node, head);
+  pass1_node_free(victim);
+}
+
+/*
+ * Fix up the pointers of a node whose data were moved `move` bytes towards
+ * the start of the buffer (new address = old address - move).
+ * node->buf itself is not touched here.
+ */
+static inline void
+pass1_node_after_move(struct pass1_node *node, addr_int_t move)
+{
+ //DBG("pass1_node_after_move(%d, %d)", (uns)node->oid, (uns)move);
+ /* adjust internal pointers */
+#define MOVE(x) x = (byte *)(x) - move
+ MOVE(node->url);
+ MOVE(node->image.pixels);
+ MOVE(node->dup.buf);
+#undef MOVE
+}
+
+/*
+ * Compact the pass 1 buffer: walk the nodes in pass1_buf_list order and
+ * move each node's data down so that they follow each other from the start
+ * of the buffer without gaps, fixing the nodes' pointers on the way.
+ * Afterwards pass1_buf_pos and pass1_buf_free describe the space behind
+ * the last node.
+ */
+static inline void
+pass1_buf_shrink(void)
+{
+ DBG("pass1_buf_shrink()");
+ pass1_shrinks++;
+ pass1_buf_free = pass1_buf_size;
+ pass1_buf_pos = pass1_buf_start;
+ CLIST_FOR_EACH(void *, p, pass1_buf_list)
+ {
+ struct pass1_node *node = SKIP_BACK(struct pass1_node, buf_node, p);
+ if (node->buf != pass1_buf_pos)
+ {
+ /* memmove, because the old and the new location may overlap */
+ memmove(pass1_buf_pos, node->buf, node->buf_size);
+ pass1_node_after_move(node, node->buf - pass1_buf_pos);
+ node->buf = pass1_buf_pos;
+ }
+ pass1_buf_pos += node->buf_size;
+ pass1_buf_free -= node->buf_size;
}
}
+/*
+ * Allocate `size` bytes at the current end of the pass 1 buffer.
+ *
+ * If the free tail is too short, least recently used nodes are freed until
+ * the request fits and at most half of the buffer is in use, and the buffer
+ * is compacted.  Dies with "Buffer too small" when no node is left to free.
+ * Compaction may move the data of other nodes.
+ */
+static void *
+pass1_buf_alloc(uns size)
+{
+  //DBG("pass1_buf_alloc(%d)", size);
+
+  /* not enough room behind the last node: evict and compact */
+  if (size > pass1_buf_free)
+    {
+      //DBG("freeing lru nodes");
+      for (;;)
+        {
+          int fits = size <= pass1_buf_size - pass1_buf_used;
+          int at_most_half_used = pass1_buf_used * 2 <= pass1_buf_size;
+          if (fits && at_most_half_used)
+            break;
+          if (unlikely(clist_empty(&pass1_lru_list))) // FIXME
+            die("Buffer too small");
+          pass1_node_free_lru();
+        }
+
+      pass1_buf_shrink();
+    }
+
+  /* carve the block from the free tail */
+  void *result = pass1_buf_pos;
+  pass1_buf_used += size;
+  pass1_buf_free -= size;
+  pass1_buf_pos += size;
+  return result;
+}
+
+/*
+ * Create the cache node for image `oid`: read its card from the index,
+ * decompress the thumbnail, build the duplicate-comparison data and copy
+ * the URL, the pixels and the comparison buffer into the pass 1 buffer.
+ *
+ * If the hash table already holds pass1_max_count nodes, the least
+ * recently used one is freed first.  pass1_pool is flushed on every call
+ * and serves only as temporary storage while the node is loaded.
+ * Dies if the card cannot be read or the thumbnail cannot be decompressed.
+ *
+ * The returned node is not on the LRU list; it gets there via
+ * pass1_node_unlock().
+ */
+static struct pass1_node *
+pass1_node_new(oid_t oid)
+{
+ DBG("pass1_node_new(%d)", (uns)oid);
+ if (pass1_hash_table.hash_count == pass1_max_count)
+ pass1_node_free_lru();
+ struct pass1_node *node = pass1_hash_new(oid);
+ mp_flush(pass1_pool);
+ pass1_reads++;
+
+ /* read object */
+ struct card_attr ca;
+ bsetpos(fb_card_attrs, (sh_off_t)oid * sizeof(ca)); /* FIXME: these seeks can be easily removed */
+ bread(fb_card_attrs, &ca, sizeof(ca));
+
+ bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT); /* FIXME: maybe a presort should handle these random seeks */
+ uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1);
+ uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN;
+ struct odes *obj = obj_read_bucket(buck2obj, pass1_pool, buck_type, buck_len, fb_cards, NULL);
+ if (unlikely(!obj))
+ die("Failed to read card");
+ /* NOTE(review): the result of obj_find_attr() is dereferenced without a check -- confirm every card has this attribute */
+ byte *url = obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U');
+ uns url_len = strlen(url);
+
+ /* decompress thumbnail */
+ struct image_obj imo;
+ imo_init(&imo, pass1_pool, obj);
+ if (unlikely(!imo_decompress_thumbnail(&imo)))
+ die("Cannot decompress thumbnail");
+ node->image = imo.thumb;
+
+ /* create duplicates comparison object */
+ image_dup_init(&node->dup, &node->image, pass1_pool);
+
+ /* copy data */
+ //DBG("loaded image %s s=%d d=%d", url, node->image.size, node->dup.buf_size);
+ node->buf_size = node->image.size + node->dup.buf_size + url_len + 1;
+ if (node->buf_size)
+ {
+ byte *buf = node->buf = pass1_buf_alloc(node->buf_size);
+ clist_add_tail(&pass1_buf_list, &node->buf_node);
+ /* COPY appends `size` bytes from ptr at buf, advances buf and yields the address of the copy */
+#define COPY(ptr, size) ({ void *_p=buf; uns _size=(size); buf+=_size; memcpy(_p,(ptr),_size); _p; })
+ node->url = COPY(url, url_len + 1);
+ node->image.pixels = COPY(node->image.pixels, node->image.size);
+ node->dup.buf = COPY(node->dup.buf, node->dup.buf_size);
+#undef COPY
+ }
+
+ /* the node is returned locked; pass1_node_unlock() will add it to the lru list */
+ return node;
+}
+
+/*
+ * Get the cache node of image `oid`, loading it when it is not cached yet.
+ * A cached node is taken off the LRU list, so it cannot be evicted until
+ * it is handed back by pass1_node_unlock().
+ */
+static inline struct pass1_node *
+pass1_node_lock(oid_t oid)
+{
+  DBG("pass1_node_lock(%d)", (uns)oid);
+  pass1_lookups++;
+  struct pass1_node *cached = pass1_hash_find(oid);
+  if (!cached)
+    return pass1_node_new(oid);
+  clist_remove(&cached->lru_node);
+  return cached;
+}
+
+/* Hand a locked node back: append it to the tail of the LRU list, where it becomes evictable again. */
+static inline void
+pass1_node_unlock(struct pass1_node *node)
+{
+ //DBG("pass1_node_unlock(%d)", (uns)node->oid);
+ clist_add_tail(&pass1_lru_list, &node->lru_node);
+}
+
+/*
+ * Pass 1: look for duplicate images.
+ *
+ * For every leaf of the KD-tree built by build_kd_tree(), search the tree
+ * for images near its signature vector and compare each candidate with a
+ * larger oid by image_dup_compare() with all transformations enabled.
+ * Found pairs are only logged; statistics are logged at the end.
+ */
+static void
+pass1(void)
+{
+ log(L_INFO, "Looking for duplicates");
+ ASSERT(tree.nodes);
+
+ /* initialization */
+ pass1_lookups = pass1_reads = pass1_pairs = pass1_dups = pass1_shrinks = 0;
+ fb_cards = bopen("index/cards", O_RDONLY, 10000); // FIXME
+ fb_card_attrs = bopen("index/card-attrs", O_RDONLY, sizeof(struct card_attr)); // FIXME
+ buck2obj = buck2obj_alloc();
+ imo_decompress_thumbnails_init();
+ clist_init(&pass1_lru_list);
+ clist_init(&pass1_buf_list);
+ pass1_hash_init();
+ pass1_buf_init();
+ pass1_pool = mp_new(1 << 20);
+
+ /* main loop */
+ for (uns i = 0; i < tree.count; )
+ {
+ /* lookup next image */
+ oid_t oid = tree.leaves[i].oid;
+ struct pass1_node *node = pass1_node_lock(oid);
+
+ /* compare with all near images */
+ struct image_search search;
+ /* NOTE(review): this pairs tree.leaves[i] with precords[i] -- confirm both arrays are in the same order */
+ image_search_init(&search, &tree, &precords[i]->vec, pass1_search_dist);
+ /* FIXME: can be faster than general search in KD-tree */
+ oid_t oid2;
+ uns dist;
+ /* examine at most pass1_search_count search results */
+ for (uns j = 0; j < pass1_search_count && image_search_next(&search, &oid2, &dist); j++)
+ {
+ /* handle each pair once and never compare an image with itself */
+ if (oid < oid2)
+ {
+ struct pass1_node *node2 = pass1_node_lock(oid2);
+ DBG("comparing %d and %d", oid, oid2);
+ if (image_dup_compare(&node->dup, &node2->dup, IMAGE_DUP_TRANS_ALL))
+ {
+ pass1_dups++;
+ log(L_DEBUG, "*** Found duplicates oid1=0x%x oid=0x%x", (uns)node->oid, (uns)node2->oid);
+ log(L_DEBUG, " %s", node->url);
+ log(L_DEBUG, " %s", node2->url);
+ }
+ pass1_pairs++;
+ pass1_node_unlock(node2);
+ }
+ }
+ image_search_done(&search);
+ pass1_node_unlock(node);
+ i++;
+ if (i % 1000 == 0)
+ log(L_DEBUG, "... passed %d images", i);
+ }
+
+ /* clean up */
+ pass1_hash_cleanup();
+ pass1_buf_cleanup();
+ mp_delete(pass1_pool);
+ bclose(fb_cards);
+ bclose(fb_card_attrs);
+ buck2obj_free(buck2obj);
+ imo_decompress_thumbnails_done();
+
+ /* print statistics */
+ log(L_INFO, "%d count, %Ld lookups, %Ld reads, %Ld pairs, %Ld dups, %Ld shrinks", tree.count,
+ (long long int)pass1_lookups, (long long int)pass1_reads,
+ (long long int)pass1_pairs, (long long int)pass1_dups, (long long int)pass1_shrinks);
+}
+
+/*********************************************************************************/
static char *shortopts = CF_SHORT_OPTS "";
static struct option longopts[] =
exit(1);
}
-
+
int
main(int argc UNUSED, char **argv)
{
int opt;
-
+
log_init(argv[0]);
while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
switch (opt)
if (optind != argc)
usage("Invalid usage");
- generate_signatures(~0U);
- build_search_tree();
-
+ srgb_to_luv_init();
+
+ generate_signatures(20000);
+ build_kd_tree();
+ pass1();
+
return 0;
}
case PNG_COLOR_TYPE_GRAY:
imo->thumb.flags |= IMAGE_GRAYSCALE;
png_set_gray_to_rgb(png_ptr);
- png_set_strip_alpha(png_ptr);
break;
case PNG_COLOR_TYPE_GRAY_ALPHA:
imo->thumb.flags |= IMAGE_GRAYSCALE;
png_set_gray_to_rgb(png_ptr);
+ png_set_strip_alpha(png_ptr);
break;
case PNG_COLOR_TYPE_RGB:
break;
/* Read image data */
DBG("Reading image data");
- byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, width * height * 3);
+ byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = width * height * 3);
png_bytep rows[height];
for (uns i = 0; i < height; i++, pixels += width * 3)
rows[i] = (png_bytep)pixels;
jpeg_start_decompress(&cinfo);
ASSERT(imo->thumb.width == cinfo.output_width && imo->thumb.height == cinfo.output_height);
ASSERT(sizeof(JSAMPLE) == 1);
- byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, cinfo.output_width * cinfo.output_height * 3);
+ byte *pixels = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = cinfo.output_width * cinfo.output_height * 3);
if (cinfo.out_color_space == JCS_RGB)
{ /* Read RGB pixels */
uns size = cinfo.output_width * 3;
PixelPacket *pixels = (PixelPacket *)AcquireImagePixels(image, 0, 0, image->columns, image->rows, &magick_exception);
ASSERT(pixels);
uns size = image->columns * image->rows;
- byte *p = imo->thumb.pixels = mp_alloc(imo->pool, size * 3);
+ byte *p = imo->thumb.pixels = mp_alloc(imo->pool, imo->thumb.size = size * 3);
for (uns i = 0; i < size; i++)
{
p[0] = pixels->red >> (QuantumDepth - 8);
+++ /dev/null
-#include "lib/heap.h"
-#include <alloca.h>
-
-#define IMAGE_SEARCH_DIST_UNLIMITED (~0U)
-
-/* FIXME: support full length of oid_t, currently must be <2^31 */
-#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U
-struct image_search_item {
- u32 dist;
- u32 index;
- struct image_bbox bbox;
-};
-
-#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist)
-
-struct image_search {
- struct image_tree *tree;
- struct image_node *nodes;
- struct image_leaf *leaves;
- struct image_vector query;
- struct image_search_item *buf;
- u32 *heap;
- uns count, visited, size, max_dist;
-};
-
-#define SQR(x) ((x)*(x))
-
-static void
-image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist)
-{
- // FIXME: empty tree
- is->tree = tree;
- is->nodes = tree->nodes;
- is->leaves = tree->leaves;
- is->query = *query;
- is->max_dist = max_dist;
- is->size = 0x1000;
- is->buf = xmalloc((is->size + 1) * sizeof(struct image_search_item));
- is->heap = xmalloc((is->size + 1) * sizeof(u32));
- is->visited = is->count = 1;
- is->heap[1] = 1;
- struct image_search_item *item = is->buf + 1;
- item->index = 1;
- item->bbox = tree->bbox;
- item->dist = 0;
- for (uns i = 0; i < IMAGE_VEC_K; i++)
- {
- if (query->f[i] < item->bbox.vec[0].f[i])
- item->dist += SQR(item->bbox.vec[0].f[i] - query->f[i]);
- else if (query->f[i] > item->bbox.vec[1].f[i])
- item->dist += SQR(query->f[i] - item->bbox.vec[0].f[i]);
- else
- {
- item->dist = 0;
- break;
- }
- }
-}
-
-static void
-image_search_done(struct image_search *is)
-{
- xfree(is->buf);
- xfree(is->heap);
-}
-
-static void
-image_search_grow_slow(struct image_search *is)
-{
- is->size *= 2;
- is->buf = xrealloc(is->buf, (is->size + 1) * sizeof(struct image_search_item));
- is->heap = xrealloc(is->heap, (is->size + 1) * sizeof(u32));
-}
-
-static inline struct image_search_item *
-image_search_grow(struct image_search *is)
-{
- if (is->count == is->visited)
- {
- if (is->count == is->size)
- image_search_grow_slow(is);
- is->visited++;
- is->heap[is->visited] = is->visited;
- }
- return is->buf + is->heap[++is->count];
-}
-
-static inline uns
-image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct image_leaf *leaf)
-{
- uns dist = 0;
- uns flags = leaf->flags;
- for (uns i = 0; i < IMAGE_VEC_K; i++)
- {
- uns bits = IMAGE_LEAF_BITS(i);
- uns mask = (1 << bits) - 1;
- uns value = flags & mask;
- flags >>= bits;
- int dif = bbox->vec[0].f[i] + (bbox->vec[1].f[i] - bbox->vec[0].f[i]) * value / ((1 << bits) - 1) - is->query.f[i];
- dist += dif * dif;
- }
- return dist;
-}
-
-static int
-image_search_next(struct image_search *is, oid_t *oid, uns *dist)
-{
- while (likely(is->count))
- {
- struct image_search_item *item = is->buf + is->heap[1];
- DBG("Main loop... dist=%d count=%d visited=%d size=%d index=0x%08x bbox=[(%s),(%s)]",
- item->dist, is->count, is->visited, is->size, item->index,
- stk_print_image_vector(&item->bbox.vec[0]), stk_print_image_vector(&item->bbox.vec[1]));
- if (unlikely(item->dist > is->max_dist))
- {
- DBG("Maximum distance reached");
- return 0;
- }
-
- /* Expand leaf */
- if (item->index & IMAGE_SEARCH_ITEM_TYPE)
- {
- *oid = item->index & ~IMAGE_SEARCH_ITEM_TYPE;
- *dist = item->dist;
- DBG("Found item %d at distance %d", *oid, *dist);
- HEAP_DELMIN(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
- return 1;
- }
-
- /* Expand node with leaves */
- else if (is->nodes[item->index].val & IMAGE_NODE_LEAF)
- {
- DBG("Expanding node to list of leaves");
- struct image_leaf *leaf = is->leaves + (is->nodes[item->index].val & ~IMAGE_NODE_LEAF);
- item->dist = image_search_leaf_dist(is, &item->bbox, leaf);
- item->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
- HEAP_INCREASE(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP, 1);
- while (!((leaf++)->flags & IMAGE_LEAF_LAST))
- {
- struct image_search_item *nitem = image_search_grow(is);
- nitem->dist = image_search_leaf_dist(is, &item->bbox, leaf);
- nitem->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
- HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
- }
- }
-
- /* Expand internal node */
- else
- {
- DBG("Expanding internal node");
- struct image_search_item *nitem = image_search_grow(is);
- uns dim = is->nodes[item->index].val & IMAGE_NODE_DIM;
- uns pivot = is->nodes[item->index].val >> 8;
- item->index *= 2;
- nitem->bbox = item->bbox;
- nitem->dist = item->dist;
- uns query = is->query.f[dim];
- int dif = query - pivot;
- if (dif > 0)
- {
- nitem->index = item->index++;
- item->bbox.vec[0].f[dim] = pivot;
- nitem->bbox.vec[1].f[dim] = pivot;
- if (query > item->bbox.vec[1].f[dim])
- nitem->dist -= SQR(query - item->bbox.vec[1].f[dim]);
- }
- else
- {
- nitem->index = item->index + 1;
- item->bbox.vec[1].f[dim] = pivot;
- nitem->bbox.vec[0].f[dim] = pivot;
- if (query < item->bbox.vec[0].f[dim])
- nitem->dist -= SQR(item->bbox.vec[0].f[dim] - query);
- }
- nitem->dist += SQR(dif);
- HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
- }
- }
- DBG("Heap is empty");
- return 0;
-}
-
-#define LOCAL_DEBUG
+#undef LOCAL_DEBUG
#include "sherlock/sherlock.h"
#include "lib/math.h"
#include "images/images.h"
#include "images/image-obj.h"
#include "images/image-sig.h"
+#include "images/color.h"
#include <alloca.h>
-#include <magick/api.h>
-
-/*
- * Color spaces
- *
- * http://www.tecgraf.puc-rio.br/~mgattass/color/ColorIndex.html
- *
- */
-
-#define REF_WHITE_X 0.96422
-#define REF_WHITE_Y 1.
-#define REF_WHITE_Z 0.82521
-
-/* sRGB to XYZ */
-static void
-srgb_to_xyz_slow(double srgb[3], double xyz[3])
-{
- double a[3];
- for (uns i = 0; i < 3; i++)
- if (srgb[i] > 0.04045)
- a[i] = pow((srgb[i] + 0.055) * (1 / 1.055), 2.4);
- else
- a[i] = srgb[i] * (1 / 12.92);
- xyz[0] = 0.412424 * a[0] + 0.357579 * a[1] + 0.180464 * a[2];
- xyz[1] = 0.212656 * a[0] + 0.715158 * a[1] + 0.072186 * a[2];
- xyz[2] = 0.019332 * a[0] + 0.119193 * a[1] + 0.950444 * a[2];
-}
-
-/* XYZ to CIE-Luv */
-static void
-xyz_to_luv_slow(double xyz[3], double luv[3])
-{
- double sum = xyz[0] + 15 * xyz[1] + 3 * xyz[2];
- if (sum < 0.000001)
- luv[0] = luv[1] = luv[2] = 0;
- else
- {
- double var_u = 4 * xyz[0] / sum;
- double var_v = 9 * xyz[1] / sum;
- if (xyz[1] > 0.008856)
- luv[0] = 116 * pow(xyz[1], 1 / 3.) - 16;
- else
- luv[0] = (116 * 7.787) * xyz[1];
- luv[1] = luv[0] * (13 * (var_u - 4 * REF_WHITE_X / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)));
- luv[2] = luv[0] * (13 * (var_v - 9 * REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)));
- /* intervals [0..100], [-134..220], [-140..122] */
- }
-}
struct block {
uns l, u, v; /* average Luv coefficients */
for (uns y = 0; y < 4; y++, p += 3 * (width - 4))
for (uns x = 0; x < 4; x++, p += 3)
{
- double rgb[3], luv[3], xyz[3];
- rgb[0] = p[0] / 255.;
- rgb[1] = p[1] / 255.;
- rgb[2] = p[2] / 255.;
- srgb_to_xyz_slow(rgb, xyz);
- xyz_to_luv_slow(xyz, luv);
+ byte luv[3];
+ srgb_to_luv_pixel(luv, p);
l_sum += *tp++ = luv[0];
- u_sum += luv[1] + 150;
- v_sum += luv[2] + 150;
+ u_sum += luv[1];
+ v_sum += luv[2];
}
- block->l = l_sum;
- block->u = u_sum;
- block->v = v_sum;
+ block->l = (l_sum >> 4);
+ block->u = (u_sum >> 4);
+ block->v = (v_sum >> 4);
/* Apply Daubechies wavelet transformation
* FIXME:
}
/* Extract energies in LH, HL and HH bands */
- block->lh = sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]);
- block->hl = sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]);
- block->hh = sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]);
+ block->lh = CLAMP((int)(sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]) / 16), 0, 255);
+ block->hl = CLAMP((int)(sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]) / 16), 0, 255);
+ block->hh = CLAMP((int)(sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]) / 16), 0, 255);
}
/* FIXME: simple average is for testing pusposes only */
#define IMAGE_REG_K 9
#define IMAGE_REG_MAX 4
-typedef u16 image_feature_t; /* 8 or 16 bits precision */
+typedef byte image_feature_t; /* 8 or 16 bits precision */
/* K-dimensional feature vector */
struct image_vector {
#include "lib/fastbuf.h"
#include "images/images.h"
#include "images/image-sig.h"
-#include "images/image-search.h"
+#include "images/kd-tree.h"
#include "sherlock/index.h"
#include "lib/mempool.h"
#include "sherlock/object.h"
#include <stdio.h>
#include <fcntl.h>
+#include <alloca.h>
#define BEST_CNT 30
uns flags; /* enum image_flag */
uns width; /* number of columns */
uns height; /* number of rows */
+ uns size; /* buffer size in bytes */
byte *pixels; /* RGB */
};
+/* Image file formats that struct image_info can be tagged with. */
+enum image_format {
+ IMAGE_FORMAT_UNDEFINED = 0, /* zero value; NOTE(review): presumably "not determined yet" -- confirm */
+ IMAGE_FORMAT_JPEG,
+ IMAGE_FORMAT_PNG,
+ IMAGE_FORMAT_GIF
+};
+
+/* Description of one image; this is the argument type of read_image_header()
+ * and read_image_data(). */
+struct image_info {
+ uns width; /* NOTE(review): presumably number of columns in pixels -- confirm */
+ uns height; /* NOTE(review): presumably number of rows in pixels -- confirm */
+ enum image_format format; /* which member of the union below applies */
+ union {
+ /* Per-format details; all three structs have no members yet.
+ * NOTE(review): member-less structs are a GNU C extension, not ISO C. */
+ struct {
+ } jpeg;
+ struct {
+ } png;
+ struct {
+ } gif;
+ };
+};
+
+int read_image_header(struct image_info *info);
+int read_image_data(struct image_info *info);
+
#endif
--- /dev/null
+#undef LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/heap.h"
+#include "images/images.h"
+#include "images/image-sig.h"
+#include "images/kd-tree.h"
+
+#include <alloca.h>
+
+#define SQR(x) ((x)*(x))
+#define IMAGE_SEARCH_CMP(x,y) (is->buf[x].dist < is->buf[y].dist)
+
+/*
+ * Start a nearest-first search of @tree around @query.
+ *
+ * @is:       search state to initialize; release it with image_search_done()
+ * @tree:     KD-tree to search (its nodes, leaves and bbox are read)
+ * @query:    feature vector searched for; copied into @is
+ * @max_dist: image_search_next() stops once the nearest pending item is
+ *            farther than this (squared) distance
+ *
+ * Both buf[] and heap[] are used from index 1, hence the size + 1 allocations.
+ * The heap starts with a single item: the root node (index 1) together with
+ * the bounding box of the whole tree.
+ */
+void
+image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist)
+{
+  // FIXME: empty tree
+  is->tree = tree;
+  is->nodes = tree->nodes;
+  is->leaves = tree->leaves;
+  is->query = *query;
+  is->max_dist = max_dist;
+  is->size = 0x1000;
+  is->buf = xmalloc((is->size + 1) * sizeof(struct image_search_item));
+  is->heap = xmalloc((is->size + 1) * sizeof(u32));
+  is->visited = is->count = 1;
+  is->heap[1] = 1;
+  struct image_search_item *item = is->buf + 1;
+  item->index = 1;
+  item->bbox = tree->bbox;
+
+  /* Squared distance from the query to the root bounding box: the sum over
+   * all dimensions of the squared amount by which the query lies outside
+   * the box.  Dimensions in which the query is inside contribute nothing.
+   * image_search_next() relies on exactly this sum when it subtracts and
+   * re-adds the contribution of a single dimension for a far child. */
+  item->dist = 0;
+  for (uns i = 0; i < IMAGE_VEC_K; i++)
+    {
+      if (query->f[i] < item->bbox.vec[0].f[i])
+        item->dist += SQR(item->bbox.vec[0].f[i] - query->f[i]);
+      else if (query->f[i] > item->bbox.vec[1].f[i])
+        item->dist += SQR(query->f[i] - item->bbox.vec[1].f[i]);
+    }
+}
+
+/*
+ * Free the item buffer and the heap array owned by a search state.
+ * The structure @is itself is not freed.
+ */
+void
+image_search_done(struct image_search *is)
+{
+  xfree(is->heap);
+  xfree(is->buf);
+}
+
+/*
+ * Double the capacity of the search state: both the item buffer and the
+ * heap array are reallocated to hold the new size plus the unused slot 0.
+ */
+static void
+image_search_grow_slow(struct image_search *is)
+{
+  uns doubled = is->size * 2;
+
+  is->size = doubled;
+  is->buf = xrealloc(is->buf, (doubled + 1) * sizeof(*is->buf));
+  is->heap = xrealloc(is->heap, (doubled + 1) * sizeof(*is->heap));
+}
+
+/*
+ * Reserve one more heap position and return the buffer item it refers to.
+ *
+ * When every buffer slot handed out so far is in the heap (count == visited),
+ * a fresh slot is taken, growing the arrays first if they are full.
+ * Otherwise heap[count + 1] already names a previously used slot and that
+ * one is returned again.  The caller fills the item in and then inserts it
+ * into the heap.
+ */
+static inline struct image_search_item *
+image_search_grow(struct image_search *is)
+{
+  if (is->count == is->visited)
+    {
+      if (is->visited == is->size)
+        image_search_grow_slow(is);
+      uns fresh = ++is->visited;
+      is->heap[fresh] = fresh;
+    }
+  is->count++;
+  return &is->buf[is->heap[is->count]];
+}
+
+/*
+ * Squared distance between the query vector and a leaf.
+ *
+ * leaf->flags packs one small integer per dimension, IMAGE_LEAF_BITS(k) bits
+ * wide, lowest dimension first.  Each integer is scaled linearly between the
+ * lower and upper bound of @bbox in that dimension to obtain the leaf's
+ * coordinate, which is then compared with the query.
+ */
+static inline uns
+image_search_leaf_dist(struct image_search *is, struct image_bbox *bbox, struct image_leaf *leaf)
+{
+  uns packed = leaf->flags;
+  uns sum = 0;
+  uns k = 0;
+
+  while (k < IMAGE_VEC_K)
+    {
+      uns width = IMAGE_LEAF_BITS(k);
+      uns top = (1 << width) - 1;	/* field mask and also the largest field value */
+      uns level = packed & top;
+      int delta = bbox->vec[0].f[k] + (bbox->vec[1].f[k] - bbox->vec[0].f[k]) * level / top - is->query.f[k];
+      packed >>= width;
+      sum += delta * delta;
+      k++;
+    }
+  return sum;
+}
+
+/*
+ * Fetch the next result of a search started by image_search_init().
+ *
+ * Repeatedly takes the item at the top of the heap (heap[1], ordered by
+ * IMAGE_SEARCH_CMP, i.e. by item distance) and either reports it or expands it:
+ *  - an item tagged with IMAGE_SEARCH_ITEM_TYPE is a leaf: its oid and
+ *    distance are stored to *oid and *dist, it is removed and 1 is returned;
+ *  - a node flagged IMAGE_NODE_LEAF is replaced by its run of leaves;
+ *  - any other node is split into its two children.
+ *
+ * Returns 0 when the heap is empty or when the top item is farther than
+ * is->max_dist; *oid and *dist are not written in that case.
+ */
+int
+image_search_next(struct image_search *is, oid_t *oid, uns *dist)
+{
+ while (likely(is->count))
+ {
+ /* Item currently at the top of the heap */
+ struct image_search_item *item = is->buf + is->heap[1];
+ DBG("Main loop... dist=%d count=%d visited=%d size=%d index=0x%08x bbox=[(%s),(%s)]",
+ item->dist, is->count, is->visited, is->size, item->index,
+ stk_print_image_vector(&item->bbox.vec[0]), stk_print_image_vector(&item->bbox.vec[1]));
+ if (unlikely(item->dist > is->max_dist))
+ {
+ DBG("Maximum distance reached");
+ return 0;
+ }
+
+ /* Expand leaf */
+ if (item->index & IMAGE_SEARCH_ITEM_TYPE)
+ {
+ /* The low bits of index hold the oid; report it and drop the item */
+ *oid = item->index & ~IMAGE_SEARCH_ITEM_TYPE;
+ *dist = item->dist;
+ DBG("Found item %d at distance %d", *oid, *dist);
+ HEAP_DELMIN(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ return 1;
+ }
+
+ /* Expand node with leaves */
+ else if (is->nodes[item->index].val & IMAGE_NODE_LEAF)
+ {
+ DBG("Expanding node to list of leaves");
+ /* The rest of val is the position of the node's first leaf in leaves[] */
+ struct image_leaf *leaf = is->leaves + (is->nodes[item->index].val & ~IMAGE_NODE_LEAF);
+ /* The first leaf reuses the node's own item in place; its distance changed,
+ * so the heap is fixed up at position 1 (presumably a sift-down -- see lib/heap.h) */
+ item->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+ item->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+ HEAP_INCREASE(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP, 1);
+ /* Every following leaf up to the one flagged IMAGE_LEAF_LAST gets a new item;
+ * item->bbox is still the node's bounding box */
+ while (!((leaf++)->flags & IMAGE_LEAF_LAST))
+ {
+ struct image_search_item *nitem = image_search_grow(is);
+ nitem->dist = image_search_leaf_dist(is, &item->bbox, leaf);
+ nitem->index = IMAGE_SEARCH_ITEM_TYPE | leaf->oid;
+ HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ }
+ }
+
+ /* Expand internal node */
+ else
+ {
+ DBG("Expanding internal node");
+ struct image_search_item *nitem = image_search_grow(is);
+ /* val holds the split dimension (IMAGE_NODE_DIM bits) and the pivot (bits 8 and up) */
+ uns dim = is->nodes[item->index].val & IMAGE_NODE_DIM;
+ uns pivot = is->nodes[item->index].val >> 8;
+ /* Children of node i are 2*i and 2*i+1 */
+ item->index *= 2;
+ nitem->bbox = item->bbox;
+ nitem->dist = item->dist;
+ uns query = is->query.f[dim];
+ int dif = query - pivot;
+ /* item stays in place (distance unchanged) and becomes the child on the
+ * query's side of the pivot; nitem becomes the other child */
+ if (dif > 0)
+ {
+ /* Query above the pivot: item -> child 2*i+1, nitem -> child 2*i */
+ nitem->index = item->index++;
+ item->bbox.vec[0].f[dim] = pivot;
+ nitem->bbox.vec[1].f[dim] = pivot;
+ /* If the query was beyond the old upper bound, remove that dimension's
+ * old contribution before the new one is added below */
+ if (query > item->bbox.vec[1].f[dim])
+ nitem->dist -= SQR(query - item->bbox.vec[1].f[dim]);
+ }
+ else
+ {
+ /* Query at or below the pivot: item -> child 2*i, nitem -> child 2*i+1 */
+ nitem->index = item->index + 1;
+ item->bbox.vec[1].f[dim] = pivot;
+ nitem->bbox.vec[0].f[dim] = pivot;
+ /* Same correction for a query below the old lower bound */
+ if (query < item->bbox.vec[0].f[dim])
+ nitem->dist -= SQR(item->bbox.vec[0].f[dim] - query);
+ }
+ /* In the split dimension the far child is at least |query - pivot| away */
+ nitem->dist += SQR(dif);
+ HEAP_INSERT(u32, is->heap, is->count, IMAGE_SEARCH_CMP, HEAP_SWAP);
+ }
+ }
+ DBG("Heap is empty");
+ return 0;
+}
+
--- /dev/null
+#ifndef _IMAGES_KD_TREE_H
+#define _IMAGES_KD_TREE_H
+
+#define IMAGE_SEARCH_DIST_UNLIMITED (~0U)
+
+/* FIXME: support full length of oid_t, currently must be <2^31 */
+#define IMAGE_SEARCH_ITEM_TYPE 0x80000000U
+/* One pending entry of a KD-tree search: a tree node still to be expanded,
+ * or a leaf waiting to be reported. */
+struct image_search_item {
+ u32 dist; /* squared distance used as the heap key */
+ u32 index; /* tree node index, or IMAGE_SEARCH_ITEM_TYPE | oid for a leaf */
+ struct image_bbox bbox; /* bounding box of the node; not set for leaf items */
+};
+
+/* State of one search; set up by image_search_init(), advanced by
+ * image_search_next() and released by image_search_done(). */
+struct image_search {
+ struct image_tree *tree; /* tree being searched */
+ struct image_node *nodes; /* copy of tree->nodes */
+ struct image_leaf *leaves; /* copy of tree->leaves */
+ struct image_vector query; /* private copy of the query vector */
+ struct image_search_item *buf; /* item storage, slots 1..size (slot 0 unused) */
+ u32 *heap; /* heap of indices into buf[], positions 1..count */
+ /* count: items currently in the heap; visited: buf[] slots handed out so far;
+ * size: allocated capacity; max_dist: distance at which the search stops */
+ uns count, visited, size, max_dist;
+};
+
+/* Prepare @is for a search of @tree around @query; allocates the item buffer and the heap. */
+void image_search_init(struct image_search *is, struct image_tree *tree, struct image_vector *query, uns max_dist);
+/* Free the buffers allocated for @is (not @is itself). */
+void image_search_done(struct image_search *is);
+/* Store the next leaf's oid and distance and return 1; return 0 when the heap
+ * is empty or the nearest pending item is farther than max_dist. */
+int image_search_next(struct image_search *is, oid_t *oid, uns *dist);
+
+#endif