--- /dev/null
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/mempool.h"
+#include "lib/conf.h"
+#include "lib/fastbuf.h"
+#include "lib/chartype.h"
+#include "sherlock/object.h"
+#include "lib/url.h"
+#include "lib/unicode.h"
+#include "sherlock/lizard-fb.h"
+#include "sherlock/tagged-text.h"
+#include "charset/charconv.h"
+#include "charset/unicat.h"
+#include "charset/fb-charconv.h"
+#include "indexer/indexer.h"
+#include "indexer/lexicon.h"
+#include "indexer/params.h"
+#include "utils/dumpconfig.h"
+#include "lang/lang.h"
+#include "lib/base224.h"
+#include "lib/bbuf.h"
+
+#include "images/images.h"
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+
+/*
+ * Generate image signatures for the cards of the index.
+ * (This should happen in gatherer or scanner.)
+ *
+ * Reads "card-attrs" sequentially; for every card whose (type_flags >> 4)
+ * lies in 8..11 (presumably the image card types -- TODO confirm) the card
+ * is loaded from "cards", all of its 'N' attributes are concatenated and
+ * base224-decoded, and the decoded data are handed to
+ * compute_image_signature().  On success the record (oid, signature) is
+ * appended to "image-sig".
+ *
+ * limit: the loop stops once this many signatures have been written.
+ *        The counter is decremented before it is tested, so passing 0
+ *        wraps around and effectively means "no limit".
+ */
+static void
+generate_signatures(uns limit)
+{
+  struct fastbuf *cards = index_bopen("cards", O_RDONLY);
+  struct fastbuf *card_attrs = index_bopen("card-attrs", O_RDONLY);
+  struct fastbuf *signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
+  struct card_attr ca;
+  struct image_signature sig;
+  struct mempool *pool = mp_new(1 << 16);
+  struct buck2obj_buf *bob = buck2obj_alloc();
+  oid_t oid = 0;
+
+  DBG("Generating signatures");
+
+  for (; bread(card_attrs, &ca, sizeof(ca)); oid++)
+    if ((uns)((ca.type_flags >> 4) - 8) < 4)
+      {
+        /* Locate the card and parse its header: length, then bucket type */
+        bsetpos(cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
+        uns buck_len = bgetl(cards)-(LIZARD_COMPRESS_HEADER-1);
+        uns buck_type = bgetc(cards) + BUCKET_TYPE_PLAIN;
+        /* Objects of the previous card are no longer needed */
+        mp_flush(pool);
+        struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, cards, NULL);
+        struct oattr *attr;
+        if (!obj)
+          die("Failed to read card");
+        if ((attr = obj_find_attr(obj, 'N')))
+          {
+            /* NOTE(review): the 'U' section is dereferenced unchecked -- assumes every such card has one */
+            DBG("Reading oid=%d url=%s", oid, obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
+
+            /* Glue the values of all 'N' attributes together */
+            bb_t buf;
+            uns buf_len = 0;
+            bb_init(&buf);
+            for (; attr; attr = attr->same)
+              {
+                uns len = strlen(attr->val);
+                bb_grow(&buf, buf_len + len);
+                memcpy(buf.ptr + buf_len, attr->val, len);
+                buf_len += len;
+              }
+
+            /* One spare byte keeps the array length non-zero even for an empty payload */
+            byte thumb[buf_len + 1];
+            uns thumb_len = base224_decode(thumb, buf.ptr, buf_len);
+
+            int err = compute_image_signature(thumb, thumb_len, &sig);
+
+            /* The encoded data are not needed any more; release them before
+             * we possibly leave the loop, otherwise the buffer would leak */
+            bb_done(&buf);
+
+            if (!err)
+              {
+                bputl(signatures, oid);
+                bwrite(signatures, &sig, sizeof(sig));
+                if (!--limit)
+                  break;
+              }
+            else
+              DBG("Cannot create signature, error=%d", err);
+          }
+      }
+
+  buck2obj_free(bob);
+  mp_delete(pool);
+  bclose(cards);
+  bclose(card_attrs);
+  bclose(signatures);
+}
+
+static char *shortopts = CF_SHORT_OPTS ""; /* short options handed to cf_getopt(): CF_SHORT_OPTS plus an empty tool-specific part */
+static struct option longopts[] = /* long options handed to cf_getopt() */
+{
+ CF_LONG_OPTS
+ { NULL, 0, 0, 0 } /* all-zero entry closing the table */
+};
+
+static char *help = "\
+Usage: image-indexer [<options>]\n\
+\n\
+Options:\n" CF_USAGE; /* help text written to stderr by usage() */
+
+/*
+ * Write the optional message `msg' (followed by a newline) and then the
+ * help text to stderr and terminate the process with exit status 1.
+ */
+static void NONRET
+usage(byte *msg)
+{
+  if (msg != NULL)
+    fprintf(stderr, "%s\n", msg);
+  fputs(help, stderr);
+  exit(1);
+}
+
+
+/*
+ * Entry point: set up logging, process the command line (only the options
+ * understood by cf_getopt() itself are accepted, no positional arguments)
+ * and generate signatures with the limit set to ~0U.
+ */
+int
+main(int argc UNUSED, char **argv)
+{
+  log_init(argv[0]);
+
+  /* There are no tool-specific options, so whatever cf_getopt() hands
+   * back to us is an error; usage() does not return */
+  for (;;)
+    {
+      int opt = cf_getopt(argc, argv, shortopts, longopts, NULL);
+      if (opt < 0)
+        break;
+      usage("Invalid option");
+    }
+
+  if (optind != argc)
+    usage("Invalid usage");
+
+  generate_signatures(~0U);
+
+  return 0;
+}
--- /dev/null
+#define LOCAL_DEBUG
+
+#include "sherlock/sherlock.h"
+#include "lib/math.h"
+#include "lib/fastbuf.h"
+#include "images/images.h"
+
+/*
+ * Color spaces
+ *
+ * http://www.tecgraf.puc-rio.br/~mgattass/color/ColorIndex.html
+ *
+ */
+
+#define REF_WHITE_X 0.96422 /* X of the reference white used by xyz_to_luv_slow(); the triple looks like D50 -- TODO confirm */
+#define REF_WHITE_Y 1. /* Y of the reference white */
+#define REF_WHITE_Z 0.82521 /* Z of the reference white */
+
+/*
+ * sRGB to XYZ
+ *
+ * srgb: input triple (red, green, blue); the caller in this file passes
+ *       values scaled to 0..1
+ * xyz:  output triple (X, Y, Z)
+ *
+ * Each channel is first linearised (linear segment up to 0.04045, power
+ * curve with exponent 2.4 above it) and the result is multiplied by
+ * a fixed 3x3 matrix.
+ */
+static void
+srgb_to_xyz_slow(double srgb[3], double xyz[3])
+{
+  double lin[3];
+  uns ch = 0;
+
+  while (ch < 3)
+    {
+      lin[ch] = (srgb[ch] > 0.04045)
+        ? pow((srgb[ch] + 0.055) * (1 / 1.055), 2.4)
+        : srgb[ch] * (1 / 12.92);
+      ch++;
+    }
+
+  xyz[0] = 0.412424 * lin[0] + 0.357579 * lin[1] + 0.180464 * lin[2];
+  xyz[1] = 0.212656 * lin[0] + 0.715158 * lin[1] + 0.072186 * lin[2];
+  xyz[2] = 0.019332 * lin[0] + 0.119193 * lin[1] + 0.950444 * lin[2];
+}
+
+/*
+ * XYZ to CIE-Luv
+ *
+ * xyz: input triple (X, Y, Z)
+ * luv: output triple (L, u, v); all three are set to zero when
+ *      X + 15*Y + 3*Z is below 0.000001
+ *
+ * The chromaticity of the input is taken relative to the reference white
+ * given by REF_WHITE_X, REF_WHITE_Y and REF_WHITE_Z.
+ */
+static void
+xyz_to_luv_slow(double xyz[3], double luv[3])
+{
+  double denom = xyz[0] + 15 * xyz[1] + 3 * xyz[2];
+
+  /* (Nearly) black input: avoid dividing by a vanishing denominator */
+  if (denom < 0.000001)
+    {
+      luv[0] = luv[1] = luv[2] = 0;
+      return;
+    }
+
+  double u_chroma = 4 * xyz[0] / denom;
+  double v_chroma = 9 * xyz[1] / denom;
+
+  /* Cube-root law above the threshold, linear segment below it */
+  luv[0] = (xyz[1] > 0.008856)
+    ? 116 * pow(xyz[1], 1 / 3.) - 16
+    : (116 * 7.787) * xyz[1];
+  luv[1] = luv[0] * (13 * (u_chroma - 4 * REF_WHITE_X / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)));
+  luv[2] = luv[0] * (13 * (v_chroma - 9 * REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)));
+  /* intervals [0..100], [-134..220], [-140..122] */
+}
+
+struct block { /* features of one 4x4 pixel block, filled in by compute_image_area_signature() */
+ uns l, u, v; /* Luv coefficients summed over the 16 pixels of the block (u and v are shifted by +150 per pixel; the sums are not divided by 16) */
+ uns lh, hl, hh; /* energies in Daubechies wavelet bands */
+};
+
+/*
+ * Compute the 6-element signature of a rectangular pixel area.
+ *
+ * The area is cut into 4x4 pixel blocks; pixels beyond the last complete
+ * block in either direction are ignored.  For every block we accumulate
+ * the Luv coordinates of its pixels and the energies of the LH, HL and HH
+ * bands of a Daubechies wavelet transformation of the L channel.  The
+ * signature is the mean of these six values over all blocks.
+ *
+ * pixels:        the area, addressed as pixels[y * width + x]
+ * width, height: dimensions of the area, both at least 4
+ * sig:           receives the result in sig->vec[0..5]
+ */
+static void
+compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct image_signature *sig)
+{
+  ASSERT(width >= 4 && height >= 4);
+
+  uns w = width >> 2;
+  uns h = height >> 2;
+  DBG("Computing signature for image %dx%d... %dx%d blocks", width, height, w, h);
+  uns blocks_count = w * h;
+  struct block *blocks = xmalloc(blocks_count * sizeof(struct block)), *block = blocks; /* FIXME: use mempool */
+
+  /* Every 4x4 block (FIXME: deal with smaller blocks near the edges) */
+  for (uns block_y = 0; block_y < h; block_y++)
+    for (uns block_x = 0; block_x < w; block_x++, block++)
+      {
+        int t[16], s[16], *tp = t;
+
+        /* Top-left pixel of this block.  Every address is derived from the
+         * block coordinates, so no negative (and, with unsigned operands,
+         * wrapping) pointer steps are needed to get from block to block. */
+        PixelPacket *block_pixels = pixels + block_y * 4 * width + block_x * 4;
+
+        /* Convert pixels to Luv color space and compute average coefficients
+         * FIXME:
+         * - could be MUCH faster with precomputed tables and integer arithmetic...
+         *   I will probably use interpolation in 3-dim array */
+        uns l_sum = 0;
+        uns u_sum = 0;
+        uns v_sum = 0;
+        for (uns y = 0; y < 4; y++)
+          {
+            PixelPacket *p = block_pixels + y * width;
+            for (uns x = 0; x < 4; x++, p++)
+              {
+                double rgb[3], luv[3], xyz[3];
+                rgb[0] = (p->red >> (QuantumDepth - 8)) / 255.;
+                rgb[1] = (p->green >> (QuantumDepth - 8)) / 255.;
+                rgb[2] = (p->blue >> (QuantumDepth - 8)) / 255.;
+                srgb_to_xyz_slow(rgb, xyz);
+                xyz_to_luv_slow(xyz, luv);
+                /* The (truncated) L value is also stored as input of the wavelet transformation */
+                l_sum += *tp++ = luv[0];
+                /* Shift u and v to keep the unsigned sums non-negative */
+                u_sum += luv[1] + 150;
+                v_sum += luv[2] + 150;
+              }
+          }
+
+        block->l = l_sum;
+        block->u = u_sum;
+        block->v = v_sum;
+
+        /* Apply Daubechies wavelet transformation
+         * FIXME:
+         * - MMX/SSE instructions or tables could be faster
+         * - maybe it would be better to compute Luv and wavelet separately because of processor cache or MMX/SSE
+         * - eliminate slow square roots
+         * - what about Haar transformation? */
+
+        /* Filter coefficients in 16.16 fixed point */
+#define DAUB_0 31651 /* (1 + sqrt 3) / (4 * sqrt 2) */
+#define DAUB_1 54822 /* (3 + sqrt 3) / (4 * sqrt 2) */
+#define DAUB_2 14689 /* (3 - sqrt 3) / (4 * sqrt 2) */
+#define DAUB_3 -8481 /* (1 - sqrt 3) / (4 * sqrt 2) */
+
+        /* ... to the rows */
+        uns i;
+        for (i = 0; i < 16; i += 4)
+          {
+            s[i + 0] = (DAUB_0 * t[i + 2] + DAUB_1 * t[i + 3] + DAUB_2 * t[i + 0] + DAUB_3 * t[i + 1]) / 0x10000;
+            s[i + 1] = (DAUB_0 * t[i + 0] + DAUB_1 * t[i + 1] + DAUB_2 * t[i + 2] + DAUB_3 * t[i + 3]) / 0x10000;
+            s[i + 2] = (DAUB_3 * t[i + 2] - DAUB_2 * t[i + 3] + DAUB_1 * t[i + 0] - DAUB_0 * t[i + 1]) / 0x10000;
+            s[i + 3] = (DAUB_3 * t[i + 0] - DAUB_2 * t[i + 1] + DAUB_1 * t[i + 2] - DAUB_0 * t[i + 3]) / 0x10000;
+          }
+
+        /* ... and to the columns... skip LL band
+         * NOTE(review): this pass divides by 0x1000 instead of 0x10000, which
+         * leaves the results scaled by 16 -- presumably on purpose, to keep
+         * some precision for the energies below; confirm */
+        for (i = 0; i < 2; i++)
+          {
+            t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x1000;
+            t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x1000;
+          }
+        for (; i < 4; i++)
+          {
+            t[i + 0] = (DAUB_0 * s[i + 8] + DAUB_1 * s[i +12] + DAUB_2 * s[i + 0] + DAUB_3 * s[i + 4]) / 0x1000;
+            t[i + 4] = (DAUB_0 * s[i + 0] + DAUB_1 * s[i + 4] + DAUB_2 * s[i + 8] + DAUB_3 * s[i +12]) / 0x1000;
+            t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x1000;
+            t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x1000;
+          }
+
+        /* Extract energies in LH, HL and HH bands */
+        block->lh = sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]);
+        block->hl = sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]);
+        block->hh = sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]);
+      }
+
+  /* FIXME: simple average is for testing purposes only */
+  uns l_sum = 0;
+  uns u_sum = 0;
+  uns v_sum = 0;
+  uns lh_sum = 0;
+  uns hl_sum = 0;
+  uns hh_sum = 0;
+  for (uns i = 0; i < blocks_count; i++)
+    {
+      l_sum += blocks[i].l;
+      u_sum += blocks[i].u;
+      v_sum += blocks[i].v;
+      lh_sum += blocks[i].lh;
+      hl_sum += blocks[i].hl;
+      hh_sum += blocks[i].hh;
+    }
+
+  sig->vec[0] = l_sum / blocks_count;
+  sig->vec[1] = u_sum / blocks_count;
+  sig->vec[2] = v_sum / blocks_count;
+  sig->vec[3] = lh_sum / blocks_count;
+  sig->vec[4] = hl_sum / blocks_count;
+  sig->vec[5] = hh_sum / blocks_count;
+
+  xfree(blocks);
+
+  DBG("Resulting signature is (%d, %d, %d, %d, %d, %d)", sig->vec[0], sig->vec[1], sig->vec[2], sig->vec[3], sig->vec[4], sig->vec[5]);
+}
+
+/*
+ * Decode an image held in memory and compute its signature.
+ *
+ * data, len: the encoded image as accepted by BlobToImage()
+ * sig:       receives the signature; written only when 0 is returned
+ *
+ * Returns 0 on success and -1 when the image is smaller than 4x4 pixels
+ * or its pixels cannot be accessed.  Data which cannot be decoded at all
+ * are a fatal error (die()).
+ */
+int
+compute_image_signature(void *data, uns len, struct image_signature *sig)
+{
+  int retval = 0;
+
+  InitializeMagick(NULL); /* FIXME: call only once */
+  ExceptionInfo exception;
+  GetExceptionInfo(&exception);
+  ImageInfo *image_info = CloneImageInfo(NULL);
+  image_info->subrange = 1;
+
+  DBG("Decoding");
+  Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... most of the time :-/ */
+  if (!image)
+    die("Invalid image format");
+  if (image->columns < 4 || image->rows < 4)
+    {
+      DBG("Image too small (%dx%d)", (int)image->columns, (int)image->rows);
+      retval = -1;
+      goto exit;
+    }
+
+  QuantizeInfo quantize_info;
+  GetQuantizeInfo(&quantize_info);
+  quantize_info.colorspace = RGBColorspace;
+  QuantizeImage(&quantize_info, image);
+
+  PixelPacket *pixels = (PixelPacket *) AcquireImagePixels(image, 0, 0, image->columns, image->rows, &exception);
+  if (!pixels)
+    {
+      /* Never hand a NULL pixel array to the signature code */
+      DBG("Cannot acquire image pixels");
+      retval = -1;
+      goto exit;
+    }
+
+  compute_image_area_signature(pixels, image->columns, image->rows, sig);
+
+exit:
+  DestroyImage(image);
+  DestroyImageInfo(image_info);
+  DestroyExceptionInfo(&exception);
+  DestroyMagick();
+  return retval;
+}
+