+++ /dev/null
-#define LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "lib/mempool.h"
-#include "lib/conf.h"
-#include "lib/fastbuf.h"
-#include "lib/chartype.h"
-#include "sherlock/object.h"
-#include "lib/url.h"
-#include "lib/unicode.h"
-#include "sherlock/lizard-fb.h"
-#include "sherlock/tagged-text.h"
-#include "charset/charconv.h"
-#include "charset/unicat.h"
-#include "charset/fb-charconv.h"
-#include "indexer/indexer.h"
-#include "indexer/lexicon.h"
-#include "indexer/params.h"
-#include "utils/dumpconfig.h"
-#include "lang/lang.h"
-#include "lib/base224.h"
-#include "lib/bbuf.h"
-
-#include "images/images.h"
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-
-/*
- * Generate image signatures for the cards of the current index.
- *
- * Reads "card-attrs" sequentially; for every card whose (type_flags >> 4)
- * lies in 8..11 the card body is loaded from "cards", all of its 'N'
- * attribute values are concatenated, base224-decoded and passed to
- * compute_image_signature(). Each successful result is appended to
- * "image-sig" as the card's oid followed by the raw signature structure.
- *
- * limit: maximum number of signatures to write (the loop stops as soon as
- *        that many have been written).
- *
- * Cards with an empty 'N' value and cards for which the signature cannot be
- * computed are skipped; an unreadable card is fatal.
- *
- * This should happen in gatherer or scanner.
- */
-static void
-generate_signatures(uns limit)
-{
-  struct fastbuf *cards = index_bopen("cards", O_RDONLY);
-  struct fastbuf *card_attrs = index_bopen("card-attrs", O_RDONLY);
-  struct fastbuf *signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
-  struct card_attr ca;
-  struct image_signature sig;
-  struct mempool *pool = mp_new(1 << 16);
-  struct buck2obj_buf *bob = buck2obj_alloc();
-  bb_t encoded, thumb;
-  oid_t oid = 0;
-
-  DBG("Generating signatures");
-
-  /* Both buffers are allocated once and reused for every card, so no exit
-   * path from the loop (including the limit `break') can leak them. */
-  bb_init(&encoded);
-  bb_init(&thumb);
-
-  for (; bread(card_attrs, &ca, sizeof(ca)); oid++)
-    {
-      /* Only card types 8..11 are processed (presumably the image types -- confirm) */
-      if ((uns)((ca.type_flags >> 4) - 8) >= 4)
-        continue;
-
-      bsetpos(cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
-      uns buck_len = bgetl(cards)-(LIZARD_COMPRESS_HEADER-1);
-      uns buck_type = bgetc(cards) + BUCKET_TYPE_PLAIN;
-      mp_flush(pool);
-      struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, cards, NULL);
-      if (!obj)
-        die("Failed to read card");
-
-      struct oattr *attr = obj_find_attr(obj, 'N');
-      if (!attr)
-        continue;
-      DBG("Reading oid=%d url=%s", oid, obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
-
-      /* Glue all 'N' values together into one base224 string */
-      uns enc_len = 0;
-      for (; attr; attr = attr->same)
-        {
-          uns len = strlen(attr->val);
-          bb_grow(&encoded, enc_len + len);
-          memcpy(encoded.ptr + enc_len, attr->val, len);
-          enc_len += len;
-        }
-      if (!enc_len)
-        {
-          DBG("Empty thumbnail, skipping");
-          continue;
-        }
-
-      /* Decode into a heap buffer instead of a stack VLA: the size comes from
-       * index data and must neither be zero nor overflow the stack.  The
-       * capacity equals the encoded length, as it did before. */
-      bb_grow(&thumb, enc_len);
-      uns thumb_len = base224_decode(thumb.ptr, encoded.ptr, enc_len);
-
-      int err = compute_image_signature(thumb.ptr, thumb_len, &sig);
-      if (err)
-        {
-          DBG("Cannot create signature, error=%d", err);
-          continue;
-        }
-
-      bputl(signatures, oid);
-      bwrite(signatures, &sig, sizeof(sig));
-      if (!--limit)
-        break;
-    }
-
-  bb_done(&thumb);
-  bb_done(&encoded);
-  buck2obj_free(bob);
-  mp_delete(pool);
-  bclose(cards);
-  bclose(card_attrs);
-  bclose(signatures);
-}
-
-/* Option tables: nothing beyond what the CF_* macros contribute. */
-static char *shortopts = CF_SHORT_OPTS "";
-static struct option longopts[] = {
-  CF_LONG_OPTS
-  { NULL, 0, 0, 0 }
-};
-
-/* Help text printed by usage(). */
-static char *help =
-  "Usage: image-indexer [<options>]\n"
-  "\n"
-  "Options:\n"
-  CF_USAGE;
-
-/*
- * Print the optional message `msg' on its own line followed by the help
- * text to stderr, then terminate the process with exit status 1.
- */
-static void NONRET
-usage(byte *msg)
-{
-  if (msg)
-    fprintf(stderr, "%s\n", (char *) msg);
-  fputs(help, stderr);
-  exit(1);
-}
-
-
-/*
- * Program entry: set up logging, parse the command line and generate
- * signatures without any practical limit on their number.
- */
-int
-main(int argc UNUSED, char **argv)
-{
-  log_init(argv[0]);
-
-  /* Every option code handed back by cf_getopt() is rejected */
-  while (cf_getopt(argc, argv, shortopts, longopts, NULL) >= 0)
-    usage("Invalid option");
-
-  /* Positional arguments are not accepted */
-  if (optind != argc)
-    usage("Invalid usage");
-
-  generate_signatures(~0U);
-
-  return 0;
-}
+++ /dev/null
-#define LOCAL_DEBUG
-
-#include "sherlock/sherlock.h"
-#include "lib/math.h"
-#include "lib/fastbuf.h"
-#include "images/images.h"
-
-/*
- * Color spaces
- *
- * http://www.tecgraf.puc-rio.br/~mgattass/color/ColorIndex.html
- *
- */
-
-#define REF_WHITE_X 0.96422 /* reference white (X, Y, Z) used for the u', v' offsets in xyz_to_luv_slow() */
-#define REF_WHITE_Y 1. /* NOTE(review): these values look like the D50 white point -- confirm they match the sRGB->XYZ matrix */
-#define REF_WHITE_Z 0.82521
-
-/*
- * sRGB to XYZ
- *
- * srgb: input triple (the caller in this file passes components scaled to 0..1)
- * xyz:  output triple
- *
- * Each component is first linearised (linear segment up to 0.04045, power
- * curve with exponent 2.4 above it) and the result is multiplied by a fixed
- * 3x3 matrix.
- */
-static void
-srgb_to_xyz_slow(double srgb[3], double xyz[3])
-{
-  static const double to_xyz[3][3] = {
-    { 0.412424, 0.357579, 0.180464 },
-    { 0.212656, 0.715158, 0.072186 },
-    { 0.019332, 0.119193, 0.950444 },
-  };
-  double lin[3];
-
-  /* Linearise all three components before any output is written */
-  for (uns c = 0; c < 3; c++)
-    lin[c] = (srgb[c] > 0.04045)
-      ? pow((srgb[c] + 0.055) * (1 / 1.055), 2.4)
-      : srgb[c] * (1 / 12.92);
-
-  for (uns row = 0; row < 3; row++)
-    xyz[row] = to_xyz[row][0] * lin[0] + to_xyz[row][1] * lin[1] + to_xyz[row][2] * lin[2];
-}
-
-/*
- * XYZ to CIE-Luv
- *
- * xyz: input triple
- * luv: output triple; all zeros when X + 15Y + 3Z is below 0.000001
- *
- * Resulting intervals: [0..100], [-134..220], [-140..122]
- */
-static void
-xyz_to_luv_slow(double xyz[3], double luv[3])
-{
-  /* Chromaticity of the reference white */
-  const double white_sum = REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z;
-  const double white_u = 4 * REF_WHITE_X / white_sum;
-  const double white_v = 9 * REF_WHITE_Y / white_sum;
-
-  const double x = xyz[0], y = xyz[1], z = xyz[2];
-  const double denom = x + 15 * y + 3 * z;
-
-  /* Avoid dividing by (almost) zero for black pixels */
-  if (denom < 0.000001)
-    {
-      luv[0] = luv[1] = luv[2] = 0;
-      return;
-    }
-
-  /* Cube-root law above the 0.008856 threshold, linear ramp below it */
-  double lightness = (y > 0.008856) ? 116 * pow(y, 1 / 3.) - 16 : (116 * 7.787) * y;
-
-  luv[0] = lightness;
-  luv[1] = lightness * (13 * (4 * x / denom - white_u));
-  luv[2] = lightness * (13 * (9 * y / denom - white_v));
-}
-
-struct block {
- uns l, u, v; /* Luv coefficients summed over the 16 pixels of a 4x4 block (u and v are shifted by +150 per pixel) */
- uns lh, hl, hh; /* energies in the LH, HL and HH Daubechies wavelet bands of the block */
-};
-
-/*
- * Compute the signature of a rectangular pixel area.
- *
- * pixels: width * height pixels stored row by row
- * width, height: area dimensions, both must be at least 4
- * sig: output; vec[0..2] receive the per-block L, u, v sums averaged over
- *      all blocks, vec[3..5] the averaged LH, HL and HH wavelet energies
- *
- * The area is cut into non-overlapping 4x4 blocks; the (width & 3) rightmost
- * columns and (height & 3) bottom rows are ignored.
- */
-static void
-compute_image_area_signature(PixelPacket *pixels, uns width, uns height, struct image_signature *sig)
-{
-  ASSERT(width >= 4 && height >= 4);
-
-  uns w = width >> 2;
-  uns h = height >> 2;
-  DBG("Computing signature for image %dx%d... %dx%d blocks", width, height, w, h);
-  uns blocks_count = w * h;
-  struct block *blocks = xmalloc(blocks_count * sizeof(struct block)), *block = blocks; /* FIXME: use mempool */
-
-  /* Every 4x4 block (FIXME: deal with smaller blocks near the edges) */
-  for (uns block_y = 0; block_y < h; block_y++)
-    for (uns block_x = 0; block_x < w; block_x++, block++)
-      {
-        /* Address the block's top-left pixel directly.  The previous running
-         * pointer was advanced by `width & 3 + width * 3' at the end of each
-         * block row, which parses as `width & (3 + width * 3)' and therefore
-         * started every block row but the first at a wrong pixel. */
-        PixelPacket *origin = pixels + (block_y * 4) * width + block_x * 4;
-        int t[16], s[16];
-
-        /* Convert pixels to Luv color space and compute average coefficients
-         * FIXME:
-         * - could be MUCH faster with precomputed tables and integer arithmetic...
-         *   I will probably use interpolation in 3-dim array */
-        uns l_sum = 0;
-        uns u_sum = 0;
-        uns v_sum = 0;
-        for (uns y = 0; y < 4; y++)
-          for (uns x = 0; x < 4; x++)
-            {
-              PixelPacket *p = origin + y * width + x;
-              double rgb[3], luv[3], xyz[3];
-              rgb[0] = (p->red >> (QuantumDepth - 8)) / 255.;
-              rgb[1] = (p->green >> (QuantumDepth - 8)) / 255.;
-              rgb[2] = (p->blue >> (QuantumDepth - 8)) / 255.;
-              srgb_to_xyz_slow(rgb, xyz);
-              xyz_to_luv_slow(xyz, luv);
-              /* The truncated L value feeds both the wavelet input and the sum */
-              int l = luv[0];
-              t[4 * y + x] = l;
-              l_sum += l;
-              /* u and v may be negative, shift them into the unsigned range */
-              u_sum += luv[1] + 150;
-              v_sum += luv[2] + 150;
-            }
-
-        block->l = l_sum;
-        block->u = u_sum;
-        block->v = v_sum;
-
-        /* Apply Daubechies wavelet transformation
-         * FIXME:
-         * - MMX/SSE instructions or tables could be faster
-         * - maybe it would be better to compute Luv and wavelet separately because of processor cache or MMX/SSE
-         * - eliminate slow square roots
-         * - what about Haar transformation? */
-
-        /* Filter coefficients in 16.16 fixed point */
-#define DAUB_0 31651 /* (1 + sqrt 3) / (4 * sqrt 2) */
-#define DAUB_1 54822 /* (3 + sqrt 3) / (4 * sqrt 2) */
-#define DAUB_2 14689 /* (3 - sqrt 3) / (4 * sqrt 2) */
-#define DAUB_3 (-8481) /* (1 - sqrt 3) / (4 * sqrt 2) */
-
-        /* ... to the rows */
-        uns i;
-        for (i = 0; i < 16; i += 4)
-          {
-            s[i + 0] = (DAUB_0 * t[i + 2] + DAUB_1 * t[i + 3] + DAUB_2 * t[i + 0] + DAUB_3 * t[i + 1]) / 0x10000;
-            s[i + 1] = (DAUB_0 * t[i + 0] + DAUB_1 * t[i + 1] + DAUB_2 * t[i + 2] + DAUB_3 * t[i + 3]) / 0x10000;
-            s[i + 2] = (DAUB_3 * t[i + 2] - DAUB_2 * t[i + 3] + DAUB_1 * t[i + 0] - DAUB_0 * t[i + 1]) / 0x10000;
-            s[i + 3] = (DAUB_3 * t[i + 0] - DAUB_2 * t[i + 1] + DAUB_1 * t[i + 2] - DAUB_0 * t[i + 3]) / 0x10000;
-          }
-
-        /* ... and to the columns... skip LL band
-         * NOTE(review): this pass divides by 0x1000 while the row pass divides
-         * by 0x10000 -- presumably to keep more precision, confirm. */
-        for (i = 0; i < 2; i++)
-          {
-            t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x1000;
-            t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x1000;
-          }
-        for (; i < 4; i++)
-          {
-            t[i + 0] = (DAUB_0 * s[i + 8] + DAUB_1 * s[i +12] + DAUB_2 * s[i + 0] + DAUB_3 * s[i + 4]) / 0x1000;
-            t[i + 4] = (DAUB_0 * s[i + 0] + DAUB_1 * s[i + 4] + DAUB_2 * s[i + 8] + DAUB_3 * s[i +12]) / 0x1000;
-            t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x1000;
-            t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x1000;
-          }
-
-        /* Extract energies in LH, HL and HH bands */
-        block->lh = sqrt(t[8] * t[8] + t[9] * t[9] + t[12] * t[12] + t[13] * t[13]);
-        block->hl = sqrt(t[2] * t[2] + t[3] * t[3] + t[6] * t[6] + t[7] * t[7]);
-        block->hh = sqrt(t[10] * t[10] + t[11] * t[11] + t[14] * t[14] + t[15] * t[15]);
-      }
-
-  /* FIXME: simple average is for testing purposes only */
-  uns l_sum = 0;
-  uns u_sum = 0;
-  uns v_sum = 0;
-  uns lh_sum = 0;
-  uns hl_sum = 0;
-  uns hh_sum = 0;
-  for (uns i = 0; i < blocks_count; i++)
-    {
-      l_sum += blocks[i].l;
-      u_sum += blocks[i].u;
-      v_sum += blocks[i].v;
-      lh_sum += blocks[i].lh;
-      hl_sum += blocks[i].hl;
-      hh_sum += blocks[i].hh;
-    }
-
-  /* blocks_count is non-zero thanks to the ASSERT above */
-  sig->vec[0] = l_sum / blocks_count;
-  sig->vec[1] = u_sum / blocks_count;
-  sig->vec[2] = v_sum / blocks_count;
-  sig->vec[3] = lh_sum / blocks_count;
-  sig->vec[4] = hl_sum / blocks_count;
-  sig->vec[5] = hh_sum / blocks_count;
-
-  xfree(blocks);
-
-  DBG("Resulting signature is (%d, %d, %d, %d, %d, %d)", sig->vec[0], sig->vec[1], sig->vec[2], sig->vec[3], sig->vec[4], sig->vec[5]);
-}
-
-/*
- * Decode an image held in memory and compute its signature.
- *
- * data, len: the encoded image blob
- * sig: output signature, written only on success
- *
- * Returns 0 on success, -1 when the image is smaller than 4x4 pixels and
- * -2 when its pixels cannot be acquired.  A blob that cannot be decoded at
- * all is fatal (die()).
- */
-int
-compute_image_signature(void *data, uns len, struct image_signature *sig)
-{
-  int retval = 0;
-
-  InitializeMagick(NULL); /* FIXME: call only once */
-  ExceptionInfo exception;
-  GetExceptionInfo(&exception);
-  ImageInfo *image_info = CloneImageInfo(NULL);
-  image_info->subrange = 1;
-
-  DBG("Decoding");
-  Image *image = BlobToImage(image_info, data, len, &exception); /* Damn slow... most of the time :-/ */
-  if (!image)
-    die("Invalid image format");
-  if (image->columns < 4 || image->rows < 4)
-    {
-      DBG("Image too small (%dx%d)", (int)image->columns, (int)image->rows);
-      retval = -1;
-      goto exit;
-    }
-
-  QuantizeInfo quantize_info;
-  GetQuantizeInfo(&quantize_info);
-  quantize_info.colorspace = RGBColorspace;
-  QuantizeImage(&quantize_info, image);
-
-  PixelPacket *pixels = (PixelPacket *) AcquireImagePixels(image, 0, 0, image->columns, image->rows, &exception);
-  if (!pixels)
-    {
-      /* Report the failure through the normal cleanup path instead of
-       * dereferencing a NULL pixel pointer. */
-      DBG("Cannot acquire image pixels");
-      retval = -2;
-      goto exit;
-    }
-
-  compute_image_area_signature(pixels, image->columns, image->rows, sig);
-
-exit:
-  DestroyImage(image);
-  DestroyImageInfo(image_info);
-  DestroyExceptionInfo(&exception);
-  DestroyMagick();
-  return retval;
-}
-