3 #include "sherlock/sherlock.h"
4 #include "lib/mempool.h"
6 #include "lib/fastbuf.h"
7 #include "lib/chartype.h"
8 #include "sherlock/object.h"
10 #include "lib/unicode.h"
11 #include "sherlock/lizard-fb.h"
12 #include "sherlock/tagged-text.h"
13 #include "charset/charconv.h"
14 #include "charset/unicat.h"
15 #include "charset/fb-charconv.h"
16 #include "indexer/indexer.h"
17 #include "indexer/lexicon.h"
18 #include "indexer/params.h"
19 #include "utils/dumpconfig.h"
20 #include "lang/lang.h"
21 #include "lib/base224.h"
24 #include "images/images.h"
30 /* This should happen in gatherer or scanner */
32 generate_signatures(uns limit)
34 struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
35 struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
36 struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
38 struct image_signature sig;
39 struct mempool *pool = mp_new(1 << 16);
40 struct buck2obj_buf *bob = buck2obj_alloc();
43 log(L_INFO, "Generating image signatures");
44 bputl(fb_signatures, 0);
45 compute_image_signature_prepare();
47 for (oid_t oid = 0; bread(fb_card_attrs, &ca, sizeof(ca)); oid++)
48 if ((uns)((ca.type_flags >> 4) - 8) < 4)
50 bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
51 uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1);
52 uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN;
54 struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb_cards, NULL);
57 die("Failed to read card");
58 if (attr = obj_find_attr(obj, 'N'))
60 DBG("Reading oid=%d url=%s", oid, obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
64 for (; attr; attr = attr->same)
66 uns len = strlen(attr->val);
67 bb_grow(&buf, buf_len + len);
68 memcpy(buf.ptr + buf_len, attr->val, len);
72 uns thumb_len = base224_decode(thumb, buf.ptr, buf_len);
74 int err = compute_image_signature(thumb, thumb_len, &sig);
77 bwrite(fb_signatures, &oid, sizeof(oid));
78 bwrite(fb_signatures, &sig.vec, sizeof(struct image_vector));
79 bputc(fb_signatures, sig.len);
81 bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region));
87 DBG("Cannot create signature, error=%d", err);
92 brewind(fb_signatures);
93 bputl(fb_signatures, count);
94 DBG("%d signatures written", count);
96 compute_image_signature_finish();
100 bclose(fb_card_attrs);
101 bclose(fb_signatures);
105 generate_random_signatures(uns count)
107 log(L_INFO, "Generating %d random signatures", count);
108 struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
109 bputl(fb_signatures, count);
110 for (uns i = 0; i < count; i++)
113 struct image_vector vec;
114 for (uns j = 0; j < IMAGE_VEC_K; j++)
115 vec.f[j] = random_max(256);
116 bwrite(fb_signatures, &oid, sizeof(oid));
117 bwrite(fb_signatures, &vec, sizeof(vec));
118 bputc(fb_signatures, 0);
120 bclose(fb_signatures);
123 struct signature_record {
125 struct image_vector vec;
128 #define ASORT_PREFIX(x) build_search_tree_##x
129 #define ASORT_KEY_TYPE struct signature_record *
130 #define ASORT_ELT(i) rec[i]
131 #define ASORT_LT(x,y) x->vec.f[dim] < y->vec.f[dim]
132 #define ASORT_EXTRA_ARGS , uns dim, struct signature_record **rec
133 #include "lib/arraysort.h"
136 #define DBG_KD(x...) DBG(x)
138 #define DBG_KD(x...) do{}while(0)
142 build_search_tree(void)
144 log(L_INFO, "Building KD-tree");
146 struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY);
147 struct image_tree tree;
148 tree.count = bgetl(fb_signatures);
149 ASSERT(tree.count < 0x80000000);
153 bclose(fb_signatures);
154 die("There are no signatures");
158 DBG("Reading %d signatures", tree.count);
159 struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record));
160 struct signature_record **precords = xmalloc(tree.count * sizeof(void *));
161 for (uns i = 0; i < tree.count; i++)
163 bread(fb_signatures, &records[i].oid, sizeof(oid_t));
164 bread(fb_signatures, &records[i].vec, sizeof(struct image_vector));
165 uns len = bgetc(fb_signatures);
166 bskip(fb_signatures, len * sizeof(struct image_region));
167 precords[i] = records + i;
169 for (uns j = 0; j < IMAGE_VEC_K; j++)
171 tree.bbox.vec[0].f[j] = MIN(tree.bbox.vec[0].f[j], records[i].vec.f[j]);
172 tree.bbox.vec[1].f[j] = MAX(tree.bbox.vec[1].f[j], records[i].vec.f[j]);
175 tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec;
177 bclose(fb_signatures);
179 for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++);
180 DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth,
181 stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
182 uns leaves_index = 1 << (tree.depth - 1);
183 tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node));
184 tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf));
186 /* Initialize recursion */
188 struct image_bbox bbox;
190 struct signature_record **start;
191 } stk_top[32], *stk = stk_top + 1;
193 stk->start = precords;
194 stk->count = tree.count;
195 stk->bbox.vec[0] = tree.bbox.vec[0];
196 for (uns i = 0; i < IMAGE_VEC_K; i++)
197 stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i];
201 while (stk != stk_top)
203 DBG_KD("Main loop... depth=%d index=%d count=%d, start=%d, min=%s dif=%s",
204 stk - stk_top, stk->index, stk->count, stk->start - precords,
205 stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1));
208 /* Create leaf node */
209 if (stk->index >= leaves_index || stk->count < 2)
211 tree.nodes[stk->index].val = IMAGE_NODE_LEAF | entry_index;
212 for (; stk->count--; stk->start++)
214 struct image_leaf *leaf = &tree.leaves[entry_index++];
215 struct signature_record *record = *stk->start;
216 leaf->oid = record->oid;
218 for (uns i = IMAGE_VEC_K; i--; )
220 uns bits = IMAGE_LEAF_BITS(i);
221 leaf->flags <<= bits;
222 if (stk->bbox.vec[1].f[i])
225 (record->vec.f[i] - stk->bbox.vec[0].f[i]) *
226 ((1 << bits) - 1) / stk->bbox.vec[1].f[i];
227 ASSERT(value < (uns)(1 << bits));
228 leaf->flags |= value;
232 leaf->flags |= IMAGE_LEAF_LAST;
233 DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x",
234 leaf->oid, stk_print_image_vector(&record->vec), leaf->flags);
239 /* Create internal node */
242 /* Select dimension to splis */
244 for (uns i = 1; i < IMAGE_VEC_K; i++)
245 if (stk->bbox.vec[1].f[i] > stk->bbox.vec[1].f[dim])
248 /* Sort... FIXME: we only need the median */
249 build_search_tree_sort(stk->count, dim, stk->start);
251 /* Split in the middle */
252 uns index = stk->index;
253 stk[1].index = stk[0].index * 2;
254 stk[0].index = stk[1].index + 1;
255 stk[1].count = stk[0].count >> 1;
256 stk[0].count -= stk[1].count;
257 stk[1].start = stk[0].start;
258 stk[0].start += stk[1].count;
260 /* Choose split value */
261 uns lval = stk->start[-1]->vec.f[dim];
262 uns rval = stk->start[0]->vec.f[dim];
263 uns pivot = stk->bbox.vec[0].f[dim] + (stk->bbox.vec[1].f[dim] >> 1);
266 else if (pivot >= rval)
269 DBG_KD("Created internal node; dim=%d pivot=%d", dim, pivot);
272 stk[1].bbox = stk[0].bbox;
273 stk[1].bbox.vec[1].f[dim] = pivot - stk[0].bbox.vec[0].f[dim];
274 stk[0].bbox.vec[0].f[dim] += stk[1].bbox.vec[1].f[dim];
275 stk[0].bbox.vec[1].f[dim] -= stk[1].bbox.vec[1].f[dim];
277 /* Fill the node structure */
278 tree.nodes[index].val = dim + (pivot << 8);
283 DBG("Tree constructed, saving...");
285 struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
286 bputl(fb_tree, tree.count);
287 bputl(fb_tree, tree.depth);
288 bwrite(fb_tree, &tree.bbox, sizeof(struct image_bbox));
289 bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
290 bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
301 static char *shortopts = CF_SHORT_OPTS "";
302 static struct option longopts[] =
308 static char *help = "\
309 Usage: image-indexer [<options>]\n\
311 Options:\n" CF_USAGE;
327 main(int argc UNUSED, char **argv)
332 while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
336 usage("Invalid option");
339 usage("Invalid usage");
341 generate_signatures(~0U);
342 //generate_random_signatures(1000000);