]> mj.ucw.cz Git - libucw.git/blob - images/image-idx.c
LIBPNG support for reading of PNG thumbnails (probably faster
[libucw.git] / images / image-idx.c
1 #define LOCAL_DEBUG
2
3 #include "sherlock/sherlock.h"
4 #include "lib/mempool.h"
5 #include "lib/conf.h"
6 #include "lib/fastbuf.h"
7 #include "lib/chartype.h"
8 #include "sherlock/object.h"
9 #include "lib/url.h"
10 #include "lib/unicode.h"
11 #include "sherlock/lizard-fb.h"
12 #include "sherlock/tagged-text.h"
13 #include "charset/charconv.h"
14 #include "charset/unicat.h"
15 #include "charset/fb-charconv.h"
16 #include "indexer/indexer.h"
17 #include "indexer/lexicon.h"
18 #include "indexer/params.h"
19 #include "utils/dumpconfig.h"
20 #include "lang/lang.h"
21 #include "lib/base224.h"
22 #include "lib/bbuf.h"
23
24 #include "images/images.h"
25 #include "images/image-obj.h"
26 #include "images/image-sig.h"
27
28 #include <stdlib.h>
29 #include <fcntl.h>
30 #include <string.h>
31
32 /* This should happen in gatherer or scanner */
33 UNUSED static void
34 generate_signatures(uns limit)
35 {
36   struct fastbuf *fb_cards = index_bopen("cards", O_RDONLY);
37   struct fastbuf *fb_card_attrs = index_bopen("card-attrs", O_RDONLY);
38   struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
39   struct card_attr ca;
40   struct image_signature sig;
41   struct mempool *pool = mp_new(1 << 16);
42   struct buck2obj_buf *bob = buck2obj_alloc();
43   uns count = 0;
44
45   log(L_INFO, "Generating image signatures");
46   bputl(fb_signatures, 0);
47   imo_decompress_thumbnails_init();
48
49   for (oid_t oid = 0; bread(fb_card_attrs, &ca, sizeof(ca)); oid++)
50     if ((uns)((ca.type_flags >> 4) - 8) < 4)
51       {
52         bsetpos(fb_cards, (sh_off_t)ca.card << CARD_POS_SHIFT);
53         uns buck_len = bgetl(fb_cards) - (LIZARD_COMPRESS_HEADER - 1);
54         uns buck_type = bgetc(fb_cards) + BUCKET_TYPE_PLAIN;
55         mp_flush(pool);
56         struct odes *obj = obj_read_bucket(bob, pool, buck_type, buck_len, fb_cards, NULL);
57         struct oattr *attr;
58         if (!obj)
59           die("Failed to read card");
60         if (attr = obj_find_attr(obj, 'N'))
61           {
62             DBG("Reading oid=%d url=%s", oid, obj_find_aval(obj_find_attr(obj, 'U' + OBJ_ATTR_SON)->son, 'U'));
63             struct image_obj imo;
64             imo_init(&imo, pool, obj);
65             if (imo_decompress_thumbnail(&imo))
66               {
67                 int err;
68                 if (!(err = compute_image_signature(&imo.thumb, &sig)))
69                   {
70                     bwrite(fb_signatures, &oid, sizeof(oid));
71                     bwrite(fb_signatures, &sig.vec, sizeof(struct image_vector));
72                     bputc(fb_signatures, sig.len);
73                     if (sig.len)
74                       bwrite(fb_signatures, sig.reg, sig.len * sizeof(struct image_region));
75                     count++;
76                     if (count >= limit)
77                       break;
78                   }
79                 else
80                   DBG("Cannot create signature, error=%d", err);
81               }
82             else
83               DBG("Cannot decompress thumbnail");
84           }
85       }
86   brewind(fb_signatures);
87   bputl(fb_signatures, count);
88   DBG("%d signatures written", count);
89
90   imo_decompress_thumbnails_done();
91   buck2obj_free(bob);
92   mp_delete(pool);
93   bclose(fb_cards);
94   bclose(fb_card_attrs);
95   bclose(fb_signatures);
96 }
97
98 UNUSED static void
99 generate_random_signatures(uns count)
100 {
101   log(L_INFO, "Generating %d random signatures", count);
102   struct fastbuf *fb_signatures = index_bopen("image-sig", O_CREAT | O_WRONLY | O_TRUNC);
103   bputl(fb_signatures, count);
104   for (uns i = 0; i < count; i++)
105     {
106       oid_t oid = i;
107       struct image_vector vec;
108       for (uns j = 0; j < IMAGE_VEC_K; j++)
109         vec.f[j] = random_max(256); 
110       bwrite(fb_signatures, &oid, sizeof(oid));
111       bwrite(fb_signatures, &vec, sizeof(vec));
112       bputc(fb_signatures, 0);
113     }
114   bclose(fb_signatures); 
115 }
116
117 struct signature_record {
118   oid_t oid;
119   struct image_vector vec;
120 };
121
122 #define ASORT_PREFIX(x) build_search_tree_##x
123 #define ASORT_KEY_TYPE struct signature_record *
124 #define ASORT_ELT(i) rec[i]
125 #define ASORT_LT(x,y) x->vec.f[dim] < y->vec.f[dim]
126 #define ASORT_EXTRA_ARGS , uns dim, struct signature_record **rec
127 #include "lib/arraysort.h"
128
129 #if 0
130 #define DBG_KD(x...) DBG(x)
131 #else
132 #define DBG_KD(x...) do{}while(0)
133 #endif
134
135 static void
136 build_search_tree(void)
137 {
138   log(L_INFO, "Building KD-tree");
139
140   struct fastbuf *fb_signatures = index_bopen("image-sig", O_RDONLY);
141   struct image_tree tree;
142   tree.count = bgetl(fb_signatures);
143   ASSERT(tree.count < 0x80000000);
144   if (!tree.count)
145     {
146       /* FIXME */
147       bclose(fb_signatures);
148       die("There are no signatures");
149     }
150   else
151     {
152       DBG("Reading %d signatures", tree.count);
153       struct signature_record *records = xmalloc(tree.count * sizeof(struct signature_record));
154       struct signature_record **precords = xmalloc(tree.count * sizeof(void *));
155       for (uns i = 0; i < tree.count; i++)
156         {
157           bread(fb_signatures, &records[i].oid, sizeof(oid_t));
158           bread(fb_signatures, &records[i].vec, sizeof(struct image_vector));
159           uns len = bgetc(fb_signatures);
160           bskip(fb_signatures, len * sizeof(struct image_region));
161           precords[i] = records + i;
162           if (likely(i))
163             for (uns j = 0; j < IMAGE_VEC_K; j++)
164               {
165                 tree.bbox.vec[0].f[j] = MIN(tree.bbox.vec[0].f[j], records[i].vec.f[j]);
166                 tree.bbox.vec[1].f[j] = MAX(tree.bbox.vec[1].f[j], records[i].vec.f[j]);
167               }
168           else
169             tree.bbox.vec[0] = tree.bbox.vec[1] = records[0].vec;
170         }
171       bclose(fb_signatures);
172       
173       for (tree.depth = 1; (uns)(2 << tree.depth) < tree.count; tree.depth++);
174       DBG("depth=%d nodes=%d bbox=[(%s), (%s)]", tree.depth, 1 << tree.depth, 
175           stk_print_image_vector(tree.bbox.vec + 0), stk_print_image_vector(tree.bbox.vec + 1));
176       uns leaves_index = 1 << (tree.depth - 1);
177       tree.nodes = xmalloc_zero((1 << tree.depth) * sizeof(struct image_node));
178       tree.leaves = xmalloc_zero(tree.count * sizeof(struct image_leaf));
179      
180       /* Initialize recursion */
181       struct stk {
182         struct image_bbox bbox;
183         uns index, count;
184         struct signature_record **start;
185       } stk_top[32], *stk = stk_top + 1;
186       stk->index = 1;
187       stk->start = precords;
188       stk->count = tree.count;
189       stk->bbox.vec[0] = tree.bbox.vec[0];
190       for (uns i = 0; i < IMAGE_VEC_K; i++)
191         stk->bbox.vec[1].f[i] = tree.bbox.vec[1].f[i] - tree.bbox.vec[0].f[i];
192       uns entry_index = 0;
193     
194       /* Main loop */
195       while (stk != stk_top)
196         {
197           DBG_KD("Main loop... depth=%d index=%d count=%d, start=%d, min=%s dif=%s",
198               stk - stk_top, stk->index, stk->count, stk->start - precords,
199               stk_print_image_vector(stk->bbox.vec + 0), stk_print_image_vector(stk->bbox.vec + 1));
200           ASSERT(stk->count);
201           
202           /* Create leaf node */
203           if (stk->index >= leaves_index || stk->count < 2)
204             {
205               tree.nodes[stk->index].val = IMAGE_NODE_LEAF | entry_index;
206               for (; stk->count--; stk->start++)
207                 {
208                   struct image_leaf *leaf = &tree.leaves[entry_index++];
209                   struct signature_record *record = *stk->start;
210                   leaf->oid = record->oid;
211                   leaf->flags = 0;
212                   for (uns i = IMAGE_VEC_K; i--; )
213                     {
214                       uns bits = IMAGE_LEAF_BITS(i);
215                       leaf->flags <<= bits;
216                       if (stk->bbox.vec[1].f[i])
217                         {
218                           uns value =
219                             (record->vec.f[i] - stk->bbox.vec[0].f[i]) * 
220                             ((1 << bits) - 1) / stk->bbox.vec[1].f[i];
221                           ASSERT(value < (uns)(1 << bits));
222                           leaf->flags |= value;
223                         }
224                     }
225                   if (!stk->count)
226                     leaf->flags |= IMAGE_LEAF_LAST; 
227                   DBG_KD("Creating leaf node; oid=%d vec=(%s) flags=0x%08x", 
228                       leaf->oid, stk_print_image_vector(&record->vec), leaf->flags);
229                 }
230               stk--;
231             }
232          
233           /* Create internal node */
234           else
235             {
236               /* Select dimension to splis */
237               uns dim = 0;
238               for (uns i = 1; i < IMAGE_VEC_K; i++)
239                 if (stk->bbox.vec[1].f[i] > stk->bbox.vec[1].f[dim])
240                   dim = i;
241
242               /* Sort... FIXME: we only need the median */
243               build_search_tree_sort(stk->count, dim, stk->start);
244               
245               /* Split in the middle */
246               uns index = stk->index;
247               stk[1].index = stk[0].index * 2;
248               stk[0].index = stk[1].index + 1;
249               stk[1].count = stk[0].count >> 1;
250               stk[0].count -= stk[1].count;
251               stk[1].start = stk[0].start;
252               stk[0].start += stk[1].count;
253
254               /* Choose split value */
255               uns lval = stk->start[-1]->vec.f[dim];
256               uns rval = stk->start[0]->vec.f[dim];
257               uns pivot = stk->bbox.vec[0].f[dim] + (stk->bbox.vec[1].f[dim] >> 1);
258               if (pivot <= lval)
259                 pivot = lval;
260               else if (pivot >= rval)
261                 pivot = rval;
262
263               DBG_KD("Created internal node; dim=%d pivot=%d", dim, pivot);
264
265               /* Split the box */
266               stk[1].bbox = stk[0].bbox;
267               stk[1].bbox.vec[1].f[dim] = pivot - stk[0].bbox.vec[0].f[dim];
268               stk[0].bbox.vec[0].f[dim] += stk[1].bbox.vec[1].f[dim];
269               stk[0].bbox.vec[1].f[dim] -= stk[1].bbox.vec[1].f[dim];
270
271               /* Fill the node structure */
272               tree.nodes[index].val = dim + (pivot << 8);
273               stk++;
274             }
275         }
276
277       DBG("Tree constructed, saving...");
278
279       struct fastbuf *fb_tree = index_bopen("image-tree", O_CREAT | O_WRONLY | O_TRUNC);
280       bputl(fb_tree, tree.count);
281       bputl(fb_tree, tree.depth);
282       bwrite(fb_tree, &tree.bbox, sizeof(struct image_bbox));
283       bwrite(fb_tree, tree.nodes + 1, ((1 << tree.depth) - 1) * sizeof(struct image_node));
284       bwrite(fb_tree, tree.leaves, tree.count * sizeof(struct image_leaf));
285       bclose(fb_tree);
286
287       xfree(tree.leaves);
288       xfree(tree.nodes);
289       xfree(precords);
290       xfree(records);
291     }
292 }
293
294
295 static char *shortopts = CF_SHORT_OPTS "";
296 static struct option longopts[] =
297 {
298   CF_LONG_OPTS
299   { NULL, 0, 0, 0 }
300 };
301
302 static char *help = "\
303 Usage: image-indexer [<options>]\n\
304 \n\
305 Options:\n" CF_USAGE;
306
307 static void NONRET
308 usage(byte *msg)
309 {
310   if (msg)
311   {
312     fputs(msg, stderr);
313     fputc('\n', stderr);
314   }
315   fputs(help, stderr);
316   exit(1);
317 }
318
319   
320 int
321 main(int argc UNUSED, char **argv)
322 {
323   int opt;
324   
325   log_init(argv[0]);
326   while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
327     switch (opt)
328     {
329       default:
330       usage("Invalid option");
331     }
332   if (optind != argc)
333     usage("Invalid usage");
334
335   generate_signatures(~0U);
336   //generate_random_signatures(1000000);
337   build_search_tree();
338   
339   return 0;
340 }