images/dup-cmp.c

   1 /*
   2  *      Image Library -- Duplicates Comparison
   3  *
   4  *      (c) 2006 Pavel Charvat <pchar@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  *
   9  *
  10  *      FIXME:
  11  *      - many possible optimization
  12  *      - compare normalized pictures (brightness, ...)
  13  *      - a blur should help to deal with scaling errors
  14  *      - maybe better/slower last step
  15  *      - different thresholds for various transformations
  16  *      - do not test all transformations for symetric pictures
  17  *      - allocated memory could be easily decreased to about 1/3
  18  *        for aspect ratio threshold near one
  19  */
  20
  21 #undef LOCAL_DEBUG
  22
  23 #include "sherlock/sherlock.h"
  24 #include "lib/mempool.h"
  25 #include "images/images.h"
  26 #include "images/dup-cmp.h"
  27
  28 #include "lib/mempool.h"
  29 #include "lib/fastbuf.h"
  30 #include <fcntl.h>
  31
  32 static uns image_dup_ratio_threshold = 140;
  33 static uns image_dup_error_threshold = 800;
  34 static uns image_dup_tab_limit = 8;
  35
  36 static inline byte *
  37 image_dup_block(struct image_dup *dup, uns tab_col, uns tab_row)
  38 {
  39   return dup->tab_pixels + (dup->tab_row_size << tab_row) + (3 << (tab_row + tab_col));
  40 }
  41
  42 static inline struct image *
  43 image_dup_subimage(struct image_thread *thread, struct image_dup *dup, struct image *block, uns tab_col, uns tab_row)
  44 {
  45   return image_init_matrix(thread, block, image_dup_block(dup, tab_col, tab_row),
  46       1 << tab_col, 1 << tab_row, 3 << tab_col, COLOR_SPACE_RGB);
  47 }
  48
  49 static inline void
  50 pixels_average(byte *dest, byte *src1, byte *src2)
  51 {
  52   dest[0] = ((uns)src1[0] + (uns)src2[0]) >> 1;
  53   dest[1] = ((uns)src1[1] + (uns)src2[1]) >> 1;
  54   dest[2] = ((uns)src1[2] + (uns)src2[2]) >> 1;
  55 }
  56
  57 uns
  58 image_dup_estimate_size(uns cols, uns rows)
  59 {
  60   uns tab_cols, tab_rows;
  61   for (tab_cols = 0; (uns)(2 << tab_cols) < cols && tab_cols < image_dup_tab_limit; tab_cols++);
  62   for (tab_rows = 0; (uns)(2 << tab_rows) < rows && tab_rows < image_dup_tab_limit; tab_rows++);
  63   return sizeof(struct image) + cols * rows * 3 + sizeof(struct image_dup) + (12 << (tab_cols + tab_rows)) + 64;
  64 }
  65
  66 uns
  67 image_dup_init(struct image_thread *thread, struct image_dup *dup, struct image *img, struct mempool *pool)
  68 {
  69   DBG("image_dup_init()");
  70
  71   ASSERT((img->flags & IMAGE_PIXEL_FORMAT) == COLOR_SPACE_RGB);
  72
  73   dup->image = img;
  74   for (dup->tab_cols = 0; (uns)(2 << dup->tab_cols) < img->cols && dup->tab_cols < image_dup_tab_limit; dup->tab_cols++);
  75   for (dup->tab_rows = 0; (uns)(2 << dup->tab_rows) < img->rows && dup->tab_rows < image_dup_tab_limit; dup->tab_rows++);
  76   dup->tab_pixels = mp_alloc(pool, dup->tab_size = (12 << (dup->tab_cols + dup->tab_rows)));
  77   dup->tab_row_size = 6 << dup->tab_cols;
  78
  79   /* Scale original image to right bottom block */
  80   {
  81     struct image block;
  82     if (!image_dup_subimage(thread, dup, &block, dup->tab_cols, dup->tab_rows))
  83       return 0;
  84     if (!image_scale(thread, &block, img))
  85       return 0;
  86   }
  87
  88   /* Complete bottom row */
  89   for (uns i = dup->tab_cols; i--; )
  90     {
  91       byte *d = image_dup_block(dup, i, dup->tab_rows);
  92       byte *s = image_dup_block(dup, i + 1, dup->tab_rows);
  93       for (uns y = 0; y < (uns)(1 << dup->tab_rows); y++)
  94         for (uns x = 0; x < (uns)(1 << i); x++)
  95           {
  96             pixels_average(d, s, s + 3);
  97             d += 3;
  98             s += 6;
  99           }
 100     }
 101
 102   /* Complete remaining blocks */
 103   for (uns i = 0; i <= dup->tab_cols; i++)
 104     {
 105       uns line_size = (3 << i);
 106       for (uns j = dup->tab_rows; j--; )
 107         {
 108           byte *d = image_dup_block(dup, i, j);
 109           byte *s = image_dup_block(dup, i, j + 1);
 110           for (uns y = 0; y < (uns)(1 << j); y++)
 111             {
 112               for (uns x = 0; x < (uns)(1 << i); x++)
 113                 {
 114                   pixels_average(d, s, s + line_size);
 115                   d += 3;
 116                   s += 3;
 117                 }
 118               s += line_size;
 119             }
 120         }
 121     }
 122
 123   return 1;
 124 }
 125
 126 static inline uns
 127 err (int a, int b)
 128 {
 129   a -= b;
 130   return a * a;
 131 }
 132
 133 static inline u64
 134 err_sum(byte *pos1, byte *pos2, uns count)
 135 {
 136   uns e64 = 0;
 137   while (count--)
 138     {
 139       uns e = err(*pos1++, *pos2++);
 140       e += err(*pos1++, *pos2++);
 141       e += err(*pos1++, *pos2++);
 142       e64 += e;
 143     }
 144   return e64;
 145 }
 146
 147 static inline u64
 148 err_sum_transformed(byte *pos1, byte *pos2, uns cols, uns rows, int row_step_1, int col_step_2, int row_step_2)
 149 {
 150   DBG("err_sum_transformed(pos1=%p pos2=%p cols=%u rows=%u row_step_1=%d col_step_2=%d row_step_2=%d)",
 151       pos1, pos2, cols, rows, row_step_1, col_step_2, row_step_2);
 152   u64 e64 = 0;
 153   for (uns j = rows; j--; )
 154     {
 155       byte *p1 = pos1;
 156       byte *p2 = pos2;
 157       uns e = 0;
 158       for (uns i = cols; i--; )
 159       {
 160         e += err(p1[0], p2[0]);
 161         e += err(p1[1], p2[1]);
 162         e += err(p1[2], p2[2]);
 163         p1 += 3;
 164         p2 += col_step_2;
 165       }
 166       pos1 += row_step_1;
 167       pos2 += row_step_2;
 168       e64 += e;
 169     }
 170   return e64;
 171 }
 172
 173 static inline int
 174 aspect_ratio_test(uns cols1, uns rows1, uns cols2, uns rows2)
 175 {
 176   DBG("aspect_ratio_test(cols1=%u rows1=%u cols2=%u rows2=%u)", cols1, rows1, cols2, rows2);
 177   uns r1 = cols1 * rows2;
 178   uns r2 = rows1 * cols2;
 179   return
 180     r1 <= ((r2 * image_dup_ratio_threshold) >> 7) &&
 181     r2 <= ((r1 * image_dup_ratio_threshold) >> 7);
 182 }
 183
 184 static inline int
 185 average_compare(struct image_dup *dup1, struct image_dup *dup2)
 186 {
 187   byte *block1 = image_dup_block(dup1, 0, 0);
 188   byte *block2 = image_dup_block(dup2, 0, 0);
 189   uns e =
 190     err(block1[0], block2[0]) +
 191     err(block1[1], block2[1]) +
 192     err(block1[2], block2[2]);
 193   return e <= image_dup_error_threshold;
 194 }
 195
 196 static int
 197 blocks_compare(struct image_dup *dup1, struct image_dup *dup2, uns tab_col, uns tab_row, uns trans)
 198 {
 199   DBG("blocks_compare(tab_col=%d tab_row=%d trans=%d)", tab_col, tab_row, trans);
 200   byte *block1 = image_dup_block(dup1, tab_col, tab_row);
 201   byte *block2;
 202   int col_step, row_step;
 203   if (trans < 4)
 204     block2 = image_dup_block(dup2, tab_col, tab_row);
 205   else
 206     block2 = image_dup_block(dup2, tab_row, tab_col);
 207   switch (trans)
 208     {
 209       case 0: ;
 210         uns err = (err_sum(block1, block2, 1 << (tab_col + tab_row)) >> (tab_col + tab_row));
 211         DBG("average error=%d", err);
 212         return err <= image_dup_error_threshold;
 213       case 1:
 214         col_step = -3;
 215         row_step = (3 << tab_col);
 216         block2 += row_step - 3;
 217         break;
 218       case 2:
 219         col_step = 3;
 220         row_step = -(3 << tab_col);
 221         block2 += (3 << (tab_col + tab_row)) + row_step;
 222         break;
 223       case 3:
 224         col_step = -3;
 225         row_step = -(3 << tab_col);
 226         block2 += (3 << (tab_col + tab_row)) - 3;
 227         break;
 228       case 4:
 229         col_step = (3 << tab_row);
 230         row_step = 3;
 231         break;
 232       case 5:
 233         col_step = -(3 << tab_row);
 234         row_step = 3;
 235         block2 += (3 << (tab_col + tab_row)) + col_step;
 236         break;
 237       case 6:
 238         col_step = (3 << tab_row);
 239         row_step = -3;
 240         block2 += col_step - 3;
 241         break;
 242       case 7:
 243         col_step = -(3 << tab_row);
 244         row_step = -3;
 245         block2 += (3 << (tab_col + tab_row)) - 3;
 246         break;
 247       default:
 248         ASSERT(0);
 249     }
 250   uns err = (err_sum_transformed(block1, block2, (1 << tab_col), (1 << tab_row), (3 << tab_col), col_step, row_step) >> (tab_col + tab_row));
 251   DBG("average error=%d", err);
 252   return err <= image_dup_error_threshold;
 253 }
 254
 255 static int
 256 same_size_compare(struct image_dup *dup1, struct image_dup *dup2, uns trans)
 257 {
 258   struct image *img1 = dup1->image;
 259   struct image *img2 = dup2->image;
 260   byte *block1 = img1->pixels;
 261   byte *block2 = img2->pixels;
 262   int col_step, row_step;
 263   DBG("same_size_compare(trans=%d)",  trans);
 264   switch (trans)
 265     {
 266       case 0: ;
 267         col_step = 3;
 268         row_step = img2->row_size;
 269         break;
 270       case 1:
 271         col_step = -3;
 272         row_step = img2->row_size;
 273         block2 += 3 * (img2->cols - 1);
 274         break;
 275       case 2:
 276         col_step = 3;
 277         row_step = -img2->row_size;
 278         block2 += img2->row_size * (img2->rows - 1);
 279         break;
 280       case 3:
 281         col_step = -3;
 282         row_step = -img2->row_size;
 283         block2 += img2->row_size * (img2->rows - 1) + 3 * (img2->cols - 1);
 284         break;
 285       case 4:
 286         col_step = img2->row_size;
 287         row_step = 3;
 288         break;
 289       case 5:
 290         col_step = -img2->row_size;
 291         row_step = 3;
 292         block2 += img2->row_size * (img2->rows - 1);
 293         break;
 294       case 6:
 295         col_step = img2->row_size;
 296         row_step = -3;
 297         block2 += 3 * (img2->cols - 1);
 298         break;
 299       case 7:
 300         col_step = -img2->row_size;
 301         row_step = -3;
 302         block2 += img2->row_size * (img2->rows - 1) + 3 * (img2->cols - 1);
 303         break;
 304       default:
 305         ASSERT(0);
 306     }
 307   uns err = (err_sum_transformed(block1, block2, img1->cols, img1->rows, img1->row_size, col_step, row_step) / ((u64)img1->cols * img1->rows));
 308   DBG("average error=%d", err);
 309   return err <= image_dup_error_threshold;
 310 }
 311
 312 uns
 313 image_dup_compare(struct image_dup *dup1, struct image_dup *dup2, uns flags)
 314 {
 315   DBG("image_dup_compare()");
 316   if (!average_compare(dup1, dup2))
 317     return 0;
 318   struct image *img1 = dup1->image;
 319   struct image *img2 = dup2->image;
 320   if (flags & IMAGE_DUP_SCALE)
 321     {
 322       DBG("Scale support");
 323       if (!aspect_ratio_test(img1->cols, img1->rows, img2->cols, img2->rows))
 324         flags &= ~0x0f;
 325       if (!aspect_ratio_test(img1->cols, img1->rows, img2->rows, img2->cols))
 326         flags &= ~0xf0;
 327     }
 328   else
 329     {
 330       DBG("No scale support");
 331       if (!(img1->cols == img2->cols && img1->rows == img2->rows))
 332         flags &= ~0x0f;
 333       if (!(img1->cols == img2->rows && img1->rows == img2->cols))
 334         flags &= ~0xf0;
 335     }
 336   if (!(flags & 0xff))
 337     return 0;
 338   uns result = 0;
 339   if (flags & 0x0f)
 340     {
 341       uns cols = MIN(dup1->tab_cols, dup2->tab_cols);
 342       uns rows = MIN(dup1->tab_rows, dup2->tab_rows);
 343       for (uns t = 0; t < 4; t++)
 344         if (flags & (1 << t))
 345           {
 346             DBG("Testing trans %d", t);
 347             for (uns i = MAX(cols, rows); i--; )
 348               {
 349                 uns col = MAX(0, (int)(cols - i));
 350                 uns row = MAX(0, (int)(rows - i));
 351                 if (!blocks_compare(dup1, dup2, col, row, t))
 352                   break;
 353                 if (!i &&
 354                     (img1->cols != img2->cols || img1->rows != img2->rows ||
 355                     same_size_compare(dup1, dup2, t)))
 356                   {
 357                     result |= 1 << t;
 358                     if (!(flags & IMAGE_DUP_WANT_ALL))
 359                       return result;
 360                     else
 361                       break;
 362                   }
 363               }
 364           }
 365     }
 366   if (flags & 0xf0)
 367     {
 368       uns cols = MIN(dup1->tab_cols, dup2->tab_rows);
 369       uns rows = MIN(dup1->tab_rows, dup2->tab_cols);
 370       for (uns t = 4; t < 8; t++)
 371         if (flags & (1 << t))
 372           {
 373             DBG("Testing trans %d", t);
 374             for (uns i = MAX(cols, rows); i--; )
 375               {
 376                 uns col = MAX(0, (int)(cols - i));
 377                 uns row = MAX(0, (int)(rows - i));
 378                 if (!blocks_compare(dup1, dup2, col, row, t))
 379                   break;
 380                 if (!i &&
 381                     (img1->cols != img2->rows || img1->rows != img2->cols ||
 382                     same_size_compare(dup1, dup2, t)) )
 383                   {
 384                     result |= 1 << t;
 385                     if (!(flags & IMAGE_DUP_WANT_ALL))
 386                       return result;
 387                     else
 388                       break;
 389                   }
 390               }
 391           }
 392     }
 393   return result;
 394 }