images/scale-gen.h

   1 /*
   2  *      Image Library -- Image scaling algorithms
   3  *
   4  *      (c) 2006 Pavel Charvat <pchar@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  */
   9
  10 #ifndef IMAGE_SCALE_CHANNELS
  11 #  define IMAGE_SCALE_CHANNELS IMAGE_SCALE_PIXEL_SIZE
  12 #endif
  13
  14 #undef IMAGE_COPY_PIXEL
  15 #if IMAGE_SCALE_PIXEL_SIZE == 1
  16 #define IMAGE_COPY_PIXEL(dest, src) do{ *(byte *)dest = *(byte *)src; }while(0)
  17 #elif IMAGE_SCALE_PIXEL_SIZE == 2
  18 #define IMAGE_COPY_PIXEL(dest, src) do{ *(u16 *)dest = *(u16 *)src; }while(0)
  19 #elif IMAGE_SCALE_PIXEL_SIZE == 3
  20 #define IMAGE_COPY_PIXEL(dest, src) do{ ((byte *)dest)[0] = ((byte *)src)[0]; ((byte *)dest)[1] = ((byte *)src)[1]; ((byte *)dest)[2] = ((byte *)src)[2]; }while(0)
  21 #elif IMAGE_SCALE_PIXEL_SIZE == 4
  22 #define IMAGE_COPY_PIXEL(dest, src) do{ *(u32 *)dest = *(u32 *)src; }while(0)
  23 #endif
  24
  25 static void
  26 IMAGE_SCALE_PREFIX(nearest_xy)(struct image *dest, struct image *src)
  27 {
  28   uint x_inc = (src->cols << 16) / dest->cols;
  29   uint y_inc = (src->rows << 16) / dest->rows;
  30   uint x_start = x_inc >> 1, x_pos;
  31   uint y_pos = y_inc >> 1;
  32   byte *row_start;
  33 # define IMAGE_WALK_PREFIX(x) walk_##x
  34 # define IMAGE_WALK_INLINE
  35 # define IMAGE_WALK_UNROLL 4
  36 # define IMAGE_WALK_IMAGE dest
  37 # define IMAGE_WALK_COL_STEP IMAGE_SCALE_PIXEL_SIZE
  38 # define IMAGE_WALK_DO_ROW_START do{ row_start = src->pixels + (y_pos >> 16) * src->row_size; y_pos += y_inc; x_pos = x_start; }while(0)
  39 # define IMAGE_WALK_DO_STEP do{ byte *pos = row_start + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; x_pos += x_inc; IMAGE_COPY_PIXEL(walk_pos, pos); }while(0)
  40 # include <images/image-walk.h>
  41 }
  42
  43 #if 0 /* Experiments with rearranging pixels for SSE... */
     /*
      * Experimental horizontal linear rescaling (source x positions in 16.16
      * fixed point).  The function is enclosed in `#if 0` and is not compiled.
      *
      * It is unfinished: the single-column special case only asserts, and the
      * main loop merely gathers pairs of neighbouring source pixels into
      * pixel_buf and their 16-bit fractions into coef_buf.  The interpolation
      * that used to store into dest_pos is commented out, so apart from the
      * final last-column copy nothing is written to `dest`.
      */
  44 static void
  45 IMAGE_SCALE_PREFIX(linear_x)(struct image *dest, struct image *src)
  46 {
  47   /* Handle problematic special case */
  48   byte *src_row = src->pixels;
  49   byte *dest_row = dest->pixels;
  50   if (src->cols == 1)
  51     {
  52       for (uint y_counter = dest->rows; y_counter--; )
  53         {
  54           // FIXME
            /* Placeholder: no pixels are produced for a one-column source yet. */
  55           ASSERT(0);
  56           src_row += src->row_size;
  57           dest_row += dest->row_size;
  58         }
  59       return;
  60     }
  61   /* Initialize the main loop */
       /*
        * Step between sampled source columns, chosen so that the first and last
        * destination columns map onto the first and last source columns.
        * NOTE(review): divides by zero when dest->cols == 1.
        */
  62   uint x_inc = ((src->cols - 1) << 16) / (dest->cols - 1);
  63 # define COLS_AT_ONCE 256
       /*
        * Per batch the inner loop below stores 4 pixels and 2 coefficients in each
        * of its COLS_AT_ONCE / 2 iterations, i.e. 2 * COLS_AT_ONCE pixels and
        * COLS_AT_ONCE coefficients.
        */
  64   byte pixel_buf[COLS_AT_ONCE * 2 * IMAGE_SCALE_PIXEL_SIZE]; /* Buffers should fit in cache */
  65   u16 coef_buf[COLS_AT_ONCE * IMAGE_SCALE_PIXEL_SIZE];
  66   /* Main loop */
  67   for (uint y_counter = dest->rows; y_counter--; )
  68     {
  69       uint x_pos = 0;
  70       byte *dest_pos = dest_row;
           /*
            * NOTE(review): two loop headers in a row.  The second `for` is the body
            * of the first and declares another x_counter that shadows it; the first
            * header looks like a leftover of the earlier one-pixel-per-iteration loop.
            * x_pos is not reset between repetitions.
            */
  71       for (uint x_counter = dest->cols; --x_counter; )
  72       for (uint x_counter = dest->cols; x_counter > COLS_AT_ONCE; x_counter -= COLS_AT_ONCE)
  73         {
  74           byte *pixel_buf_pos = pixel_buf;
  75           u16 *coef_buf_pos = coef_buf;
  76           for (uint i = 0; i < COLS_AT_ONCE / 2; i++)
  77             {
                   /* Two output columns per iteration: left neighbour address and fraction for each. */
  78               byte *src_pos = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE;
  79               uint ofs = x_pos & 0xffff;
  80               x_pos += x_inc;
  81               byte *src_pos_2 = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE;
  82               uint ofs_2 = x_pos & 0xffff;
  83               x_pos += x_inc;
  84               *coef_buf_pos++ = ofs;
  85               byte *pixel_buf_pos_2 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE;
  86               byte *pixel_buf_pos_3 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE * 2;
  87               byte *pixel_buf_pos_4 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE * 3;
                   /*
                    * Buffer layout per pair: left, right, left_2, right_2.
                    * NOTE(review): the source arguments with `+` are expressions; this is
                    * only correct if IMAGE_COPY_PIXEL parenthesizes its parameters.
                    */
  88               IMAGE_COPY_PIXEL(pixel_buf_pos, src_pos);
  89               IMAGE_COPY_PIXEL(pixel_buf_pos_2, src_pos + IMAGE_SCALE_PIXEL_SIZE);
  90               IMAGE_COPY_PIXEL(pixel_buf_pos_3, src_pos_2);
  91               IMAGE_COPY_PIXEL(pixel_buf_pos_4, src_pos_2 + IMAGE_SCALE_PIXEL_SIZE);
  92               pixel_buf_pos += 4 * IMAGE_SCALE_PIXEL_SIZE;
  93               *coef_buf_pos++ = ofs_2;
  94             }
  95 /*
  96           byte *src_pos = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE;
  97           uint ofs = x_pos & 0xffff;
  98           x_pos += x_inc;
  99           dest_pos[0] = LINEAR_INTERPOLATE(src_pos[0], src_pos[0 + IMAGE_SCALE_PIXEL_SIZE], ofs);
 100 #         if IMAGE_SCALE_CHANNELS >= 2
 101           dest_pos[1] = LINEAR_INTERPOLATE(src_pos[1], src_pos[1 + IMAGE_SCALE_PIXEL_SIZE], ofs);
 102 #         endif
 103 #         if IMAGE_SCALE_CHANNELS >= 3
 104           dest_pos[2] = LINEAR_INTERPOLATE(src_pos[2], src_pos[2 + IMAGE_SCALE_PIXEL_SIZE], ofs);
 105 #         endif
 106 #         if IMAGE_SCALE_CHANNELS >= 4
 107           dest_pos[3] = LINEAR_INTERPOLATE(src_pos[3], src_pos[3 + IMAGE_SCALE_PIXEL_SIZE], ofs);
 108 #         endif
 109           dest_pos += IMAGE_SCALE_PIXEL_SIZE;*/
 110
 111         }
 112       /* Always copy the last column - handle "x_pos == dest->cols * 0x10000" overflow */
           /* NOTE(review): dest_pos is never advanced above, so this lands on the row's first pixel. */
 113       IMAGE_COPY_PIXEL(dest_pos, src_row + src->row_pixels_size - IMAGE_SCALE_PIXEL_SIZE);
 114       /* Next step */
 115       src_row += src->row_size;
 116       dest_row += dest->row_size;
 117     }
 118 #undef COLS_AT_ONCE
 119 }
 120
     /*
      * Experimental bilinear rescaling of `src` into `dest`; enclosed in `#if 0`
      * and therefore not compiled.
      *
      * Works in two passes per destination row, with positions in 16.16 fixed
      * point.  Source rows are first interpolated horizontally to the
      * destination width into one of two row buffers (cache[0] and cache[1]).
      * The destination row is then a vertical blend of the two buffers weighted
      * by the fractional part of y_pos.  LINEAR_INTERPOLATE and ALIGN_PTR are
      * defined elsewhere; presumably the former blends its first two arguments
      * by a 16-bit fraction and the latter rounds a pointer up to the given
      * alignment -- confirm against their definitions.
      */
 121 static void
 122 IMAGE_SCALE_PREFIX(bilinear_xy)(struct image *dest, struct image *src)
 123 {
 124   uint x_inc = (((src->cols - 1) << 16) - 1) / (dest->cols);
 125   uint y_inc = (((src->rows - 1) << 16) - 1) / (dest->rows);
       /* Vertical position starts at source row 1 with a zero fraction. */
 126   uint y_pos = 0x10000;
       /* Two row buffers (variable-length arrays) with 16 spare bytes each for the alignment below. */
 127   byte *cache[2], buf1[dest->row_pixels_size + 16], buf2[dest->row_pixels_size + 16], *pbuf[2];
 128   byte *dest_row = dest->pixels, *dest_pos;
       /* cache_index is the source row held in cache[1]; ~0U means nothing has been cached yet. */
 129   uint cache_index = ~0U, cache_i = 0;
 130   pbuf[0] = cache[0] = ALIGN_PTR((void *)buf1, 16);
 131   pbuf[1] = cache[1] = ALIGN_PTR((void *)buf2, 16);
 132 #ifdef __SSE2__
 133   __m128i zero = _mm_setzero_si128();
 134 #endif
 135   for (uint row_counter = dest->rows; row_counter--; )
 136     {
 137       dest_pos = dest_row;
 138       uint y_index = y_pos >> 16;
 139       uint y_ofs = y_pos & 0xffff;
 140       y_pos += y_inc;
 141       uint x_pos = 0;
           /*
            * If the wanted row is more than one past the newest cached row, skip
            * ahead so that the loop below loads exactly one row (it never runs more
            * than once per destination row).
            * NOTE(review): after such a skip, and on the very first row, cache[0]
            * does not hold source row y_index - 1 -- confirm this is intended.
            */
 142       if (y_index > (uint)(cache_index + 1))
 143         cache_index = y_index - 1;
 144       while (y_index > cache_index)
 145         {
               /* Rotate: the newest row becomes the older one and the other buffer is refilled. */
 146           cache[0] = cache[1];
 147           cache[1] = pbuf[cache_i ^= 1];
 148           cache_index++;
 149           byte *src_row = src->pixels + cache_index * src->row_size;
 150           byte *cache_pos = cache[1];
               /* Horizontal pass over all but the last destination column. */
 151           for (uint col_counter = dest->cols; --col_counter; )
 152             {
 153               byte *c1 = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE;
 154               byte *c2 = c1 + IMAGE_SCALE_PIXEL_SIZE;
 155               uint ofs = x_pos & 0xffff;
 156               cache_pos[0] = LINEAR_INTERPOLATE(c1[0], c2[0], ofs);
 157 #             if IMAGE_SCALE_CHANNELS >= 2
 158               cache_pos[1] = LINEAR_INTERPOLATE(c1[1], c2[1], ofs);
 159 #             endif
 160 #             if IMAGE_SCALE_CHANNELS >= 3
 161               cache_pos[2] = LINEAR_INTERPOLATE(c1[2], c2[2], ofs);
 162 #             endif
 163 #             if IMAGE_SCALE_CHANNELS >= 4
 164               cache_pos[3] = LINEAR_INTERPOLATE(c1[3], c2[3], ofs);
 165 #             endif
 166               cache_pos += IMAGE_SCALE_PIXEL_SIZE;
 167               x_pos += x_inc;
 168             }
               /*
                * The last destination column is a plain copy of the last source pixel.
                * NOTE(review): the source argument is an expression; this is only
                * correct if IMAGE_COPY_PIXEL parenthesizes its parameters.
                */
 169           IMAGE_COPY_PIXEL(cache_pos, src_row + src->row_pixels_size - IMAGE_SCALE_PIXEL_SIZE);
 170         }
           /* Vertical pass: blend cache[0] and cache[1] byte by byte into the destination row. */
 171       uint i = 0;
 172 #ifdef __SSE2__
           /*
            * 16 bytes per iteration.  The bytes are widened to 16-bit lanes and each
            * lane computes (a * 128 + (b - a) * coef) >> 7, where coef is the top
            * 7 bits of the vertical fraction.
            */
 173       __m128i coef = _mm_set1_epi16(y_ofs >> 9);
 174       for (; (int)i < (int)dest->row_pixels_size - 15; i += 16)
 175         {
 176           __m128i a2 = _mm_loadu_si128((__m128i *)(cache[0] + i));
 177           __m128i a1 = _mm_unpacklo_epi8(a2, zero);
 178           a2 = _mm_unpackhi_epi8(a2, zero);
 179           __m128i b2 = _mm_loadu_si128((__m128i *)(cache[1] + i));
 180           __m128i b1 = _mm_unpacklo_epi8(b2, zero);
 181           b2 = _mm_unpackhi_epi8(b2, zero);
 182           b1 = _mm_sub_epi16(b1, a1);
 183           b2 = _mm_sub_epi16(b2, a2);
 184           a1 = _mm_slli_epi16(a1, 7);
 185           a2 = _mm_slli_epi16(a2, 7);
 186           b1 = _mm_mullo_epi16(b1, coef);
 187           b2 = _mm_mullo_epi16(b2, coef);
 188           a1 = _mm_add_epi16(a1, b1);
 189           a2 = _mm_add_epi16(a2, b2);
 190           a1 = _mm_srli_epi16(a1, 7);
 191           a2 = _mm_srli_epi16(a2, 7);
 192           a1 = _mm_packus_epi16(a1, a2);
 193           _mm_storeu_si128((__m128i *)(dest_pos + i), a1);
 194         }
 195 #elif 1
           /* Scalar variant, four bytes per iteration, using the full 16-bit fraction. */
 196       for (; (int)i < (int)dest->row_pixels_size - 3; i += 4)
 197         {
 198           dest_pos[i + 0] = LINEAR_INTERPOLATE(cache[0][i + 0], cache[1][i + 0], y_ofs);
 199           dest_pos[i + 1] = LINEAR_INTERPOLATE(cache[0][i + 1], cache[1][i + 1], y_ofs);
 200           dest_pos[i + 2] = LINEAR_INTERPOLATE(cache[0][i + 2], cache[1][i + 2], y_ofs);
 201           dest_pos[i + 3] = LINEAR_INTERPOLATE(cache[0][i + 3], cache[1][i + 3], y_ofs);
 202         }
 203 #endif
           /* Bytes left over by the block-wise loop above. */
 204       for (; i < dest->row_pixels_size; i++)
 205         dest_pos[i] = LINEAR_INTERPOLATE(cache[0][i], cache[1][i], y_ofs);
 206       dest_row += dest->row_size;
 207     }
 208 }
 209 #endif
 210
 211 static void
 212 IMAGE_SCALE_PREFIX(downsample_xy)(struct image *dest, struct image *src)
 213 {
 214   /* FIXME slow */
 215   byte *rsrc = src->pixels, *psrc;
 216   byte *rdest = dest->pixels, *pdest;
 217   u64 x_inc = ((u64)dest->cols << 32) / src->cols, x_pos;
 218   u64 y_inc = ((u64)dest->rows << 32) / src->rows, y_pos = 0;
 219   uint x_inc_frac = (u64)0xffffffffff / x_inc;
 220   uint y_inc_frac = (u64)0xffffffffff / y_inc;
 221   uint final_mul = ((u64)(x_inc >> 16) * (y_inc >> 16)) >> 16;
 222   uint buf_size = dest->cols * IMAGE_SCALE_CHANNELS;
 223   u32 buf[buf_size], *pbuf;
 224   buf_size *= sizeof(u32);
 225   bzero(buf, buf_size);
 226   for (uint rows_counter = src->rows; rows_counter--; )
 227     {
 228       pbuf = buf;
 229       psrc = rsrc;
 230       rsrc += src->row_size;
 231       x_pos = 0;
 232       y_pos += y_inc;
 233       if (y_pos <= 0x100000000)
 234         {
 235           for (uint cols_counter = src->cols; cols_counter--; )
 236             {
 237               x_pos += x_inc;
 238               if (x_pos <= 0x100000000)
 239                 {
 240                   pbuf[0] += psrc[0];
 241 #                 if IMAGE_SCALE_CHANNELS >= 2
 242                   pbuf[1] += psrc[1];
 243 #                 endif
 244 #                 if IMAGE_SCALE_CHANNELS >= 3
 245                   pbuf[2] += psrc[2];
 246 #                 endif
 247 #                 if IMAGE_SCALE_CHANNELS >= 4
 248                   pbuf[3] += psrc[3];
 249 #                 endif
 250                 }
 251               else
 252                 {
 253                   x_pos -= 0x100000000;
 254                   uint mul2 = (uint)(x_pos >> 16) * x_inc_frac;
 255                   uint mul1 = 0xffffff - mul2;
 256                   pbuf[0] += (psrc[0] * mul1) >> 24;
 257                   pbuf[0 + IMAGE_SCALE_CHANNELS] += (psrc[0] * mul2) >> 24;
 258 #                 if IMAGE_SCALE_CHANNELS >= 2
 259                   pbuf[1] += (psrc[1] * mul1) >> 24;
 260                   pbuf[1 + IMAGE_SCALE_CHANNELS] += (psrc[1] * mul2) >> 24;
 261 #                 endif
 262 #                 if IMAGE_SCALE_CHANNELS >= 3
 263                   pbuf[2] += (psrc[2] * mul1) >> 24;
 264                   pbuf[2 + IMAGE_SCALE_CHANNELS] += (psrc[2] * mul2) >> 24;
 265 #                 endif
 266 #                 if IMAGE_SCALE_CHANNELS >= 4
 267                   pbuf[3] += (psrc[3] * mul1) >> 24;
 268                   pbuf[3 + IMAGE_SCALE_CHANNELS] += (psrc[3] * mul2) >> 24;
 269 #                 endif
 270                   pbuf += IMAGE_SCALE_CHANNELS;
 271                 }
 272               psrc += IMAGE_SCALE_PIXEL_SIZE;
 273             }
 274         }
 275       else
 276         {
 277           y_pos -= 0x100000000;
 278           pdest = rdest;
 279           rdest += dest->row_size;
 280           uint mul2 = (uint)(y_pos >> 16) * y_inc_frac;
 281           uint mul1 = 0xffffff - mul2;
 282           uint a0 = 0;
 283 #         if IMAGE_SCALE_CHANNELS >= 2
 284           uint a1 = 0;
 285 #         endif
 286 #         if IMAGE_SCALE_CHANNELS >= 3
 287           uint a2 = 0;
 288 #         endif
 289 #         if IMAGE_SCALE_CHANNELS >= 4
 290           uint a3 = 0;
 291 #         endif
 292           for (uint cols_counter = src->cols; cols_counter--; )
 293             {
 294               x_pos += x_inc;
 295               if (x_pos <= 0x100000000)
 296                 {
 297                   pbuf[0] += ((psrc[0] * mul1) >> 24);
 298                   a0 += (psrc[0] * mul2) >> 24;
 299 #                 if IMAGE_SCALE_CHANNELS >= 2
 300                   pbuf[1] += ((psrc[1] * mul1) >> 24);
 301                   a1 += (psrc[1] * mul2) >> 24;
 302 #                 endif
 303 #                 if IMAGE_SCALE_CHANNELS >= 3
 304                   pbuf[2] += ((psrc[2] * mul1) >> 24);
 305                   a2 += (psrc[2] * mul2) >> 24;
 306 #                 endif
 307 #                 if IMAGE_SCALE_CHANNELS >= 4
 308                   pbuf[3] += ((psrc[3] * mul1) >> 24);
 309                   a3 += (psrc[3] * mul2) >> 24;
 310 #                 endif
 311                 }
 312               else
 313                 {
 314                   x_pos -= 0x100000000;
 315                   uint mul4 = (uint)(x_pos >> 16) * x_inc_frac;
 316                   uint mul3 = 0xffffff - mul4;
 317                   uint mul13 = ((u64)mul1 * mul3) >> 24;
 318                   uint mul23 = ((u64)mul2 * mul3) >> 24;
 319                   uint mul14 = ((u64)mul1 * mul4) >> 24;
 320                   uint mul24 = ((u64)mul2 * mul4) >> 24;
 321                   pdest[0] = ((((psrc[0] * mul13) >> 24) + pbuf[0]) * final_mul) >> 16;
 322                   pbuf[0] = ((psrc[0] * mul23) >> 24) + a0;
 323                   pbuf[0 + IMAGE_SCALE_CHANNELS] += ((psrc[0 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24);
 324                   a0 = ((psrc[0 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24);
 325 #                 if IMAGE_SCALE_CHANNELS >= 2
 326                   pdest[1] = ((((psrc[1] * mul13) >> 24) + pbuf[1]) * final_mul) >> 16;
 327                   pbuf[1] = ((psrc[1] * mul23) >> 24) + a1;
 328                   pbuf[1 + IMAGE_SCALE_CHANNELS] += ((psrc[1 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24);
 329                   a1 = ((psrc[1 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24);
 330 #                 endif
 331 #                 if IMAGE_SCALE_CHANNELS >= 3
 332                   pdest[2] = ((((psrc[2] * mul13) >> 24) + pbuf[2]) * final_mul) >> 16;
 333                   pbuf[2] = ((psrc[2] * mul23) >> 24) + a2;
 334                   pbuf[2 + IMAGE_SCALE_CHANNELS] += ((psrc[2 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24);
 335                   a2 = ((psrc[2 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24);
 336 #                 endif
 337 #                 if IMAGE_SCALE_CHANNELS >= 4
 338                   pdest[3] = ((((psrc[3] * mul13) >> 24) + pbuf[3]) * final_mul) >> 16;
 339                   pbuf[3] = ((psrc[3] * mul23) >> 24) + a3;
 340                   pbuf[3 + IMAGE_SCALE_CHANNELS] += ((psrc[3 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24);
 341                   a3 = ((psrc[3 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24);
 342 #                 endif
 343                   pbuf += IMAGE_SCALE_CHANNELS;
 344                   pdest += IMAGE_SCALE_PIXEL_SIZE;
 345                 }
 346               psrc += IMAGE_SCALE_PIXEL_SIZE;
 347             }
 348           pdest[0] = (pbuf[0] * final_mul) >> 16;
 349           pbuf[0] = a0;
 350 #         if IMAGE_SCALE_CHANNELS >= 2
 351           pdest[1] = (pbuf[1] * final_mul) >> 16;
 352           pbuf[1] = a1;
 353 #         endif
 354 #         if IMAGE_SCALE_CHANNELS >= 3
 355           pdest[2] = (pbuf[2] * final_mul) >> 16;
 356           pbuf[2] = a2;
 357 #         endif
 358 #         if IMAGE_SCALE_CHANNELS >= 4
 359           pdest[3] = (pbuf[3] * final_mul) >> 16;
 360           pbuf[3] = a3;
 361 #         endif
 362         }
 363     }
 364   pdest = rdest;
 365   pbuf = buf;
 366   for (uint cols_counter = dest->cols; cols_counter--; )
 367     {
 368       pdest[0] = (pbuf[0] * final_mul) >> 16;
 369 #     if IMAGE_SCALE_CHANNELS >= 2
 370       pdest[1] = (pbuf[1] * final_mul) >> 16;
 371 #     endif
 372 #     if IMAGE_SCALE_CHANNELS >= 3
 373       pdest[2] = (pbuf[2] * final_mul) >> 16;
 374 #     endif
 375 #     if IMAGE_SCALE_CHANNELS >= 4
 376       pdest[3] = (pbuf[3] * final_mul) >> 16;
 377 #     endif
 378       pbuf += IMAGE_SCALE_CHANNELS;
 379       pdest += IMAGE_SCALE_PIXEL_SIZE;
 380     }
 381 }
 382
 383 #undef IMAGE_SCALE_PREFIX
 384 #undef IMAGE_SCALE_PIXEL_SIZE
 385 #undef IMAGE_SCALE_CHANNELS