images/scale.c

   1 /*
   2  *      Image Library -- Image scaling algorithms
   3  *
   4  *      (c) 2006 Pavel Charvat <pchar@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  */
   9
  10 #undef LOCAL_DEBUG
  11
  12 #include "lib/lib.h"
  13 #include "images/images.h"
  14 #include "images/error.h"
  15 #include "images/math.h"
  16
  17 #include <string.h>
  18
  19 #ifdef __SSE2__
  20 #include <emmintrin.h>
  21 #endif
  22
  23 #define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16)
  24
  25 /* Generate optimized code for various pixel formats */
  26
  27 #define IMAGE_SCALE_PREFIX(x) image_scale_1_##x
  28 #define IMAGE_SCALE_PIXEL_SIZE 1
  29 #include "images/scale-gen.h"
  30
  31 #define IMAGE_SCALE_PREFIX(x) image_scale_2_##x
  32 #define IMAGE_SCALE_PIXEL_SIZE 2
  33 #include "images/scale-gen.h"
  34
  35 #define IMAGE_SCALE_PREFIX(x) image_scale_3_##x
  36 #define IMAGE_SCALE_PIXEL_SIZE 3
  37 #include "images/scale-gen.h"
  38
  39 #define IMAGE_SCALE_PREFIX(x) image_scale_4_##x
  40 #define IMAGE_SCALE_PIXEL_SIZE 4
  41 #include "images/scale-gen.h"
  42
  43 /* Simple "nearest neighbour" algorithm */
  44
  45 static void
  46 image_scale_nearest_xy(struct image *dest, struct image *src)
  47 {
  48   switch (src->pixel_size)
  49     {
  50       case 1:
  51         image_scale_1_nearest_xy(dest, src);
  52         return;
  53       case 2:
  54         image_scale_2_nearest_xy(dest, src);
  55         return;
  56       case 3:
  57         image_scale_3_nearest_xy(dest, src);
  58         return;
  59       case 4:
  60         image_scale_4_nearest_xy(dest, src);
  61         return;
  62       default:
  63         ASSERT(0);
  64     }
  65 }
  66
  67 static inline void
  68 image_scale_nearest_x(struct image *dest, struct image *src)
  69 {
  70   image_scale_nearest_xy(dest, src);
  71 }
  72
  73 static void
  74 image_scale_nearest_y(struct image *dest, struct image *src)
  75 {
  76   uns y_inc = (src->rows << 16) / dest->rows;
  77   uns y_pos = y_inc >> 1;
  78   byte *dest_pos = dest->pixels;
  79   for (uns row_counter = dest->rows; row_counter--; )
  80     {
  81       byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size;
  82       y_pos += y_inc;
  83       memcpy(dest_pos, src_pos, dest->row_pixels_size);
  84       dest_pos += dest->row_size;
  85     }
  86 }
  87
  88 /* Bilinear filter */
  89
  90 UNUSED static void
  91 image_scale_linear_y(struct image *dest, struct image *src)
  92 {
  93   byte *dest_row = dest->pixels;
  94   /* Handle problematic special case */
  95   if (src->rows == 1)
  96     {
  97       for (uns y_counter = dest->rows; y_counter--; dest_row += dest->row_size)
  98         memcpy(dest_row, src->pixels, src->row_pixels_size);
  99       return;
 100     }
 101   /* Initialize the main loop */
 102   uns y_inc  = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0;
 103 #ifdef __SSE2__
 104   __m128i zero = _mm_setzero_si128();
 105 #endif
 106   /* Main loop */
 107   for (uns y_counter = dest->rows; --y_counter; )
 108     {
 109       uns coef = y_pos & 0xffff;
 110       byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size;
 111       byte *src_row_2 = src_row_1 + src->row_size;
 112       uns i = 0;
 113 #ifdef __SSE2__
 114       /* SSE2 */
 115       __m128i sse_coef = _mm_set1_epi16(coef >> 9);
 116       for (; (int)i < (int)dest->row_pixels_size - 15; i += 16)
 117         {
 118           __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i));
 119           __m128i a1 = _mm_unpacklo_epi8(a2, zero);
 120           a2 = _mm_unpackhi_epi8(a2, zero);
 121           __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i));
 122           __m128i b1 = _mm_unpacklo_epi8(b2, zero);
 123           b2 = _mm_unpackhi_epi8(b2, zero);
 124           b1 = _mm_sub_epi16(b1, a1);
 125           b2 = _mm_sub_epi16(b2, a2);
 126           a1 = _mm_slli_epi16(a1, 7);
 127           a2 = _mm_slli_epi16(a2, 7);
 128           b1 = _mm_mullo_epi16(b1, sse_coef);
 129           b2 = _mm_mullo_epi16(b2, sse_coef);
 130           a1 = _mm_add_epi16(a1, b1);
 131           a2 = _mm_add_epi16(a2, b2);
 132           a1 = _mm_srli_epi16(a1, 7);
 133           a2 = _mm_srli_epi16(a2, 7);
 134           a1 = _mm_packus_epi16(a1, a2);
 135           _mm_storeu_si128((__m128i *)(dest_row + i), a1);
 136         }
 137 #endif
 138       /* Unrolled loop using general-purpose registers */
 139       for (; (int)i < (int)dest->row_pixels_size - 3; i += 4)
 140         {
 141           dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef);
 142           dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef);
 143           dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef);
 144           dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef);
 145         }
 146       /* Remaining columns */
 147       for (; i < dest->row_pixels_size; i++)
 148         dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef);
 149       dest_row += dest->row_size;
 150       y_pos += y_inc;
 151     }
 152   /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */
 153   memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size);
 154 }
 155
 156 /* Box filter */
 157
 158 static void
 159 image_scale_downsample_xy(struct image *dest, struct image *src)
 160 {
 161   switch (src->pixel_size)
 162     {
 163       case 1:
 164         image_scale_1_downsample_xy(dest, src);
 165         return;
 166       case 2:
 167         image_scale_2_downsample_xy(dest, src);
 168         return;
 169       case 3:
 170         image_scale_3_downsample_xy(dest, src);
 171         return;
 172       case 4:
 173         image_scale_4_downsample_xy(dest, src);
 174         return;
 175       default:
 176         ASSERT(0);
 177     }
 178 }
 179
 180 /* General routine
 181  * FIXME: customizable; implement at least bilinear and bicubic filters */
 182
 183 int
 184 image_scale(struct image_context *ctx, struct image *dest, struct image *src)
 185 {
 186   if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT))
 187     {
 188       IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported.");
 189       return 0;
 190     }
 191   if (dest->cols == src->cols)
 192     {
 193       if (dest->rows == src->rows)
 194         {
 195           /* No scale, copy only */
 196           image_scale_nearest_y(dest, src);
 197           return 1;
 198         }
 199       else if (dest->rows < src->rows)
 200         {
 201           /* Downscale vertically */
 202           image_scale_downsample_xy(dest, src);
 203           return 1;
 204         }
 205       else
 206         {
 207           /* Upscale vertically */
 208           image_scale_nearest_y(dest, src);
 209           return 1;
 210         }
 211     }
 212   else if (dest->rows == src->rows)
 213     {
 214       if (dest->cols < src->cols)
 215         {
 216           /* Downscale horizontally */
 217           image_scale_downsample_xy(dest, src);
 218           return 1;
 219         }
 220       else
 221         {
 222           /* Upscale horizontally */
 223           image_scale_nearest_x(dest, src);
 224           return 1;
 225         }
 226     }
 227   else
 228     {
 229       if (dest->cols <= src->cols && src->cols <= dest->cols)
 230         {
 231           /* Downscale in both dimensions */
 232           image_scale_downsample_xy(dest, src);
 233           return 1;
 234         }
 235       else
 236         {
 237           image_scale_nearest_xy(dest, src);
 238           return 1;
 239         }
 240     }
 241 }
 242
 243 void
 244 image_dimensions_fit_to_box(u32 *cols, u32 *rows, u32 max_cols, u32 max_rows, uns upsample)
 245 {
 246   ASSERT(image_dimensions_valid(*cols, *rows));
 247   ASSERT(image_dimensions_valid(max_cols, max_rows));
 248   if (*cols <= max_cols && *rows <= max_rows)
 249     {
 250       if (!upsample)
 251         return;
 252       if (max_cols * *rows > max_rows * *cols)
 253         {
 254           *cols = *cols * max_rows / *rows;
 255           *cols = MIN(*cols, max_cols);
 256           *rows = max_rows;
 257         }
 258       else
 259         {
 260           *rows = *rows * max_cols / *cols;
 261           *rows = MIN(*rows, max_rows);
 262           *cols = max_cols;
 263         }
 264     }
 265   else if (*cols <= max_cols)
 266     goto down_cols;
 267   else if (*rows <= max_rows || max_rows * *cols > max_cols * *rows)
 268     goto down_rows;
 269 down_cols:
 270   *cols = *cols * max_rows / *rows;
 271   *cols = MAX(*cols, 1);
 272   *rows = max_rows;
 273   return;
 274 down_rows:
 275   *rows = *rows * max_cols / *cols;
 276   *rows = MAX(*rows, 1);
 277   *cols = max_cols;
 278 }