Released as 6.5.11.

[libucw.git] / images / scale.c
diff --git a/images/scale.c b/images/scale.c

index 2c351cbdcb6714d47e4a4f74a69bff680dbea3d3..2695696fffba5d3073ce7b1b62d9d07055f640d1 100644 (file)
--- a/images/scale.c
+++ b/images/scale.c
@@ -9,67 +9,242 @@
  
  #undef LOCAL_DEBUG
  
-#include "lib/lib.h"
-#include "images/images.h"
+#include <ucw/lib.h>
+#include <images/images.h>
+#include <images/error.h>
+#include <images/math.h>
+
  #include <string.h>
  
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16)
+
+/* Generate optimized code for various pixel formats */
+
  #define IMAGE_SCALE_PREFIX(x) image_scale_1_##x
  #define IMAGE_SCALE_PIXEL_SIZE 1
-#include "images/scale-gen.h"
+#include <images/scale-gen.h>
  
  #define IMAGE_SCALE_PREFIX(x) image_scale_2_##x
  #define IMAGE_SCALE_PIXEL_SIZE 2
-#include "images/scale-gen.h"
+#include <images/scale-gen.h>
  
  #define IMAGE_SCALE_PREFIX(x) image_scale_3_##x
  #define IMAGE_SCALE_PIXEL_SIZE 3
-#include "images/scale-gen.h"
+#include <images/scale-gen.h>
  
  #define IMAGE_SCALE_PREFIX(x) image_scale_4_##x
  #define IMAGE_SCALE_PIXEL_SIZE 4
-#include "images/scale-gen.h"
+#include <images/scale-gen.h>
  
-int
-image_scale(struct image_thread *it, struct image *dest, struct image *src)
+/* Simple "nearest neighbour" algorithm
+ *
+ * image_scale_nearest_xy() picks the routine by src->pixel_size (1 to 4) and
+ * calls the matching image_scale_<N>_nearest_xy(); those are presumably the
+ * functions generated by the <images/scale-gen.h> inclusions earlier in this
+ * file (the header itself is not visible here). Any other pixel size trips
+ * ASSERT(0).
+ */
+
+static void
+image_scale_nearest_xy(struct image *dest, struct image *src)
  {
-  if (src->cols < dest->cols || src->rows < dest->rows)
+  switch (src->pixel_size)
      {
-      image_thread_err(it, IMAGE_ERR_INVALID_DIMENSIONS, "Upsampling not supported.");
-      return 0;
+      case 1:
+       image_scale_1_nearest_xy(dest, src);
+       return;
+      case 2:
+       image_scale_2_nearest_xy(dest, src);
+       return;
+      case 3:
+       image_scale_3_nearest_xy(dest, src);
+       return;
+      case 4:
+       image_scale_4_nearest_xy(dest, src);
+       return;
+      default:
+       ASSERT(0);
      }
-  if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT))
+}
+
+/* Horizontal-only nearest-neighbour scaling. There is no dedicated
+ * implementation for this case: the call is forwarded unchanged to
+ * image_scale_nearest_xy(). */
+static inline void
+image_scale_nearest_x(struct image *dest, struct image *src)
+{
+  image_scale_nearest_xy(dest, src);
+}
+
+/* Vertical-only nearest-neighbour scaling: every destination row is a plain
+ * copy of one source row. The first dest->row_pixels_size bytes of the chosen
+ * source row are copied, so both images are expected to share the same row
+ * width (NOTE(review): not checked here -- confirm against callers). */
+static void
+image_scale_nearest_y(struct image *dest, struct image *src)
+{
+  /* 16.16 fixed-point step through the source rows, starting half a step in */
+  uint step = (src->rows << 16) / dest->rows;
+  uint pos = step >> 1;
+  byte *out = dest->pixels;
+  for (uint row = 0; row < dest->rows; row++, pos += step, out += dest->row_size)
+    memcpy(out, src->pixels + (pos >> 16) * src->row_size, dest->row_pixels_size);
+}
+
+/* Bilinear filter */
+
+UNUSED static void
+image_scale_linear_y(struct image *dest, struct image *src)
+{
+  byte *dest_row = dest->pixels;
+  /* Handle problematic special case */
+  if (src->rows == 1)
+    {
+      for (uint y_counter = dest->rows; y_counter--; dest_row += dest->row_size)
+        memcpy(dest_row, src->pixels, src->row_pixels_size);
+      return;
+    }
+  /* Initialize the main loop */
+  uint y_inc  = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0;
+#ifdef __SSE2__
+  __m128i zero = _mm_setzero_si128();
+#endif
+  /* Main loop */
+  for (uint y_counter = dest->rows; --y_counter; )
+    {
+      uint coef = y_pos & 0xffff;
+      byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size;
+      byte *src_row_2 = src_row_1 + src->row_size;
+      uint i = 0;
+#ifdef __SSE2__
+      /* SSE2 */
+      __m128i sse_coef = _mm_set1_epi16(coef >> 9);
+      for (; (int)i < (int)dest->row_pixels_size - 15; i += 16)
+        {
+         __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i));
+         __m128i a1 = _mm_unpacklo_epi8(a2, zero);
+         a2 = _mm_unpackhi_epi8(a2, zero);
+         __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i));
+         __m128i b1 = _mm_unpacklo_epi8(b2, zero);
+         b2 = _mm_unpackhi_epi8(b2, zero);
+         b1 = _mm_sub_epi16(b1, a1);
+         b2 = _mm_sub_epi16(b2, a2);
+         a1 = _mm_slli_epi16(a1, 7);
+         a2 = _mm_slli_epi16(a2, 7);
+         b1 = _mm_mullo_epi16(b1, sse_coef);
+         b2 = _mm_mullo_epi16(b2, sse_coef);
+         a1 = _mm_add_epi16(a1, b1);
+         a2 = _mm_add_epi16(a2, b2);
+         a1 = _mm_srli_epi16(a1, 7);
+         a2 = _mm_srli_epi16(a2, 7);
+         a1 = _mm_packus_epi16(a1, a2);
+         _mm_storeu_si128((__m128i *)(dest_row + i), a1);
+       }
+#endif
+      /* Unrolled loop using general-purpose registers */
+      for (; (int)i < (int)dest->row_pixels_size - 3; i += 4)
+        {
+         dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef);
+         dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef);
+         dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef);
+         dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef);
+       }
+      /* Remaining columns */
+      for (; i < dest->row_pixels_size; i++)
+       dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef);
+      dest_row += dest->row_size;
+      y_pos += y_inc;
+    }
+  /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */
+  memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size);
+}
+
+/* Box filter
+ *
+ * image_scale_downsample_xy() picks the routine by src->pixel_size (1 to 4)
+ * and calls the matching image_scale_<N>_downsample_xy(); those are
+ * presumably the functions generated by the <images/scale-gen.h> inclusions
+ * earlier in this file (the header itself is not visible here). Any other
+ * pixel size trips ASSERT(0).
+ */
+
+static void
+image_scale_downsample_xy(struct image *dest, struct image *src)
+{
    switch (src->pixel_size)
      {
-      /* Gray */
        case 1:
-       image_scale_1_downsample(dest, src);
-       return 1;
-      /* GrayA */
+       image_scale_1_downsample_xy(dest, src);
+       return;
        case 2:
-       image_scale_2_downsample(dest, src);
-       return 1;
-      /* RGB */
+       image_scale_2_downsample_xy(dest, src);
+       return;
        case 3:
-       image_scale_3_downsample(dest, src);
-       return 1;
-      /* RGBA or aligned RGB */
+       image_scale_3_downsample_xy(dest, src);
+       return;
        case 4:
-       image_scale_4_downsample(dest, src);
-       return 1;
+       image_scale_4_downsample_xy(dest, src);
+       return;
        default:
         ASSERT(0);
      }
  }
  
+/* General routine
+ * FIXME: customizable; implement at least bilinear and bicubic filters
+ *
+ * Scales src into dest, choosing the algorithm from how the two sizes relate:
+ * the box filter when nothing grows and something shrinks, nearest neighbour
+ * as soon as any dimension grows, and a row-by-row copy when the sizes match.
+ *
+ * Returns 1 on success. If the pixel formats differ, reports
+ * IMAGE_ERROR_INVALID_PIXEL_FORMAT through ctx and returns 0 without
+ * touching dest.
+ */
+int
+image_scale(struct image_context *ctx, struct image *dest, struct image *src)
+{
+  if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT))
+    {
+      IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported.");
+      return 0;
+    }
+
+  if (dest->cols == src->cols)
+    {
+      /* Same width: downscale vertically with the box filter; otherwise copy
+       * rows, which covers both the unscaled copy and vertical upscaling */
+      if (dest->rows < src->rows)
+        image_scale_downsample_xy(dest, src);
+      else
+        image_scale_nearest_y(dest, src);
+    }
+  else if (dest->rows == src->rows)
+    {
+      /* Same height: downscale or upscale horizontally */
+      if (dest->cols < src->cols)
+        image_scale_downsample_xy(dest, src);
+      else
+        image_scale_nearest_x(dest, src);
+    }
+  else if (dest->cols <= src->cols && dest->rows <= src->rows)
+    {
+      /* Downscale in both dimensions */
+      image_scale_downsample_xy(dest, src);
+    }
+  else
+    {
+      /* At least one dimension grows */
+      image_scale_nearest_xy(dest, src);
+    }
+  return 1;
+}
+
  void
-image_dimensions_fit_to_box(u32 *cols, u32 *rows, u32 max_cols, u32 max_rows, uns upsample)
+image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample)
  {
-  ASSERT(*cols && *rows && *cols <= IMAGE_MAX_SIZE && *rows <= IMAGE_MAX_SIZE);
-  ASSERT(max_cols && max_rows && max_cols <= IMAGE_MAX_SIZE && max_rows <= IMAGE_MAX_SIZE);
+  ASSERT(image_dimensions_valid(*cols, *rows));
+  ASSERT(image_dimensions_valid(max_cols, max_rows));
    if (*cols <= max_cols && *rows <= max_rows)
      {
        if (!upsample)
@@ -96,7 +271,7 @@ down_cols:
    *cols = MAX(*cols, 1);
    *rows = max_rows;
    return;
-down_rows:  
+down_rows:
    *rows = *rows * max_cols / *cols;
    *rows = MAX(*rows, 1);
    *cols = max_cols;