X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=images%2Fscale.c;h=41fda01d5be4c158b597c0fd1cbac4cec7830710;hb=09e7fe5641b94148d998a1b620bf694f353cb17b;hp=f617b01e49589678c3c11957faf7c6bf921f2123;hpb=966305189fb3c543170117da57f110a8e9851ac2;p=libucw.git diff --git a/images/scale.c b/images/scale.c index f617b01e..41fda01d 100644 --- a/images/scale.c +++ b/images/scale.c @@ -9,10 +9,21 @@ #undef LOCAL_DEBUG -#include "lib/lib.h" +#include "ucw/lib.h" #include "images/images.h" +#include "images/error.h" +#include "images/math.h" + #include +#ifdef __SSE2__ +#include +#endif + +#define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16) + +/* Generate optimized code for various pixel formats */ + #define IMAGE_SCALE_PREFIX(x) image_scale_1_##x #define IMAGE_SCALE_PIXEL_SIZE 1 #include "images/scale-gen.h" @@ -29,52 +40,216 @@ #define IMAGE_SCALE_PIXEL_SIZE 4 #include "images/scale-gen.h" -int -image_scale(struct image_thread *it, struct image *dest, struct image *src) +/* Simple "nearest neighbour" algorithm */ + +static void +image_scale_nearest_xy(struct image *dest, struct image *src) { - if (src->cols < dest->cols || src->rows < dest->rows) + switch (src->pixel_size) { - image_thread_err(it, IMAGE_ERR_INVALID_DIMENSIONS, "Upsampling not supported."); - return 0; + case 1: + image_scale_1_nearest_xy(dest, src); + return; + case 2: + image_scale_2_nearest_xy(dest, src); + return; + case 3: + image_scale_3_nearest_xy(dest, src); + return; + case 4: + image_scale_4_nearest_xy(dest, src); + return; + default: + ASSERT(0); } - if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT)) +} + +static inline void +image_scale_nearest_x(struct image *dest, struct image *src) +{ + image_scale_nearest_xy(dest, src); +} + +static void +image_scale_nearest_y(struct image *dest, struct image *src) +{ + uns y_inc = (src->rows << 16) / dest->rows; + uns y_pos = y_inc >> 1; + byte *dest_pos = dest->pixels; + for (uns row_counter = dest->rows; row_counter--; ) { - image_thread_err(it, IMAGE_ERR_INVALID_PIXEL_FORMAT, "Different pixel format not supported."); - return 0; + byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size; + y_pos += y_inc; + memcpy(dest_pos, src_pos, dest->row_pixels_size); + dest_pos += dest->row_size; + } +} + +/* Bilinear filter */ + +UNUSED static void +image_scale_linear_y(struct image *dest, struct image *src) +{ + byte *dest_row = dest->pixels; + /* Handle problematic special case */ + if (src->rows == 1) + { + for (uns y_counter = dest->rows; y_counter--; dest_row += dest->row_size) + memcpy(dest_row, src->pixels, src->row_pixels_size); + return; + } + /* Initialize the main loop */ + uns y_inc = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0; +#ifdef __SSE2__ + __m128i zero = _mm_setzero_si128(); +#endif + /* Main loop */ + for (uns y_counter = dest->rows; --y_counter; ) + { + uns coef = y_pos & 0xffff; + byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size; + byte *src_row_2 = src_row_1 + src->row_size; + uns i = 0; +#ifdef __SSE2__ + /* SSE2 */ + __m128i sse_coef = _mm_set1_epi16(coef >> 9); + for (; (int)i < (int)dest->row_pixels_size - 15; i += 16) + { + __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i)); + __m128i a1 = _mm_unpacklo_epi8(a2, zero); + a2 = _mm_unpackhi_epi8(a2, zero); + __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i)); + __m128i b1 = _mm_unpacklo_epi8(b2, zero); + b2 = _mm_unpackhi_epi8(b2, zero); + b1 = _mm_sub_epi16(b1, a1); + b2 = _mm_sub_epi16(b2, a2); + a1 = _mm_slli_epi16(a1, 7); + a2 = _mm_slli_epi16(a2, 7); + b1 = _mm_mullo_epi16(b1, sse_coef); + b2 = _mm_mullo_epi16(b2, sse_coef); + a1 = _mm_add_epi16(a1, b1); + a2 = _mm_add_epi16(a2, b2); + a1 = _mm_srli_epi16(a1, 7); + a2 = _mm_srli_epi16(a2, 7); + a1 = _mm_packus_epi16(a1, a2); + _mm_storeu_si128((__m128i *)(dest_row + i), a1); + } +#endif + /* Unrolled loop using general-purpose registers */ + for (; (int)i < (int)dest->row_pixels_size - 3; i += 4) + { + dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef); + dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef); + dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef); + dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef); + } + /* Remaining columns */ + for (; i < dest->row_pixels_size; i++) + dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef); + dest_row += dest->row_size; + y_pos += y_inc; } + /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */ + memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size); +} + +/* Box filter */ + +static void +image_scale_downsample_xy(struct image *dest, struct image *src) +{ switch (src->pixel_size) { - /* Gray */ case 1: - image_scale_1_downsample(dest, src); - return 1; - /* GrayA */ + image_scale_1_downsample_xy(dest, src); + return; case 2: - image_scale_2_downsample(dest, src); - return 1; - /* RGB */ + image_scale_2_downsample_xy(dest, src); + return; case 3: - image_scale_3_downsample(dest, src); - return 1; - /* RGBA or aligned RGB */ + image_scale_3_downsample_xy(dest, src); + return; case 4: - image_scale_4_downsample(dest, src); - return 1; + image_scale_4_downsample_xy(dest, src); + return; default: ASSERT(0); } } +/* General routine + * FIXME: customizable; implement at least bilinear and bicubic filters */ + +int +image_scale(struct image_context *ctx, struct image *dest, struct image *src) +{ + if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT)) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported."); + return 0; + } + if (dest->cols == src->cols) + { + if (dest->rows == src->rows) + { + /* No scale, copy only */ + image_scale_nearest_y(dest, src); + return 1; + } + else if (dest->rows < src->rows) + { + /* Downscale vertically */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + /* Upscale vertically */ + image_scale_nearest_y(dest, src); + return 1; + } + } + else if (dest->rows == src->rows) + { + if (dest->cols < src->cols) + { + /* Downscale horizontally */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + /* Upscale horizontally */ + image_scale_nearest_x(dest, src); + return 1; + } + } + else + { + if (dest->cols <= src->cols && dest->rows <= src->rows) + { + /* Downscale in both dimensions */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + image_scale_nearest_xy(dest, src); + return 1; + } + } +} + void -image_dimensions_fit_to_box(u32 *cols, u32 *rows, u32 max_cols, u32 max_rows, uns upsample) +image_dimensions_fit_to_box(uns *cols, uns *rows, uns max_cols, uns max_rows, uns upsample) { - ASSERT(*cols && *rows && *cols <= 0xffff && *rows <= 0xffff); - ASSERT(max_cols && max_rows && max_cols <= 0xffff && max_rows <= 0xffff); + ASSERT(image_dimensions_valid(*cols, *rows)); + ASSERT(image_dimensions_valid(max_cols, max_rows)); if (*cols <= max_cols && *rows <= max_rows) { if (!upsample) return; - if (max_cols / *cols > max_rows / *rows) + if (max_cols * *rows > max_rows * *cols) { *cols = *cols * max_rows / *rows; *cols = MIN(*cols, max_cols); @@ -96,9 +271,8 @@ down_cols: *cols = MAX(*cols, 1); *rows = max_rows; return; -down_rows: +down_rows: *rows = *rows * max_cols / *cols; *rows = MAX(*rows, 1); *cols = max_cols; - return; }