2 * Image Library -- Image scaling algorithms
4 * (c) 2006 Pavel Charvat <pchar@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
13 #include <images/images.h>
14 #include <images/error.h>
15 #include <images/math.h>
20 #include <emmintrin.h>
/*
 * Fixed-point linear interpolation between the sample values a and b.
 * Evaluates a + t * (b - a) / 65536: a is scaled up by 16 bits, the weighted
 * difference is added, 0x8000 (one half in this scale) is added so that the
 * final right shift by 16 rounds to nearest instead of truncating.
 * The callers visible in this file pass t = y_pos & 0xffff, i.e. a 16-bit
 * fraction, and byte samples for a and b.
 * The argument a is expanded twice, so it must not have side effects.
 */
23 #define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16)
25 /* Generate optimized code for various pixel formats */
/*
 * <images/scale-gen.h> is included four times, once per pixel size 1..4.
 * Before each inclusion IMAGE_SCALE_PREFIX is set to a macro that pastes a
 * size-specific prefix (image_scale_N_) onto a name, and
 * IMAGE_SCALE_PIXEL_SIZE is set to N.
 * The dispatchers in this file call image_scale_N_nearest_xy() and
 * image_scale_N_downsample_xy(); presumably those are emitted by the header
 * under the given prefix -- confirm in scale-gen.h.
 * NOTE(review): no #undef is visible between the redefinitions; presumably
 * the header undefines both macros at its end -- confirm.
 */
/* 1 byte per pixel */
27 #define IMAGE_SCALE_PREFIX(x) image_scale_1_##x
28 #define IMAGE_SCALE_PIXEL_SIZE 1
29 #include <images/scale-gen.h>
/* 2 bytes per pixel */
31 #define IMAGE_SCALE_PREFIX(x) image_scale_2_##x
32 #define IMAGE_SCALE_PIXEL_SIZE 2
33 #include <images/scale-gen.h>
/* 3 bytes per pixel */
35 #define IMAGE_SCALE_PREFIX(x) image_scale_3_##x
36 #define IMAGE_SCALE_PIXEL_SIZE 3
37 #include <images/scale-gen.h>
/* 4 bytes per pixel */
39 #define IMAGE_SCALE_PREFIX(x) image_scale_4_##x
40 #define IMAGE_SCALE_PIXEL_SIZE 4
41 #include <images/scale-gen.h>
43 /* Simple "nearest neighbour" algorithm */
/*
 * Dispatches the nearest-neighbour scaling of src into dest to the variant
 * named after the pixel size (image_scale_1_.. to image_scale_4_..),
 * selected by src->pixel_size.  dest and src are forwarded unchanged.
 * NOTE(review): the return type, braces, case labels and whatever ends each
 * case are not visible in this excerpt; the calls below are presumably the
 * bodies of cases 1..4 in order -- confirm against the full file.
 */
46 image_scale_nearest_xy(struct image *dest, struct image *src)
/* Only the source pixel size is examined; dest->pixel_size is not checked here. */
48 switch (src->pixel_size)
51 image_scale_1_nearest_xy(dest, src);
54 image_scale_2_nearest_xy(dest, src);
57 image_scale_3_nearest_xy(dest, src);
60 image_scale_4_nearest_xy(dest, src);
/*
 * Nearest-neighbour scaling entry used by image_scale() for the
 * "upscale horizontally" case.  There is no dedicated horizontal-only
 * implementation: the call is forwarded to image_scale_nearest_xy().
 */
68 image_scale_nearest_x(struct image *dest, struct image *src)
70 image_scale_nearest_xy(dest, src);
/*
 * Vertical nearest-neighbour scaling by whole-row copies.
 * For each destination row a source row is selected and
 * dest->row_pixels_size bytes are copied from it, so the function is only
 * meaningful when both images have the same row contents width
 * (image_scale() calls it when dest->cols == src->cols).
 * row_size is used as the row stride, row_pixels_size as the byte count
 * copied per row.
 */
74 image_scale_nearest_y(struct image *dest, struct image *src)
/* Source rows per destination row as a 16.16 fixed-point step.
 * NOTE(review): divides by dest->rows with no visible zero check, and
 * src->rows << 16 loses high bits if src->rows needs more than 16 bits
 * (assuming uint is 32 bits wide) -- confirm the limits on image sizes. */
76 uint y_inc = (src->rows << 16) / dest->rows;
/* Start half a step in; the integer part (y_pos >> 16) is the source row index. */
77 uint y_pos = y_inc >> 1;
78 byte *dest_pos = dest->pixels;
/* Executes dest->rows times (post-decrement test). */
79 for (uint row_counter = dest->rows; row_counter--; )
81 byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size;
/* NOTE(review): no statement advancing y_pos by y_inc is visible in this
 * excerpt; without one every destination row would copy the same source
 * row -- confirm against the full file. */
83 memcpy(dest_pos, src_pos, dest->row_pixels_size);
84 dest_pos += dest->row_size;
/*
 * Vertical scaling with linear interpolation between two adjacent source
 * rows.  Every byte of a destination row is blended from the bytes at the
 * same offset in src_row_1 and src_row_2, so both images are expected to
 * have the same row contents (dest->row_pixels_size bytes are processed).
 * The first destination row maps to the first source row (y_pos starts at 0)
 * and the last destination row is a plain copy of the last source row.
 * Each row is processed in three stages: 16 bytes at a time with SSE2
 * intrinsics, then 4 bytes at a time, then byte by byte.
 * NOTE(review): this excerpt does not show the return type, braces, any
 * preprocessor guards around the SSE2 code, the declaration of i, or a
 * statement advancing y_pos -- confirm against the full file.
 */
91 image_scale_linear_y(struct image *dest, struct image *src)
93 byte *dest_row = dest->pixels;
94 /* Handle problematic special case */
/* Fills every destination row with a copy of the first source row.
 * NOTE(review): the condition guarding this loop is not visible here;
 * presumably it is the single-source-row case, where no second row exists
 * to interpolate with -- confirm. */
97 for (uint y_counter = dest->rows; y_counter--; dest_row += dest->row_size)
98 memcpy(dest_row, src->pixels, src->row_pixels_size);
101 /* Initialize the main loop */
/* 16.16 fixed-point step chosen so that the last destination row lands on
 * the last source row.
 * NOTE(review): divides by dest->rows - 1; no guard for dest->rows == 1 is
 * visible in this excerpt -- confirm callers never pass a one-row dest. */
102 uint y_inc = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0;
/* All-zero vector used to widen bytes to 16-bit lanes. */
104 __m128i zero = _mm_setzero_si128();
/* Pre-decrement test: the body runs dest->rows - 1 times; the final row is
 * written after the loop. */
107 for (uint y_counter = dest->rows; --y_counter; )
/* Low 16 bits of y_pos: weight of the second row. High bits: index of the first row. */
109 uint coef = y_pos & 0xffff;
110 byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size;
111 byte *src_row_2 = src_row_1 + src->row_size;
/* The weight is reduced to 7 bits so that (b - a) * weight, at most
 * 255 * 127 in magnitude, fits a signed 16-bit lane. */
115 __m128i sse_coef = _mm_set1_epi16(coef >> 9);
/* Signed comparison so that rows shorter than 16 bytes skip this loop. */
116 for (; (int)i < (int)dest->row_pixels_size - 15; i += 16)
/* Load 16 bytes of each row (unaligned) and widen them into two vectors
 * of eight 16-bit lanes by interleaving with zero. */
118 __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i));
119 __m128i a1 = _mm_unpacklo_epi8(a2, zero);
120 a2 = _mm_unpackhi_epi8(a2, zero);
121 __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i));
122 __m128i b1 = _mm_unpacklo_epi8(b2, zero);
123 b2 = _mm_unpackhi_epi8(b2, zero);
/* result = ((a << 7) + (b - a) * weight) >> 7, computed per lane.
 * Unlike LINEAR_INTERPOLATE there is no rounding term and the weight has
 * only 7 bits, so this path can differ slightly from the scalar loops. */
124 b1 = _mm_sub_epi16(b1, a1);
125 b2 = _mm_sub_epi16(b2, a2);
126 a1 = _mm_slli_epi16(a1, 7);
127 a2 = _mm_slli_epi16(a2, 7);
128 b1 = _mm_mullo_epi16(b1, sse_coef);
129 b2 = _mm_mullo_epi16(b2, sse_coef);
130 a1 = _mm_add_epi16(a1, b1);
131 a2 = _mm_add_epi16(a2, b2);
132 a1 = _mm_srli_epi16(a1, 7);
133 a2 = _mm_srli_epi16(a2, 7);
/* Narrow back to 16 bytes with unsigned saturation and store (unaligned). */
134 a1 = _mm_packus_epi16(a1, a2);
135 _mm_storeu_si128((__m128i *)(dest_row + i), a1);
138 /* Unrolled loop using general-purpose registers */
139 for (; (int)i < (int)dest->row_pixels_size - 3; i += 4)
141 dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef);
142 dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef);
143 dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef);
144 dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef);
146 /* Remaining columns */
147 for (; i < dest->row_pixels_size; i++)
148 dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef);
/* Step to the next destination row (row_size is the stride). */
149 dest_row += dest->row_size;
152 /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */
/* The last source row starts row_size bytes before the end of the pixel data. */
153 memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size);
/*
 * Dispatches the downsampling of src into dest to the variant named after
 * the pixel size (image_scale_1_.. to image_scale_4_..), selected by
 * src->pixel_size.  dest and src are forwarded unchanged.
 * NOTE(review): the return type, braces, case labels and whatever ends each
 * case are not visible in this excerpt; the calls below are presumably the
 * bodies of cases 1..4 in order -- confirm against the full file.
 */
159 image_scale_downsample_xy(struct image *dest, struct image *src)
/* Only the source pixel size is examined; dest->pixel_size is not checked here. */
161 switch (src->pixel_size)
164 image_scale_1_downsample_xy(dest, src);
167 image_scale_2_downsample_xy(dest, src);
170 image_scale_3_downsample_xy(dest, src);
173 image_scale_4_downsample_xy(dest, src);
181 * FIXME: customizable; implement at least bilinear and bicubic filters */
/*
 * Scales src into dest.  The target size is taken from dest->cols and
 * dest->rows; the routine is chosen by comparing them with the source size:
 *   - same cols: row copy (nearest_y) for equal or larger row count,
 *     downsample_xy for a smaller row count;
 *   - same rows: downsample_xy for fewer cols, nearest_x for more cols;
 *   - otherwise: downsample_xy when neither dimension grows, else nearest_xy
 *     (so a mixed shrink/grow request takes the nearest-neighbour path).
 * Images whose pixel formats differ are rejected with
 * IMAGE_ERROR_INVALID_PIXEL_FORMAT reported through ctx.
 * NOTE(review): the return type, the return statements and the bare "else"
 * lines are not visible in this excerpt; the branch structure above is read
 * from the visible conditions and comments -- confirm against the full file.
 */
184 image_scale(struct image_context *ctx, struct image *dest, struct image *src)
/* Compare only the flag bits selected by IMAGE_PIXEL_FORMAT. */
186 if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT))
188 IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported.");
/* Width unchanged: only the row count can differ. */
191 if (dest->cols == src->cols)
193 if (dest->rows == src->rows)
195 /* No scale, copy only */
196 image_scale_nearest_y(dest, src);
199 else if (dest->rows < src->rows)
201 /* Downscale vertically */
202 image_scale_downsample_xy(dest, src);
207 /* Upscale vertically */
208 image_scale_nearest_y(dest, src);
/* Height unchanged: only the column count differs. */
212 else if (dest->rows == src->rows)
214 if (dest->cols < src->cols)
216 /* Downscale horizontally */
217 image_scale_downsample_xy(dest, src);
222 /* Upscale horizontally */
223 image_scale_nearest_x(dest, src);
/* Both dimensions differ. */
229 if (dest->cols <= src->cols && dest->rows <= src->rows)
231 /* Downscale in both dimensions */
232 image_scale_downsample_xy(dest, src);
/* Remaining case: at least one dimension grows. */
237 image_scale_nearest_xy(dest, src);
/*
 * Rescales the dimensions *cols x *rows in place against the box
 * max_cols x max_rows.  One dimension is multiplied by the box size and
 * divided by the other dimension, using integer arithmetic, which keeps the
 * cols:rows ratio up to truncation.
 * Both the input dimensions and the box must satisfy
 * image_dimensions_valid().
 * NOTE(review): the upsample parameter is not referenced on any line
 * visible in this excerpt; presumably it decides whether an image that
 * already fits is enlarged -- confirm.  The return type, braces, the bodies
 * of the two "else if" branches and the assignments of the dimension that
 * is set to the box size are not visible either.
 * NOTE(review): the products below are computed in uint; whether they can
 * overflow depends on the limits enforced by image_dimensions_valid() --
 * confirm.
 */
244 image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample)
246 ASSERT(image_dimensions_valid(*cols, *rows));
247 ASSERT(image_dimensions_valid(max_cols, max_rows));
/* The image already fits inside the box. */
248 if (*cols <= max_cols && *rows <= max_rows)
/* Cross-multiplied ratio test: true when the box is relatively wider than
 * the image, i.e. the height is the limiting side. */
252 if (max_cols * *rows > max_rows * *cols)
/* Scale the width by max_rows / rows, never beyond the box width. */
254 *cols = *cols * max_rows / *rows;
255 *cols = MIN(*cols, max_cols);
/* Otherwise scale the height by max_cols / cols, never beyond the box height. */
260 *rows = *rows * max_cols / *cols;
261 *rows = MIN(*rows, max_rows);
/* The width fits, so (the first test having failed) the height does not. */
265 else if (*cols <= max_cols)
/* The height fits, or the image is relatively wider than the box. */
267 else if (*rows <= max_rows || max_rows * *cols > max_cols * *rows)
/* Shrink the width by max_rows / rows, keeping it at least 1. */
270 *cols = *cols * max_rows / *rows;
271 *cols = MAX(*cols, 1);
/* Shrink the height by max_cols / cols, keeping it at least 1. */
275 *rows = *rows * max_cols / *cols;
276 *rows = MAX(*rows, 1);