summaryrefslogtreecommitdiffstats
path: root/src/gui
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2016-07-06 17:11:59 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2016-07-23 07:43:01 +0000
commitb91f86a2128093ad7c65fa30b63ef87a9e55a4e0 (patch)
tree6c470a9871aa0d0d458c5cfab98bc41287bdae5c /src/gui
parent529b1c9e2a7706cefb3763628b29ca500202026d (diff)
Improve accuracy in fast path bilinear sampling
Adds rounding before using the optimized low accuracy interpolation, this reduces the magnitude of error in the scaled result from ~4 bits to just 2 bits. Change-Id: Ie4e618bf5b1f4a74367aa419ebbd534cc6a846b3 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src/gui')
-rw-r--r--src/gui/painting/qdrawhelper.cpp32
1 files changed, 19 insertions, 13 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index b452019251..f7b81944c5 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1827,9 +1827,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
/** \internal
interpolate 4 argb pixels with the distx and disty factor.
- distx and disty bust be between 0 and 16
+ distx and disty must be between 0 and 16
*/
-static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty)
+static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
uint distxy = distx * disty;
//idistx * disty = (16-distx) * disty = 16*disty - distxy
@@ -2176,7 +2176,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
const uint *s1 = (const uint *)data->texture.scanLine(y1);
const uint *s2 = (const uint *)data->texture.scanLine(y2);
- int disty = (fy & 0x0000ffff) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
if (blendType != BlendTransformedBilinearTiled) {
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
@@ -2190,7 +2190,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
uint tr = s1[x2]; \
uint bl = s2[x1]; \
uint br = s2[x2]; \
- int distx = (fx & 0x0000ffff) >> 12; \
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; \
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
fx += fdx; \
++b; \
@@ -2209,6 +2209,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_disty = _mm_set1_epi16(disty);
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ const __m128i v_fx_r = _mm_set1_epi32(0x8);
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
while (b < boundedEnd) {
@@ -2222,7 +2223,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
- __m128i v_distx = _mm_srli_epi16(v_fx, 12);
+ __m128i v_distx = _mm_srli_epi16(v_fx, 8);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
@@ -2252,6 +2254,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
}
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
+ const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
while (b < boundedEnd) {
@@ -2260,7 +2263,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
Vect_buffer v_fx_shifted;
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
- int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
+ int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12);
for (int i = 0; i < 4; i++) {
int x1 = v_fx_shifted.i[i];
@@ -2290,7 +2293,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
uint tr = s1[x2];
uint bl = s2[x1];
uint br = s2[x2];
- int distx = (fx & 0x0000ffff) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fx += fdx;
++b;
@@ -2362,6 +2365,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
+ const __m128i v_fxy_r = _mm_set1_epi32(0x8);
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
@@ -2396,6 +2400,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
__m128i v_disty = _mm_srli_epi16(v_fy, 12);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_fx, v_fxy_r), 4);
+ v_disty = _mm_srli_epi16(_mm_add_epi32(v_fy, v_fxy_r), 4);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
@@ -2434,8 +2440,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
int disty = (fy & 0x0000ffff) >> 8;
*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
#else
- int distx = (fx & 0x0000ffff) >> 12;
- int disty = (fy & 0x0000ffff) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
#endif
@@ -2664,13 +2670,13 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
fracX += fdx;
}
} else { //scale down
- int disty = (fy & 0x0000ffff) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
for (int i = 0; i < len; ++i) {
uint tl = buf1[i * 2 + 0];
uint tr = buf1[i * 2 + 1];
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = (fracX & 0x0000ffff) >> 12;
+ int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fracX += fdx;
}
@@ -2736,8 +2742,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = (fracX & 0x0000ffff) >> 12;
- int disty = (fracY & 0x0000ffff) >> 12;
+ int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fracX += fdx;