summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2016-07-06 17:11:59 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2016-07-23 07:43:01 +0000
commit b91f86a2128093ad7c65fa30b63ef87a9e55a4e0 (patch)
tree 6c470a9871aa0d0d458c5cfab98bc41287bdae5c
parent 529b1c9e2a7706cefb3763628b29ca500202026d (diff)
Improve accuracy in fast path bilinear sampling
Adds rounding before using the optimized low-accuracy interpolation. This reduces the magnitude of the error in the scaled result from ~4 bits to just 2 bits.
Change-Id: Ie4e618bf5b1f4a74367aa419ebbd534cc6a846b3
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 32
-rw-r--r-- tests/auto/gui/image/qimage/tst_qimage.cpp 73
2 files changed, 64 insertions, 41 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index b452019251..f7b81944c5 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1827,9 +1827,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
/** \internal
interpolate 4 argb pixels with the distx and disty factor.
- distx and disty bust be between 0 and 16
+ distx and disty must be between 0 and 16
*/
-static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty)
+static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
uint distxy = distx * disty;
//idistx * disty = (16-distx) * disty = 16*disty - distxy
@@ -2176,7 +2176,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
const uint *s1 = (const uint *)data->texture.scanLine(y1);
const uint *s2 = (const uint *)data->texture.scanLine(y2);
- int disty = (fy & 0x0000ffff) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
if (blendType != BlendTransformedBilinearTiled) {
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
@@ -2190,7 +2190,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
uint tr = s1[x2]; \
uint bl = s2[x1]; \
uint br = s2[x2]; \
- int distx = (fx & 0x0000ffff) >> 12; \
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; \
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
fx += fdx; \
++b; \
@@ -2209,6 +2209,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_disty = _mm_set1_epi16(disty);
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ const __m128i v_fx_r = _mm_set1_epi32(0x8);
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
while (b < boundedEnd) {
@@ -2222,7 +2223,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
- __m128i v_distx = _mm_srli_epi16(v_fx, 12);
+ __m128i v_distx = _mm_srli_epi16(v_fx, 8);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
@@ -2252,6 +2254,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
}
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
+ const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
while (b < boundedEnd) {
@@ -2260,7 +2263,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
Vect_buffer v_fx_shifted;
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
- int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
+ int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12);
for (int i = 0; i < 4; i++) {
int x1 = v_fx_shifted.i[i];
@@ -2290,7 +2293,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
uint tr = s1[x2];
uint bl = s2[x1];
uint br = s2[x2];
- int distx = (fx & 0x0000ffff) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fx += fdx;
++b;
@@ -2362,6 +2365,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
+ const __m128i v_fxy_r = _mm_set1_epi32(0x8);
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
@@ -2396,6 +2400,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
__m128i v_disty = _mm_srli_epi16(v_fy, 12);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_fx, v_fxy_r), 4);
+ v_disty = _mm_srli_epi16(_mm_add_epi32(v_fy, v_fxy_r), 4);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
@@ -2434,8 +2440,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
int disty = (fy & 0x0000ffff) >> 8;
*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
#else
- int distx = (fx & 0x0000ffff) >> 12;
- int disty = (fy & 0x0000ffff) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
#endif
@@ -2664,13 +2670,13 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
fracX += fdx;
}
} else { //scale down
- int disty = (fy & 0x0000ffff) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
for (int i = 0; i < len; ++i) {
uint tl = buf1[i * 2 + 0];
uint tr = buf1[i * 2 + 1];
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = (fracX & 0x0000ffff) >> 12;
+ int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fracX += fdx;
}
@@ -2736,8 +2742,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = (fracX & 0x0000ffff) >> 12;
- int disty = (fracY & 0x0000ffff) >> 12;
+ int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
fracX += fdx;
diff --git a/tests/auto/gui/image/qimage/tst_qimage.cpp b/tests/auto/gui/image/qimage/tst_qimage.cpp
index 1bc4ec2ae7..7e1a02d716 100644
--- a/tests/auto/gui/image/qimage/tst_qimage.cpp
+++ b/tests/auto/gui/image/qimage/tst_qimage.cpp
@@ -106,6 +106,7 @@ private slots:
void smoothScale();
void smoothScale2_data();
void smoothScale2();
+ void smoothScale3_data();
void smoothScale3();
void smoothScale4();
@@ -1715,9 +1716,12 @@ static inline int rand8()
return int(256. * (qrand() / (RAND_MAX + 1.0)));
}
-// compares img.scale against the bilinear filtering used by QPainter
-void tst_QImage::smoothScale3()
+void tst_QImage::smoothScale3_data()
{
+ QTest::addColumn<QImage>("img");
+ QTest::addColumn<qreal>("scale_x");
+ QTest::addColumn<qreal>("scale_y");
+
QImage img(128, 128, QImage::Format_RGB32);
for (int y = 0; y < img.height(); ++y) {
for (int x = 0; x < img.width(); ++x) {
@@ -1730,36 +1734,49 @@ void tst_QImage::smoothScale3()
}
}
- qreal scales[2] = { .5, 2 };
+ QTest::newRow("(0.5, 0.5)") << img << qreal(0.5) << qreal(0.5);
+ QTest::newRow("(0.5, 1.0)") << img << qreal(0.5) << qreal(1.0);
+ QTest::newRow("(1.0, 0.5)") << img << qreal(1.0) << qreal(0.5);
+ QTest::newRow("(0.5, 2.0)") << img << qreal(0.5) << qreal(2.0);
+ QTest::newRow("(1.0, 2.0)") << img << qreal(1.0) << qreal(2.0);
+ QTest::newRow("(2.0, 0.5)") << img << qreal(2.0) << qreal(0.5);
+ QTest::newRow("(2.0, 1.0)") << img << qreal(2.0) << qreal(1.0);
+ QTest::newRow("(2.0, 2.0)") << img << qreal(2) << qreal(2);
+}
+// compares img.scale against the bilinear filtering used by QPainter
+void tst_QImage::smoothScale3()
+{
+ QFETCH(QImage, img);
+ QFETCH(qreal, scale_x);
+ QFETCH(qreal, scale_y);
- for (int i = 0; i < 2; ++i) {
- QImage a = img.scaled(img.size() * scales[i], Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
- QImage b(a.size(), a.format());
- b.fill(0x0);
+ QImage a = img.scaled(img.width() * scale_x, img.height() * scale_y, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
+ QImage b(a.size(), a.format());
+ b.fill(0x0);
- QPainter p(&b);
- p.setRenderHint(QPainter::SmoothPixmapTransform);
- p.scale(scales[i], scales[i]);
- p.drawImage(0, 0, img);
- p.end();
- int err = 0;
-
- for (int y = 0; y < a.height(); ++y) {
- for (int x = 0; x < a.width(); ++x) {
- QRgb ca = a.pixel(x, y);
- QRgb cb = b.pixel(x, y);
-
- // tolerate a little bit of rounding errors
- bool r = true;
- r &= qAbs(qRed(ca) - qRed(cb)) <= 18;
- r &= qAbs(qGreen(ca) - qGreen(cb)) <= 18;
- r &= qAbs(qBlue(ca) - qBlue(cb)) <= 18;
- if (!r)
- err++;
- }
+ QPainter p(&b);
+ p.setRenderHint(QPainter::SmoothPixmapTransform);
+ p.scale(scale_x, scale_y);
+ p.drawImage(0, 0, img);
+ p.end();
+ int err = 0;
+
+ for (int y = 0; y < a.height(); ++y) {
+ for (int x = 0; x < a.width(); ++x) {
+ QRgb ca = a.pixel(x, y);
+ QRgb cb = b.pixel(x, y);
+
+ // tolerate a little bit of rounding errors
+ int tolerance = 3;
+ bool r = true;
+ r &= qAbs(qRed(ca) - qRed(cb)) <= tolerance;
+ r &= qAbs(qGreen(ca) - qGreen(cb)) <= tolerance;
+ r &= qAbs(qBlue(ca) - qBlue(cb)) <= tolerance;
+ if (!r)
+ err++;
}
- QCOMPARE(err, 0);
}
+ QCOMPARE(err, 0);
}
// Tests smooth upscale is smooth