summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-08-02 13:29:47 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-08-02 21:09:40 +0000
commit: 8e53e787d961a847aea7d5ed03bbafc5fe2eb683 (patch)
tree: ce40f2367e665323822cd39bb586fc72000fb16a
parent: eef70912bf7b0d25c07122f39cc8f819c6e351eb (diff)
Fix RGBA64 interpolation when x remainder equals zero
The 16-bit multiplication cannot handle the case where one of the factors is 65536 (the inverse weight 0x10000 - distx when distx is zero), so skip the x interpolation in that case, as we already do when the y remainder triggers the same issue.

Task-number: QTBUG-62165
Change-Id: Iea2ebe557949797d9aa77b8d7cdac9247eea7b84
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 21
1 files changed, 12 insertions, 9 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index aca31cf3e8..5c6c91f0ba 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1858,9 +1858,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u
#if defined(__SSE2__)
static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty)
{
- const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
- const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
-
__m128i vt = _mm_loadu_si128((const __m128i*)t);
if (disty) {
__m128i vb = _mm_loadu_si128((const __m128i*)b);
@@ -1868,8 +1865,12 @@ static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint
vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty));
vt = _mm_add_epi16(vt, vb);
}
- vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
- vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ if (distx) {
+ const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+ vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
+ vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ }
#ifdef Q_PROCESSOR_X86_64
return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt));
#else
@@ -3089,8 +3090,6 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
for (int i = 0; i < len; ++i) {
int distx = (fracX & 0x0000ffff);
#if defined(__SSE2__)
- const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
- const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
__m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
if (disty) {
__m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
@@ -3098,8 +3097,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
vb = _mm_mulhi_epu16(vb, vdy);
vt = _mm_add_epi16(vt, vb);
}
- vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
- vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ if (distx) {
+ const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+ vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
+ vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ }
_mm_storel_epi64((__m128i*)(b+i), vt);
#else
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);