diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-08-02 13:29:47 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-08-02 21:09:40 +0000 |
commit | 8e53e787d961a847aea7d5ed03bbafc5fe2eb683 (patch) | |
tree | ce40f2367e665323822cd39bb586fc72000fb16a | |
parent | eef70912bf7b0d25c07122f39cc8f819c6e351eb (diff) |
Fix RGBA64 interpolation when x remainder equals zero
The 16-bit multiplication cannot handle the case where one of the
factors is 65536 (0x10000 - distx when distx is zero), so skip it in that
case, as we already do when the y remainder triggers the same issue.
Task-number: QTBUG-62165
Change-Id: Iea2ebe557949797d9aa77b8d7cdac9247eea7b84
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 21 |
1 files changed, 12 insertions, 9 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index aca31cf3e8..5c6c91f0ba 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1858,9 +1858,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u #if defined(__SSE2__) static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty) { - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); - __m128i vt = _mm_loadu_si128((const __m128i*)t); if (disty) { __m128i vb = _mm_loadu_si128((const __m128i*)b); @@ -1868,8 +1865,12 @@ static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); vt = _mm_add_epi16(vt, vb); } - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } #ifdef Q_PROCESSOR_X86_64 return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); #else @@ -3089,8 +3090,6 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co for (int i = 0; i < len; ++i) { int distx = (fracX & 0x0000ffff); #if defined(__SSE2__) - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2)); if (disty) { __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2)); @@ -3098,8 
+3097,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co vb = _mm_mulhi_epu16(vb, vdy); vt = _mm_add_epi16(vt, vb); } - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } _mm_storel_epi64((__m128i*)(b+i), vt); #else b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); |