From 8e53e787d961a847aea7d5ed03bbafc5fe2eb683 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 2 Aug 2017 13:29:47 +0200 Subject: Fix RGBA64 interpolation when x remainder equals zero The 16-bit multiplication can not handle the case where one of the numbers is 65536, so skip it in that case as we do when the y remainder triggers the same issue. Task-number: QTBUG-62165 Change-Id: Iea2ebe557949797d9aa77b8d7cdac9247eea7b84 Reviewed-by: Eirik Aavitsland --- src/gui/painting/qdrawhelper.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/gui/painting/qdrawhelper.cpp') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index aca31cf3e8..5c6c91f0ba 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1858,9 +1858,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u #if defined(__SSE2__) static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty) { - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); - __m128i vt = _mm_loadu_si128((const __m128i*)t); if (disty) { __m128i vb = _mm_loadu_si128((const __m128i*)b); @@ -1868,8 +1865,12 @@ static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); vt = _mm_add_epi16(vt, vb); } - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } #ifdef Q_PROCESSOR_X86_64 return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); #else @@ -3089,8 +3090,6 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co for (int i = 0; i < len; ++i) { int distx = (fracX & 0x0000ffff); #if defined(__SSE2__) - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2)); if (disty) { __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2)); @@ -3098,8 +3097,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co vb = _mm_mulhi_epu16(vb, vdy); vt = _mm_add_epi16(vt, vb); } - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } _mm_storel_epi64((__m128i*)(b+i), vt); #else b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); -- cgit v1.2.3