From 60addee9384cb8c589f7abf9020f7d6d7a6f4d63 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Thu, 31 Jan 2019 16:13:16 +0100 Subject: Improve ARGB32ToRGBA64 conversions Improves the precision so 255 values map to 65535 exactly. Change-Id: I366f408e8c6047d52acbed35e9d665249bbaba2b Reviewed-by: Thiago Macieira --- src/gui/painting/qdrawhelper_avx2.cpp | 28 ++++++++++++--------------- src/gui/painting/qdrawhelper_sse4.cpp | 36 +++++++++++++++++------------------ 2 files changed, 29 insertions(+), 35 deletions(-) (limited to 'src/gui') diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp index f05cc0926e..2b3cc9b226 100644 --- a/src/gui/painting/qdrawhelper_avx2.cpp +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -1135,14 +1135,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector); const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector); if (!cf) { - dst1 = _mm256_unpacklo_epi8(srcVector, zero); - dst2 = _mm256_unpackhi_epi8(srcVector, zero); - const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask); - const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask); - dst1 = _mm256_mullo_epi16(dst1, alpha1); - dst2 = _mm256_mullo_epi16(dst2, alpha2); - dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7)); - dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 7)); + const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + dst1 = _mm256_mulhi_epu16(src1, alpha1); + dst2 = _mm256_mulhi_epu16(src2, alpha2); + dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15)); + dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15)); dst1 = _mm256_blend_epi16(dst1, src1, 0x88); dst2 = _mm256_blend_epi16(dst2, src2, 0x88); } else { @@ -1171,14 +1169,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector); const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector); if (!cf) { - dst1 = _mm256_unpacklo_epi8(srcVector, zero); - dst2 = _mm256_unpackhi_epi8(srcVector, zero); - const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask); - const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask); - dst1 = _mm256_mullo_epi16(dst1, alpha1); - dst2 = _mm256_mullo_epi16(dst2, alpha2); - dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7)); - dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 7)); + const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + dst1 = _mm256_mulhi_epu16(src1, alpha1); + dst2 = _mm256_mulhi_epu16(src2, alpha2); + dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15)); + dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15)); dst1 = _mm256_blend_epi16(dst1, src1, 0x88); dst2 = _mm256_blend_epi16(dst2, src2, 0x88); } else { diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index 1da3b75ade..d9a687b1b4 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -107,28 +107,26 @@ static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int cou for (; i < count - 3; i += 4) { __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); if (!_mm_testz_si128(srcVector, alphaMask)) { - if (!_mm_testc_si128(srcVector, alphaMask)) { - if (!RGBA) - srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); - __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); - __m128i src2 = _mm_unpackhi_epi8(srcVector, zero); + bool cf = _mm_testc_si128(srcVector, alphaMask); + + if (!RGBA) + srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); + const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); + const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); + if (!cf) { __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); - src1 = _mm_mullo_epi16(src1, alpha1); - src2 = _mm_mullo_epi16(src2, alpha2); - alpha1 = _mm_unpacklo_epi8(srcVector, srcVector); - alpha2 = _mm_unpackhi_epi8(srcVector, srcVector); - src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 7)); - src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 7)); - src1 = _mm_blend_epi16(src1, alpha1, 0x88); - src2 = _mm_blend_epi16(src2, alpha2, 0x88); - _mm_storeu_si128((__m128i *)&buffer[i], src1); - _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); + __m128i dst1 = _mm_mulhi_epu16(src1, alpha1); + __m128i dst2 = _mm_mulhi_epu16(src2, alpha2); + // Map 0->0xfffe to 0->0xffff + dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15)); + dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15)); + // correct alpha value: + dst1 = _mm_blend_epi16(dst1, src1, 0x88); + dst2 = _mm_blend_epi16(dst2, src2, 0x88); + _mm_storeu_si128((__m128i *)&buffer[i], dst1); + _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2); } else { - if (!RGBA) - srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); - const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); - const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); _mm_storeu_si128((__m128i *)&buffer[i], src1); _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); } -- cgit v1.2.3