summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2019-01-31 16:13:16 +0100
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2019-02-07 14:27:47 +0000
commit: 60addee9384cb8c589f7abf9020f7d6d7a6f4d63 (patch)
tree: 2a55252db920f78421c03e9130a2080cba7793c3
parent: 928cab5ff1d931d00074d8930c41537109814371 (diff)
Improve ARGB32ToRGBA64 conversions
Improves the precision so that values of 255 map to exactly 65535.
Change-Id: I366f408e8c6047d52acbed35e9d665249bbaba2b
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- src/gui/painting/qdrawhelper_avx2.cpp 28
-rw-r--r-- src/gui/painting/qdrawhelper_sse4.cpp 36
2 files changed, 29 insertions, 35 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index f05cc0926e..2b3cc9b226 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -1135,14 +1135,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety
const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector);
const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector);
if (!cf) {
- dst1 = _mm256_unpacklo_epi8(srcVector, zero);
- dst2 = _mm256_unpackhi_epi8(srcVector, zero);
- const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask);
- const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask);
- dst1 = _mm256_mullo_epi16(dst1, alpha1);
- dst2 = _mm256_mullo_epi16(dst2, alpha2);
- dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7));
- dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 7));
+ const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask);
+ const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask);
+ dst1 = _mm256_mulhi_epu16(src1, alpha1);
+ dst2 = _mm256_mulhi_epu16(src2, alpha2);
+ dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15));
+ dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15));
dst1 = _mm256_blend_epi16(dst1, src1, 0x88);
dst2 = _mm256_blend_epi16(dst2, src2, 0x88);
} else {
@@ -1171,14 +1169,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety
const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector);
const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector);
if (!cf) {
- dst1 = _mm256_unpacklo_epi8(srcVector, zero);
- dst2 = _mm256_unpackhi_epi8(srcVector, zero);
- const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask);
- const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask);
- dst1 = _mm256_mullo_epi16(dst1, alpha1);
- dst2 = _mm256_mullo_epi16(dst2, alpha2);
- dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7));
- dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 7));
+ const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask);
+ const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask);
+ dst1 = _mm256_mulhi_epu16(src1, alpha1);
+ dst2 = _mm256_mulhi_epu16(src2, alpha2);
+ dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15));
+ dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15));
dst1 = _mm256_blend_epi16(dst1, src1, 0x88);
dst2 = _mm256_blend_epi16(dst2, src2, 0x88);
} else {
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index 1da3b75ade..d9a687b1b4 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -107,28 +107,26 @@ static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int cou
for (; i < count - 3; i += 4) {
__m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
if (!_mm_testz_si128(srcVector, alphaMask)) {
- if (!_mm_testc_si128(srcVector, alphaMask)) {
- if (!RGBA)
- srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
- __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
- __m128i src2 = _mm_unpackhi_epi8(srcVector, zero);
+ bool cf = _mm_testc_si128(srcVector, alphaMask);
+
+ if (!RGBA)
+ srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
+ const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector);
+ const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector);
+ if (!cf) {
__m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
__m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
- src1 = _mm_mullo_epi16(src1, alpha1);
- src2 = _mm_mullo_epi16(src2, alpha2);
- alpha1 = _mm_unpacklo_epi8(srcVector, srcVector);
- alpha2 = _mm_unpackhi_epi8(srcVector, srcVector);
- src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 7));
- src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 7));
- src1 = _mm_blend_epi16(src1, alpha1, 0x88);
- src2 = _mm_blend_epi16(src2, alpha2, 0x88);
- _mm_storeu_si128((__m128i *)&buffer[i], src1);
- _mm_storeu_si128((__m128i *)&buffer[i + 2], src2);
+ __m128i dst1 = _mm_mulhi_epu16(src1, alpha1);
+ __m128i dst2 = _mm_mulhi_epu16(src2, alpha2);
+ // Map 0->0xfffe to 0->0xffff
+ dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15));
+ dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15));
+ // correct alpha value:
+ dst1 = _mm_blend_epi16(dst1, src1, 0x88);
+ dst2 = _mm_blend_epi16(dst2, src2, 0x88);
+ _mm_storeu_si128((__m128i *)&buffer[i], dst1);
+ _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2);
} else {
- if (!RGBA)
- srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
- const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector);
- const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector);
_mm_storeu_si128((__m128i *)&buffer[i], src1);
_mm_storeu_si128((__m128i *)&buffer[i + 2], src2);
}