diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper_sse4.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper_sse4.cpp | 55 |
1 files changed, 53 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index 257bad9eca..14bfaabf09 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -44,16 +44,67 @@
 
 QT_BEGIN_NAMESPACE
 
+template<bool RGBA>
+static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
+{
+    int i = 0;
+    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+    const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
+    const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
+    const __m128i half = _mm_set1_epi16(0x0080);
+    const __m128i zero = _mm_setzero_si128();
+
+    for (; i < count - 3; i += 4) {
+        __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
+        if (!_mm_testz_si128(srcVector, alphaMask)) {
+            if (!_mm_testc_si128(srcVector, alphaMask)) {
+                if (RGBA)
+                    srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
+                __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
+                __m128i src2 = _mm_unpackhi_epi8(srcVector, zero);
+                __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
+                __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
+                src1 = _mm_mullo_epi16(src1, alpha1);
+                src2 = _mm_mullo_epi16(src2, alpha2);
+                src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8));
+                src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8));
+                src1 = _mm_add_epi16(src1, half);
+                src2 = _mm_add_epi16(src2, half);
+                src1 = _mm_srli_epi16(src1, 8);
+                src2 = _mm_srli_epi16(src2, 8);
+                src1 = _mm_blend_epi16(src1, alpha1, 0x88);
+                src2 = _mm_blend_epi16(src2, alpha2, 0x88);
+                srcVector = _mm_packus_epi16(src1, src2);
+                _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
+            } else {
+                if (RGBA)
+                    _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
+                else if (buffer != src)
+                    _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
+            }
+        } else {
+            _mm_storeu_si128((__m128i *)&buffer[i], _mm_setzero_si128());
+        }
+    }
+
+    SIMD_EPILOGUE(i, count, 3) {
+        uint v = qPremultiply(src[i]);
+        buffer[i] = RGBA ? RGBA2ARGB(v) : v;
+    }
+}
+
 const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
                                                      const QVector<QRgb> *, QDitherInfo *)
 {
-    return qt_convertARGB32ToARGB32PM(buffer, src, count);
+    convertARGBToARGB32PM_sse4<false>(buffer, src, count);
+    return buffer;
 }
 
 const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
                                                        const QVector<QRgb> *, QDitherInfo *)
 {
-    return qt_convertRGBA8888ToARGB32PM(buffer, src, count);
+    convertARGBToARGB32PM_sse4<true>(buffer, src, count);
+    return buffer;
 }
 
 const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count,