summaryrefslogtreecommitdiffstats
path: root/src/gui
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui')
-rw-r--r--src/gui/painting/qdrawhelper.cpp23
-rw-r--r--src/gui/painting/qdrawhelper_avx2.cpp13
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp61
-rw-r--r--src/gui/painting/qdrawhelper_sse4.cpp55
4 files changed, 125 insertions, 27 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 9b5f15470e..4ea3b37d5f 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6196,20 +6196,18 @@ static void qInitDrawhelperFunctions()
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
if (qCpuHasFeature(SSE4_1)) {
-#if !defined(__SSE4_1__)
extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
- qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
- qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
-#endif
extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4;
@@ -6220,14 +6218,6 @@ static void qInitDrawhelperFunctions()
#if defined(QT_COMPILER_SUPPORTS_AVX2)
if (qCpuHasFeature(AVX2)) {
-#if !defined(__AVX2__)
- extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *);
- extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *);
- qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
- qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
-#endif
extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h, int const_alpha);
@@ -6277,6 +6267,15 @@ static void qInitDrawhelperFunctions()
sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
+#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+ extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *);
+ extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *);
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
+#endif
+
#if defined(ENABLE_PIXMAN_DRAWHELPERS)
// The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64
qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 9c1335298e..5e17e8abec 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -44,19 +44,6 @@
QT_BEGIN_NAMESPACE
-// Autovectorized premultiply functions:
-const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *)
-{
- return qt_convertARGB32ToARGB32PM(buffer, src, count);
-}
-
-const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *)
-{
- return qt_convertRGBA8888ToARGB32PM(buffer, src, count);
-}
-
// Vectorized blend functions:
// See BYTE_MUL_SSE2 for details.
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index cdb374f823..643c570f65 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -1069,6 +1069,67 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_neon(uint *buffer, const Oper
return buffer;
}
+#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+template<bool RGBA>
+static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int count)
+{
+ int i = 0;
+ const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
+ const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
+ const uint32x4_t blendMask = vdupq_n_u32(0xff000000);
+
+ for (; i < count - 3; i += 4) {
+ uint32x4_t srcVector = vld1q_u32(src + i);
+ uint32x4_t alphaVector = vshrq_n_u32(srcVector, 24);
+ uint32_t alphaSum = vaddvq_u32(alphaVector);
+ if (alphaSum) {
+ if (alphaSum != 255 * 4) {
+ if (RGBA)
+ srcVector = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask));
+ const uint8x8_t s1 = vreinterpret_u8_u32(vget_low_u32(srcVector));
+ const uint8x8_t s2 = vreinterpret_u8_u32(vget_high_u32(srcVector));
+ const uint8x8_t alpha1 = vtbl1_u8(s1, shuffleMask);
+ const uint8x8_t alpha2 = vtbl1_u8(s2, shuffleMask);
+ uint16x8_t src1 = vmull_u8(s1, alpha1);
+ uint16x8_t src2 = vmull_u8(s2, alpha2);
+ src1 = vsraq_n_u16(src1, src1, 8);
+ src2 = vsraq_n_u16(src2, src2, 8);
+ const uint8x8_t d1 = vrshrn_n_u16(src1, 8);
+ const uint8x8_t d2 = vrshrn_n_u16(src2, 8);
+ const uint32x4_t d = vbslq_u32(blendMask, srcVector, vreinterpretq_u32_u8(vcombine_u8(d1, d2)));
+ vst1q_u32(buffer + i, d);
+ } else {
+ if (RGBA)
+ vst1q_u32(buffer + i, vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask)));
+ else if (buffer != src)
+ vst1q_u32(buffer + i, srcVector);
+ }
+ } else {
+ vst1q_u32(buffer + i, vdupq_n_u32(0));
+ }
+ }
+
+ SIMD_EPILOGUE(i, count, 3) {
+ uint v = qPremultiply(src[i]);
+ buffer[i] = RGBA ? RGBA2ARGB(v) : v;
+ }
+}
+
+const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ convertARGBToARGB32PM_neon<false>(buffer, src, count);
+ return buffer;
+}
+
+const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ convertARGBToARGB32PM_neon<true>(buffer, src, count);
+ return buffer;
+}
+#endif
+
QT_END_NAMESPACE
#endif // __ARM_NEON__
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index 257bad9eca..14bfaabf09 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -44,16 +44,67 @@
QT_BEGIN_NAMESPACE
+template<bool RGBA>
+static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
+{
+ int i = 0;
+ const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+ const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
+ const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
+ const __m128i half = _mm_set1_epi16(0x0080);
+ const __m128i zero = _mm_setzero_si128();
+
+ for (; i < count - 3; i += 4) {
+ __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
+ if (!_mm_testz_si128(srcVector, alphaMask)) {
+ if (!_mm_testc_si128(srcVector, alphaMask)) {
+ if (RGBA)
+ srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
+ __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
+ __m128i src2 = _mm_unpackhi_epi8(srcVector, zero);
+ __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
+ __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
+ src1 = _mm_mullo_epi16(src1, alpha1);
+ src2 = _mm_mullo_epi16(src2, alpha2);
+ src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8));
+ src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8));
+ src1 = _mm_add_epi16(src1, half);
+ src2 = _mm_add_epi16(src2, half);
+ src1 = _mm_srli_epi16(src1, 8);
+ src2 = _mm_srli_epi16(src2, 8);
+ src1 = _mm_blend_epi16(src1, alpha1, 0x88);
+ src2 = _mm_blend_epi16(src2, alpha2, 0x88);
+ srcVector = _mm_packus_epi16(src1, src2);
+ _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
+ } else {
+ if (RGBA)
+ _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
+ else if (buffer != src)
+ _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
+ }
+ } else {
+ _mm_storeu_si128((__m128i *)&buffer[i], _mm_setzero_si128());
+ }
+ }
+
+ SIMD_EPILOGUE(i, count, 3) {
+ uint v = qPremultiply(src[i]);
+ buffer[i] = RGBA ? RGBA2ARGB(v) : v;
+ }
+}
+
const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
- return qt_convertARGB32ToARGB32PM(buffer, src, count);
+ convertARGBToARGB32PM_sse4<false>(buffer, src, count);
+ return buffer;
}
const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
- return qt_convertRGBA8888ToARGB32PM(buffer, src, count);
+ convertARGBToARGB32PM_sse4<true>(buffer, src, count);
+ return buffer;
}
const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count,