summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-01-29 12:25:06 +0100
committerAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-02-09 11:36:49 +0000
commit38aafe1a17c5d3be81edf798dd4dbe745727f4fd (patch)
tree7e742aae377ddd5aef9d53bdcbf500721d2f43f2 /src
parent868201155fd677dbc6d14346f5ea61e82ebce27b (diff)
Optimize unpremultiply on SSE4.1
Adds an SSE4.1 optimized version of qUnpremultiply and uses it in the most drawing conversions methods. This gives a speed-up of little over 2x. Change-Id: Ieb858a94ada1eb86d7af715ac1a100f1587f360d Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src')
-rw-r--r--src/gui/painting/qdrawhelper.cpp42
-rw-r--r--src/gui/painting/qdrawingprimitive_sse2_p.h22
2 files changed, 63 insertions, 1 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index ac73d78afe..83372b198d 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -45,6 +45,7 @@
#include <private/qpaintengine_raster_p.h>
#include <private/qpainter_p.h>
#include <private/qdrawhelper_x86_p.h>
+#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qdrawhelper_neon_p.h>
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
#include <private/qdrawhelper_mips_dsp_p.h>
@@ -572,6 +573,18 @@ static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uin
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = qUnpremultiply_sse4(src[i]);
+ return buffer;
+}
+#endif
+
+
static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -588,6 +601,17 @@ static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const u
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = ARGB2RGBA(qUnpremultiply_sse4(src[i]));
+ return buffer;
+}
+#endif
+
static const uint *QT_FASTCALL convertRGBXFromRGB32(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -604,6 +628,17 @@ static const uint *QT_FASTCALL convertRGBXFromARGB32PM(uint *buffer, const uint
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply_sse4(src[i]));
+ return buffer;
+}
+#endif
+
template<QtPixelOrder PixelOrder>
static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
@@ -6879,10 +6914,15 @@ void qInitDrawhelperAsm()
}
#endif // SSSE3
-#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
if (qCpuHasFeature(SSE4_1)) {
+#if !defined(__SSE4_1__)
qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
+#endif
+ qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4;
}
#endif
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 2b4cdc286e..deac31b797 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -236,4 +236,26 @@ QT_END_NAMESPACE
#endif // __SSE2__
+QT_BEGIN_NAMESPACE
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+QT_FUNCTION_TARGET(SSE4_1)
+inline QRgb qUnpremultiply_sse4(QRgb p)
+{
+ const uint alpha = qAlpha(p);
+ const uint invAlpha = qt_inv_premul_factor[alpha];
+ const __m128i via = _mm_set1_epi32(invAlpha);
+ const __m128i vr = _mm_set1_epi32(0x8000);
+ __m128i vl = _mm_unpacklo_epi8(_mm_cvtsi32_si128(p), _mm_setzero_si128());
+ vl = _mm_unpacklo_epi16(vl, _mm_setzero_si128());
+ vl = _mm_mullo_epi32(vl, via);
+ vl = _mm_add_epi32(vl, vr);
+ vl = _mm_srai_epi32(vl, 16);
+ vl = _mm_insert_epi32(vl, alpha, 3);
+ vl = _mm_packus_epi32(vl, _mm_setzero_si128());
+ vl = _mm_packus_epi16(vl, _mm_setzero_si128());
+ return _mm_cvtsi128_si32(vl);
+}
+#endif
+QT_END_NAMESPACE
+
#endif // QDRAWINGPRIMITIVE_SSE2_P_H