summary | refs | log | tree | commit | diff | stats
path: root/src/gui/painting/qdrawhelper.cpp
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-01-28 11:07:14 +0100
committer: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-02-04 19:37:22 +0000
commit: 89edf43c44294888781c308d9b1f1d9bab63645b (patch)
tree: 7653d09ab651f237bdef38d24917183352847c78 /src/gui/painting/qdrawhelper.cpp
parent: 5432f2c7a1ff02e1ee0e07e442ceb6c12ca66098 (diff)
Generate SSE4.1 versions of premultiplying methods where convenient
The autovectorized versions of the premultiplying conversions are almost twice as fast with SSE4.1 as with SSE2. Therefore, on compilers that can conveniently generate SSE4.1 versions without duplicating code, this patch generates them and uses them when the CPU supports SSE4.1.
Change-Id: I699035963abe55a38b9ef8ba7b4a8c961c8dfcdd
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 36
1 files changed, 32 insertions, 4 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index de4be7177b..ac73d78afe 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -503,14 +503,25 @@ static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int,
return src;
}
-static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count,
- const QPixelLayout *, const QRgb *)
+static inline const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
buffer[i] = qPremultiply(src[i]);
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *layout, const QRgb *clut)
+{
+ // Twice as fast autovectorized due to SSE4.1 PMULLD instructions.
+ return convertARGB32ToARGB32PM(buffer, src, count, layout, clut);
+}
+#endif
+
+
static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -519,14 +530,24 @@ static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const u
return buffer;
}
-static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count,
- const QPixelLayout *, const QRgb *)
+static inline const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
buffer[i] = qPremultiply(RGBA2ARGB(src[i]));
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *layout, const QRgb *clut)
+{
+ // Twice as fast autovectorized due to SSE4.1 PMULLD instructions.
+ return convertRGBA8888ToARGB32PM(buffer, src, count, layout, clut);
+}
+#endif
+
static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -6858,6 +6879,13 @@ void qInitDrawhelperAsm()
}
#endif // SSSE3
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+ if (qCpuHasFeature(SSE4_1)) {
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
+ }
+#endif
+
functionForModeAsm = qt_functionForMode_SSE2;
functionForModeSolidAsm = qt_functionForModeSolid_SSE2;
#endif // SSE2