summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gui/image/qimage_conversions.cpp 32
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 36
2 files changed, 61 insertions, 7 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index 2e8fc1963d..696f95b565 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -195,7 +195,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla
return true;
}
-static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
+static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888);
Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied);
@@ -219,6 +219,15 @@ static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt:
}
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags)
+{
+ // Forwards to the inline generic converter so that its loop is built for SSE4.1 here; autovectorized with PMULLD it is reportedly about twice as fast.
+ convert_ARGB_to_ARGB_PM(dest, src, flags);
+}
+#endif
+
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
#ifndef __SSE2__
@@ -232,7 +241,7 @@ static bool convert_ARGB_to_ARGB_PM_inplace(QImageData *data, Qt::ImageConversio
for (int i = 0; i < data->height; ++i) {
const QRgb *end = rgb_data + data->width;
while (rgb_data < end) {
- *rgb_data = PREMUL(*rgb_data);
+ *rgb_data = qPremultiply(*rgb_data);
++rgb_data;
}
rgb_data += pad;
@@ -312,7 +321,7 @@ static bool convert_ARGB_to_RGBA_inplace(QImageData *data, Qt::ImageConversionFl
return true;
}
-static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
+static inline void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_ARGB32);
Q_ASSERT(dest->format == QImage::Format_RGBA8888_Premultiplied);
@@ -336,6 +345,15 @@ static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt:
}
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static void convert_ARGB_to_RGBA_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags)
+{
+ // Forwards to the inline generic converter so that its loop is built for SSE4.1 here; autovectorized with PMULLD it is reportedly about twice as fast.
+ convert_ARGB_to_RGBA_PM(dest, src, flags);
+}
+#endif
+
static void convert_RGBA_to_ARGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_RGBX8888 || src->format == QImage::Format_RGBA8888 || src->format == QImage::Format_RGBA8888_Premultiplied);
@@ -2945,6 +2963,14 @@ void qInitImageConversions()
}
#endif
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+ if (qCpuHasFeature(SSE4_1)) {
+ qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4;
+ qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4;
+ qimage_converter_map[QImage::Format_ARGB32][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_RGBA_PM_sse4;
+ }
+#endif
+
#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index de4be7177b..ac73d78afe 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -503,14 +503,25 @@ static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int,
return src;
}
-static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count,
- const QPixelLayout *, const QRgb *)
+static inline const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
buffer[i] = qPremultiply(src[i]);
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *layout, const QRgb *clut)
+{
+ // Forwards to the inline generic premultiply loop so that it is built for SSE4.1 here; autovectorized with PMULLD it is reportedly about twice as fast.
+ return convertARGB32ToARGB32PM(buffer, src, count, layout, clut);
+}
+#endif
+
+
static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -519,14 +530,24 @@ static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const u
return buffer;
}
-static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count,
- const QPixelLayout *, const QRgb *)
+static inline const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
buffer[i] = qPremultiply(RGBA2ARGB(src[i]));
return buffer;
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
+ const QPixelLayout *layout, const QRgb *clut)
+{
+ // Forwards to the inline generic swizzle-and-premultiply loop so that it is built for SSE4.1 here; autovectorized with PMULLD it is reportedly about twice as fast.
+ return convertRGBA8888ToARGB32PM(buffer, src, count, layout, clut);
+}
+#endif
+
static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -6858,6 +6879,13 @@ void qInitDrawhelperAsm()
}
#endif // SSSE3
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+ if (qCpuHasFeature(SSE4_1)) {
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
+ }
+#endif
+
functionForModeAsm = qt_functionForMode_SSE2;
functionForModeSolidAsm = qt_functionForModeSolid_SSE2;
#endif // SSE2