summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-04-22 19:22:26 +0000
committerAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-04-23 09:25:28 +0000
commit63d5a42b59149bfdebc9a5a123e7115a813bcaa4 (patch)
tree5a9118f4468e816c3efc2bdfeffaad17c649b03f
parent6535912add3444544c9403e5ecb2b8910ec6e2f5 (diff)
Revert "Remove separate SSE4 unpremultiply function"
Could causedSSE4 instructions to be used on non SSE4 machines in cases when qUnpremultiplywas not inlined. This reverts commit 964ccc58534aac436529007000d1c38d76c88834. Change-Id: Ic676ade8f75129e8d37c4d96cbfb2bdb5b794919 Task-number: QTBUG-45741 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/gui/image/qimage_conversions.cpp6
-rw-r--r--src/gui/image/qimage_sse4.cpp3
-rw-r--r--src/gui/painting/qdrawhelper.cpp10
-rw-r--r--src/gui/painting/qdrawhelper_sse4.cpp7
-rw-r--r--src/gui/painting/qdrawingprimitive_sse2_p.h21
-rw-r--r--src/gui/painting/qrgb.h37
-rw-r--r--tests/auto/gui/painting/qcolor/tst_qcolor.cpp19
7 files changed, 58 insertions, 45 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index 8cb886e09b..28e3a48689 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -117,7 +117,7 @@ static const uint *QT_FASTCALL convertRGB32ToARGB32PM(uint *buffer, const uint *
return buffer;
}
-#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__)
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
extern const uint *QT_FASTCALL convertRGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
#endif
@@ -144,7 +144,7 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
if (src->format == QImage::Format_RGB32)
convertToARGB32PM = convertRGB32ToARGB32PM;
if (dest->format == QImage::Format_RGB32) {
-#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__)
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
if (qCpuHasFeature(SSE4_1))
convertFromARGB32PM = convertRGB32FromARGB32PM_sse4;
else
@@ -193,7 +193,7 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
if (data->format == QImage::Format_RGB32)
convertToARGB32PM = convertRGB32ToARGB32PM;
if (dst_format == QImage::Format_RGB32) {
-#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__)
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
if (qCpuHasFeature(SSE4_1))
convertFromARGB32PM = convertRGB32FromARGB32PM_sse4;
else
diff --git a/src/gui/image/qimage_sse4.cpp b/src/gui/image/qimage_sse4.cpp
index fb63f5bff9..5fad4f572a 100644
--- a/src/gui/image/qimage_sse4.cpp
+++ b/src/gui/image/qimage_sse4.cpp
@@ -33,6 +33,7 @@
#include <qimage.h>
#include <private/qdrawhelper_p.h>
+#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qimage_p.h>
#include <private/qsimd_p.h>
@@ -44,7 +45,7 @@ const uint *QT_FASTCALL convertRGB32FromARGB32PM_sse4(uint *buffer, const uint *
const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = 0xff000000 | qUnpremultiply(src[i]);
+ buffer[i] = 0xff000000 | qUnpremultiply_sse4(src[i]);
return buffer;
}
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 2817a4a145..37f2de98b6 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6704,22 +6704,24 @@ void qInitDrawhelperAsm()
}
#endif // SSSE3
-#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__)
+#if QT_COMPILER_SUPPORTS_SSE4_1
if (qCpuHasFeature(SSE4_1)) {
+#if !defined(__SSE4_1__)
extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
+#endif
extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
- qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
- qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4;
}
#endif
-#if defined(QT_COMPILER_SUPPORTS_AVX2) && !defined(__AVX2__)
+#if QT_COMPILER_SUPPORTS_AVX2 && !defined(__AVX2__)
if (qCpuHasFeature(AVX2)) {
extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *);
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index 8e0b2cbb18..43a3958997 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -32,6 +32,7 @@
****************************************************************************/
#include <private/qdrawhelper_p.h>
+#include <private/qdrawingprimitive_sse2_p.h>
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
@@ -53,7 +54,7 @@ const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint
const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = qUnpremultiply(src[i]);
+ buffer[i] = qUnpremultiply_sse4(src[i]);
return buffer;
}
@@ -61,7 +62,7 @@ const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uin
const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = ARGB2RGBA(qUnpremultiply(src[i]));
+ buffer[i] = ARGB2RGBA(qUnpremultiply_sse4(src[i]));
return buffer;
}
@@ -69,7 +70,7 @@ const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *s
const QPixelLayout *, const QRgb *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply(src[i]));
+ buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply_sse4(src[i]));
return buffer;
}
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index fbee06e157..4d0790a502 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -236,4 +236,25 @@ QT_END_NAMESPACE
#endif // __SSE2__
+QT_BEGIN_NAMESPACE
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+QT_FUNCTION_TARGET(SSE4_1)
+inline QRgb qUnpremultiply_sse4(QRgb p)
+{
+ const uint alpha = qAlpha(p);
+ const uint invAlpha = qt_inv_premul_factor[alpha];
+ const __m128i via = _mm_set1_epi32(invAlpha);
+ const __m128i vr = _mm_set1_epi32(0x8000);
+ __m128i vl = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(p));
+ vl = _mm_mullo_epi32(vl, via);
+ vl = _mm_add_epi32(vl, vr);
+ vl = _mm_srai_epi32(vl, 16);
+ vl = _mm_insert_epi32(vl, alpha, 3);
+ vl = _mm_packus_epi32(vl, _mm_setzero_si128());
+ vl = _mm_packus_epi16(vl, _mm_setzero_si128());
+ return _mm_cvtsi128_si32(vl);
+}
+#endif
+QT_END_NAMESPACE
+
#endif // QDRAWINGPRIMITIVE_SSE2_P_H
diff --git a/src/gui/painting/qrgb.h b/src/gui/painting/qrgb.h
index 05b3a76bce..f7f2185bef 100644
--- a/src/gui/painting/qrgb.h
+++ b/src/gui/painting/qrgb.h
@@ -36,11 +36,6 @@
#include <QtCore/qglobal.h>
#include <QtCore/qprocessordetection.h>
-#if defined(__SSE4_1__)
-#include <smmintrin.h>
-#elif defined(__SSE2__)
-#include <emmintrin.h>
-#endif
QT_BEGIN_NAMESPACE
@@ -92,45 +87,19 @@ inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x)
Q_GUI_EXPORT extern const uint qt_inv_premul_factor[];
-#if defined(__SSE2__)
-inline QRgb qUnpremultiply(QRgb p)
-{
- const uint alpha = qAlpha(p);
- if (alpha == 255 || alpha == 0)
- return p;
- const uint invAlpha = qt_inv_premul_factor[alpha];
- const __m128i via = _mm_set1_epi32(invAlpha);
- const __m128i vr = _mm_set1_epi32(0x8000);
-#ifdef __SSE4_1__
- __m128i vl = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(p));
- vl = _mm_mullo_epi32(vl, via);
-#else
- __m128i vl = _mm_unpacklo_epi8(_mm_cvtsi32_si128(p), _mm_setzero_si128());
- vl = _mm_unpacklo_epi16(vl, vl);
- __m128i vll = _mm_mullo_epi16(vl, via);
- __m128i vlh = _mm_mulhi_epu16(vl, via);
- vl = _mm_add_epi32(vll, _mm_slli_epi32(vlh, 16));
-#endif
- vl = _mm_add_epi32(vl, vr);
- vl = _mm_srli_epi32(vl, 16);
- vl = _mm_packs_epi32(vl, _mm_setzero_si128());
- vl = _mm_insert_epi16(vl, alpha, 3);
- vl = _mm_packus_epi16(vl, _mm_setzero_si128());
- return _mm_cvtsi128_si32(vl);
-}
-#else
inline QRgb qUnpremultiply(QRgb p)
{
const uint alpha = qAlpha(p);
// Alpha 255 and 0 are the two most common values, which makes them beneficial to short-cut.
- if (alpha == 255 || alpha == 0)
+ if (alpha == 255)
return p;
+ if (alpha == 0)
+ return 0;
// (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256.
const uint invAlpha = qt_inv_premul_factor[alpha];
// We add 0x8000 to get even rounding. The rounding also ensures that qPremultiply(qUnpremultiply(p)) == p for all p.
return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha);
}
-#endif
QT_END_NAMESPACE
diff --git a/tests/auto/gui/painting/qcolor/tst_qcolor.cpp b/tests/auto/gui/painting/qcolor/tst_qcolor.cpp
index c34b1639cc..4fafbf9827 100644
--- a/tests/auto/gui/painting/qcolor/tst_qcolor.cpp
+++ b/tests/auto/gui/painting/qcolor/tst_qcolor.cpp
@@ -38,6 +38,7 @@
#include <qcolor.h>
#include <qdebug.h>
+#include <private/qdrawingprimitive_sse2_p.h>
class tst_QColor : public QObject
{
@@ -103,6 +104,7 @@ private slots:
void achromaticHslHue();
void premultiply();
+ void unpremultiply_sse4();
#ifdef Q_DEAD_CODE_FROM_QT4_X11
void setallowX11ColorNames();
@@ -1445,5 +1447,22 @@ void tst_QColor::premultiply()
}
}
+void tst_QColor::unpremultiply_sse4()
+{
+ // Tests that qUnpremultiply_sse4 returns the same as qUnpremultiply.
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
+ if (qCpuHasFeature(SSE4_1)) {
+ for (uint a = 0; a < 256; a++) {
+ for (uint c = 0; c <= a; c++) {
+ QRgb p = qRgba(c, a-c, c, a);
+ QCOMPARE(qUnpremultiply(p), qUnpremultiply_sse4(p));
+ }
+ }
+ return;
+ }
+#endif
+ QSKIP("SSE4 not supported on this CPU.");
+}
+
QTEST_MAIN(tst_QColor)
#include "tst_qcolor.moc"