From 9be87085d848c9615319d1be7d35a1f9963c3380 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com>
Date: Tue, 27 Jan 2015 14:08:17 +0100
Subject: Only use 32bit version of qPremultiply

With auto-vectorization enabled in QtGui, the 32-bit version of
qPremultiply is faster than the 64-bit version because it can be
vectorized wider (four pixels per 128-bit vector as opposed to two).
Since all our important 64-bit targets have SIMD, the 64-bit version
is pointless.

Change-Id: I4e9070a3a3c8e2b54f17a95ba0aee0405cbb8ec9
Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
---
 src/gui/painting/qrgb.h | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

(limited to 'src/gui/painting/qrgb.h')
diff --git a/src/gui/painting/qrgb.h b/src/gui/painting/qrgb.h
index e6bda490ff..7186081cda 100644
--- a/src/gui/painting/qrgb.h
+++ b/src/gui/painting/qrgb.h
@@ -72,21 +72,7 @@ inline Q_DECL_CONSTEXPR int qGray(QRgb rgb)                // convert RGB to gra
 inline Q_DECL_CONSTEXPR bool qIsGray(QRgb rgb)
 { return qRed(rgb) == qGreen(rgb) && qRed(rgb) == qBlue(rgb); }
 
-template <int ProcessorWordSize>
-inline QRgb qPremultiply_impl(QRgb x);
-
-template <> // 64-bit version
-inline QRgb qPremultiply_impl<8>(QRgb x)
-{
-    const uint a = qAlpha(x);
-    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
-    t &= 0x000000ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24)) | (a << 24);
-}
-
-template <> // 32-bit version
-inline QRgb qPremultiply_impl<4>(QRgb x)
+inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x)
 {
     const uint a = qAlpha(x);
     uint t = (x & 0xff00ff) * a;
@@ -96,12 +82,9 @@ inline QRgb qPremultiply_impl<4>(QRgb x)
     x = ((x >> 8) & 0xff) * a;
     x = (x + ((x >> 8) & 0xff) + 0x80);
     x &= 0xff00;
-    x |= t | (a << 24);
-    return x;
+    return x | t | (a << 24);
 }
 
-inline QRgb qPremultiply(QRgb x) { return qPremultiply_impl<Q_PROCESSOR_WORDSIZE>(x); }
-
 Q_GUI_EXPORT extern const uint qt_inv_premul_factor[];
 
 inline QRgb qUnpremultiply(QRgb p)
-- 
cgit v1.2.3