From 9be87085d848c9615319d1be7d35a1f9963c3380 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen
Date: Tue, 27 Jan 2015 14:08:17 +0100
Subject: Only use 32bit version of qPremultiply

With auto-vectorization enabled in QtGui, the 32bit version of
qPremultiply is faster than the 64bit version since it can be
vectorized wider (4x on 128bit as opposed to 2x).

Since all our important 64bit targets have SIMD, that makes the 64bit
version pointless.

Change-Id: I4e9070a3a3c8e2b54f17a95ba0aee0405cbb8ec9
Reviewed-by: Marc Mutz
---
 src/gui/painting/qrgb.h | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

(limited to 'src/gui/painting/qrgb.h')

diff --git a/src/gui/painting/qrgb.h b/src/gui/painting/qrgb.h
index e6bda490ff..7186081cda 100644
--- a/src/gui/painting/qrgb.h
+++ b/src/gui/painting/qrgb.h
@@ -72,21 +72,7 @@ inline Q_DECL_CONSTEXPR int qGray(QRgb rgb) // convert RGB to gra
 inline Q_DECL_CONSTEXPR bool qIsGray(QRgb rgb)
 { return qRed(rgb) == qGreen(rgb) && qRed(rgb) == qBlue(rgb); }
 
-template
-inline QRgb qPremultiply_impl(QRgb x);
-
-template <> // 64-bit version
-inline QRgb qPremultiply_impl<8>(QRgb x)
-{
-    const uint a = qAlpha(x);
-    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
-    t &= 0x000000ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24)) | (a << 24);
-}
-
-template <> // 32-bit version
-inline QRgb qPremultiply_impl<4>(QRgb x)
+inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x)
 {
     const uint a = qAlpha(x);
     uint t = (x & 0xff00ff) * a;
@@ -96,12 +82,9 @@ inline QRgb qPremultiply_impl<4>(QRgb x)
     x = ((x >> 8) & 0xff) * a;
     x = (x + ((x >> 8) & 0xff) + 0x80);
     x &= 0xff00;
-    x |= t | (a << 24);
-    return x;
+    return x | t | (a << 24);
 }
 
-inline QRgb qPremultiply(QRgb x) { return qPremultiply_impl(x); }
-
 Q_GUI_EXPORT extern const uint qt_inv_premul_factor[];
 
 inline QRgb qUnpremultiply(QRgb p)
-- 
cgit v1.2.3