From 01c59ac857927043e49d4725062faea715ed49a3 Mon Sep 17 00:00:00 2001
From: Thiago Macieira <thiago.macieira@intel.com>
Date: Sun, 8 Dec 2013 19:23:23 -0800
Subject: Make qt_memfill{16,32} unconditional

These two functions used to cause a runtime detection of the CPU
features in order to improve their performance. Since the last two
commits, there's no runtime detection of either SSE2 or Neon support,
so there's no point in attempting runtime detection.

Task-number: QTBUG-30440
Change-Id: I54fe92787c983003c2cc867ee636daec30063033
Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Olivier Goffart <ogoffart@woboq.com>
---
 src/gui/painting/qdrawhelper_p.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/gui/painting/qdrawhelper_p.h')
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 418294c56d..6af4aa4943 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -171,6 +171,8 @@ extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3];
 extern DrawHelper qDrawHelper[QImage::NImageFormats];
 
 void qBlendTexture(int count, const QSpan *spans, void *userData);
+extern void qt_memfill32(quint32 *dest, quint32 value, int count);
+extern void qt_memfill16(quint16 *dest, quint16 value, int count);
 
 typedef void (QT_FASTCALL *CompositionFunction)(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, uint const_alpha);
 typedef void (QT_FASTCALL *CompositionFunctionSolid)(uint *dest, int length, uint color, uint const_alpha);
@@ -386,8 +388,6 @@ static inline qreal qRadialDeterminant(qreal a, qreal b, qreal c)
     return (b * b) - (4 * a * c);
 }
 
-extern void (*qt_memfill32)(quint32 *dest, quint32 value, int count);
-
 template <class RadialFetchFunc> Q_STATIC_TEMPLATE_FUNCTION
 const uint * QT_FASTCALL qt_fetch_radial_gradient_template(uint *buffer, const Operator *op, const QSpanData *data,
                                                            int y, int x, int length)
@@ -726,7 +726,6 @@ template<> inline void qt_memfill(quint32 *dest, quint32 color, int count)
 
 template<> inline void qt_memfill(quint16 *dest, quint16 color, int count)
 {
-    extern void (*qt_memfill16)(quint16 *dest, quint16 value, int count);
     qt_memfill16(dest, color, count);
 }
 
-- 
cgit v1.2.3


From 407eee51e6ce261b51ecacdcb9a4e2ff6364699a Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@digia.com>
Date: Wed, 15 Jan 2014 15:02:09 +0100
Subject: Optimize INV_PREMUL

Our inverse premultiply is rather unoptimized. Its major weakness is
using three divide operations, which are the slowest integer operations.

This patch reduces the three divisions to just one by using an
approximation that is accurate for all uchar values.

The patch also adds a general short-cut for alpha==255.

Together these improvements make it 2 to 16 times faster, depending on
how many 0 and 255 alpha values are encountered.

Change-Id: I96d7098a1bc320793b0d0526637acd1fdb5a43eb
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
---
 src/gui/painting/qdrawhelper_p.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'src/gui/painting/qdrawhelper_p.h')

diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 6af4aa4943..4b11a8e92b 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -691,12 +691,16 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) {
     return t;
 }
 
-#define INV_PREMUL(p)                                   \
-    (qAlpha(p) == 0 ? 0 :                               \
-    ((qAlpha(p) << 24)                                  \
-     | (((255*qRed(p))/ qAlpha(p)) << 16)               \
-     | (((255*qGreen(p)) / qAlpha(p))  << 8)            \
-     | ((255*qBlue(p)) / qAlpha(p))))
+static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) {
+    const uint alpha = qAlpha(p);
+    if (alpha == 255)
+        return p;
+    if (alpha == 0)
+        return 0;
+    // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256.
+    const uint invAlpha = 0x00ff00ffU / alpha;
+    return qRgba((qRed(p)*invAlpha)>>16, (qGreen(p)*invAlpha)>>16, (qBlue(p)*invAlpha)>>16, alpha);
+}
 
 struct quint24 {
     quint24(uint value);
-- 
cgit v1.2.3


From 6f7d370adec3054656f36b0d2a0777a8a1df3602 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@digia.com>
Date: Wed, 15 Jan 2014 14:08:14 +0100
Subject: Make conversion of semi-transparency to opaque formats consistent

Currently the non-standard opaque formats all perform inverse premultiply
on semi-transparent pixels when saving. For RGB16 and RGB32 this is only
performed when explicitly converting to the formats, but not when drawing.

This patch changes the conversion to be consistent across the board so
less common formats are treated the same as the most common ones. This
also makes drawing and converting to these formats much faster.

Task-number: QTBUG-36143
Change-Id: I877ddb1c1dbb4d2dd9b4b9192525e50375a1ae60
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
---
 src/gui/painting/qdrawhelper_p.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gui/painting/qdrawhelper_p.h')

diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 4b11a8e92b..3c945338a6 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1034,6 +1034,7 @@ struct QPixelLayout
     BPP bpp;
     ConvertFunc convertToARGB32PM;
     ConvertFunc convertFromARGB32PM;
+    ConvertFunc convertFromRGB32;
 };
 
 typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
-- 
cgit v1.2.3


From 0226795cf33363a872c777034e0d8934ffaa3819 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@digia.com>
Date: Thu, 23 Jan 2014 16:09:20 +0100
Subject: Round evenly in INV_PREMUL

Currently INV_PREMUL rounds strictly down, while PREMUL rounds evenly.

This patch adds 0x8000 to the intermediate results in INV_PREMUL before
right shifting, thereby achieving even rounding.

The rounding also makes PREMUL(INV_PREMUL()) into an identity operation,
which means we can safely convert ARGB32PM to ARGB32 and back without
ever losing color details. A test is added to verify this.

Change-Id: I1267e109caddcff0c01d726cb5c1c1e9fa5f7996
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
---
 src/gui/painting/qdrawhelper_p.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gui/painting/qdrawhelper_p.h')

diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 3c945338a6..0f98b07229 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -699,7 +699,8 @@ static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) {
         return 0;
     // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256.
     const uint invAlpha = 0x00ff00ffU / alpha;
-    return qRgba((qRed(p)*invAlpha)>>16, (qGreen(p)*invAlpha)>>16, (qBlue(p)*invAlpha)>>16, alpha);
+    // We add 0x8000 to get even rounding. The rounding also ensures that PREMUL(INV_PREMUL(p)) == p for all p.
+    return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha);
 }
 
 struct quint24 {
-- 
cgit v1.2.3


From a7b8ef08415b8056661c3db5950842ee546891b9 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@digia.com>
Date: Wed, 29 Jan 2014 11:41:31 +0100
Subject: Export optimized premultiply and unpremultiply methods

This patch optimizes the unpremultiply method further by using a
lookup table to avoid any divisions at all.

The opportunity is taken to export both premultiply and unpremultiply
since they are commonly used methods relevant to the exported QRgb type
that can be both premultiplied and unpremultiplied ARGB.

[ChangeLog][QtGui][QColor] Exported highly optimized methods for
premultiply and unpremultiply of QRgb values.

Change-Id: I658bcf57b0bc73c34c1765b64617d43b63ae820b
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
---
 src/gui/painting/qdrawhelper_p.h | 35 ++++-------------------------------
 1 file changed, 4 insertions(+), 31 deletions(-)

(limited to 'src/gui/painting/qdrawhelper_p.h')

diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 0f98b07229..bbeb73f0af 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -599,14 +599,6 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
     return (uint(t)) | (uint(t >> 24));
 }
 
-static Q_ALWAYS_INLINE uint PREMUL(uint x) {
-    uint a = x >> 24;
-    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
-    t &= 0x000000ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24)) | (a << 24);
-}
-
 #else // 32-bit versions
 
 static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
@@ -639,20 +631,9 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 #  pragma pop
 #endif
 
-static Q_ALWAYS_INLINE uint PREMUL(uint x) {
-    uint a = x >> 24;
-    uint t = (x & 0xff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
-    t &= 0xff00ff;
-
-    x = ((x >> 8) & 0xff) * a;
-    x = (x + ((x >> 8) & 0xff) + 0x80);
-    x &= 0xff00;
-    x |= t | (a << 24);
-    return x;
-}
 #endif
 
+
 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
 static Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) {
     quint32 rgb = x >> 8;
@@ -691,17 +672,9 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) {
     return t;
 }
 
-static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) {
-    const uint alpha = qAlpha(p);
-    if (alpha == 255)
-        return p;
-    if (alpha == 0)
-        return 0;
-    // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256.
-    const uint invAlpha = 0x00ff00ffU / alpha;
-    // We add 0x8000 to get even rounding. The rounding also ensures that PREMUL(INV_PREMUL(p)) == p for all p.
-    return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha);
-}
+// FIXME: Remove when all Qt modules have stopped using PREMUL and INV_PREMUL
+#define PREMUL(x) qPremultiply(x)
+#define INV_PREMUL(p) qUnpremultiply(p)
 
 struct quint24 {
     quint24(uint value);
-- 
cgit v1.2.3