From 01c59ac857927043e49d4725062faea715ed49a3 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sun, 8 Dec 2013 19:23:23 -0800 Subject: Make qt_memfill{16,32} unconditional These two functions used to cause a runtime detection of the CPU features in order to improve their performance. Since the last two commits, there's no runtime detection of either SSE2 or Neon support, so there's no point in attempting runtime detection. Task-number: QTBUG-30440 Change-Id: I54fe92787c983003c2cc867ee636daec30063033 Reviewed-by: Marc Mutz Reviewed-by: Lars Knoll Reviewed-by: Olivier Goffart --- src/gui/painting/qdrawhelper_p.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 418294c56d..6af4aa4943 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -171,6 +171,8 @@ extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3]; extern DrawHelper qDrawHelper[QImage::NImageFormats]; void qBlendTexture(int count, const QSpan *spans, void *userData); +extern void qt_memfill32(quint32 *dest, quint32 value, int count); +extern void qt_memfill16(quint16 *dest, quint16 value, int count); typedef void (QT_FASTCALL *CompositionFunction)(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, uint const_alpha); typedef void (QT_FASTCALL *CompositionFunctionSolid)(uint *dest, int length, uint color, uint const_alpha); @@ -386,8 +388,6 @@ static inline qreal qRadialDeterminant(qreal a, qreal b, qreal c) return (b * b) - (4 * a * c); } -extern void (*qt_memfill32)(quint32 *dest, quint32 value, int count); - template Q_STATIC_TEMPLATE_FUNCTION const uint * QT_FASTCALL qt_fetch_radial_gradient_template(uint *buffer, const Operator *op, const QSpanData *data, int y, int x, int length) @@ -726,7 +726,6 @@ template<> inline void qt_memfill(quint32 *dest, quint32 color, int count) template<> inline 
void qt_memfill(quint16 *dest, quint16 color, int count) { - extern void (*qt_memfill16)(quint16 *dest, quint16 value, int count); qt_memfill16(dest, color, count); } -- cgit v1.2.3 From 407eee51e6ce261b51ecacdcb9a4e2ff6364699a Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 15 Jan 2014 15:02:09 +0100 Subject: Optimize INV_PREMUL Our inverse premultiply is rather unoptimized. Its major weakness is using three divide operations, which are the slowest integer operations. This patch reduces the three divisions to just one but using an approximation that is accurate for all uchar values. The patch also adds a general short-cut for alpha==255. Together these improvements make it 2 to 16 times faster depending on how many 0 and 255 alpha values are encountered. Change-Id: I96d7098a1bc320793b0d0526637acd1fdb5a43eb Reviewed-by: Gunnar Sletta --- src/gui/painting/qdrawhelper_p.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 6af4aa4943..4b11a8e92b 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -691,12 +691,16 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) { return t; } -#define INV_PREMUL(p) \ - (qAlpha(p) == 0 ? 0 : \ - ((qAlpha(p) << 24) \ - | (((255*qRed(p))/ qAlpha(p)) << 16) \ - | (((255*qGreen(p)) / qAlpha(p)) << 8) \ - | ((255*qBlue(p)) / qAlpha(p)))) +static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) { + const uint alpha = qAlpha(p); + if (alpha == 255) + return p; + if (alpha == 0) + return 0; + // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256. 
+ const uint invAlpha = 0x00ff00ffU / alpha; + return qRgba((qRed(p)*invAlpha)>>16, (qGreen(p)*invAlpha)>>16, (qBlue(p)*invAlpha)>>16, alpha); +} struct quint24 { quint24(uint value); -- cgit v1.2.3 From 6f7d370adec3054656f36b0d2a0777a8a1df3602 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 15 Jan 2014 14:08:14 +0100 Subject: Make conversion of semi-transparency to opaque formats consistent Currently the non-standard opaque formats all perform inverse premultiply on semi-transparent pixels when saving. This is only performed on RGB16 and RGB32 when explicitly converting to the formats, but not when drawing. This patch changes the conversion to be consistent across the board so less common formats are treated the same as the most common ones. This also makes drawing and converting to these formats much faster. Task-number: QTBUG-36143 Change-Id: I877ddb1c1dbb4d2dd9b4b9192525e50375a1ae60 Reviewed-by: Gunnar Sletta --- src/gui/painting/qdrawhelper_p.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 4b11a8e92b..3c945338a6 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -1034,6 +1034,7 @@ struct QPixelLayout BPP bpp; ConvertFunc convertToARGB32PM; ConvertFunc convertFromARGB32PM; + ConvertFunc convertFromRGB32; }; typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count); -- cgit v1.2.3 From 0226795cf33363a872c777034e0d8934ffaa3819 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Thu, 23 Jan 2014 16:09:20 +0100 Subject: Round evenly in INV_PREMUL Currently INV_PREMUL rounds strictly down, while PREMUL rounds evenly. This patch adds 0x8000 to the intermediate results in INV_PREMUL before right shifting, thereby achieving even rounding. 
The rounding also makes PREMUL(INV_PREMUL()) into an identity operation, which means we can safely convert ARGB32PM to ARGB32 and back without ever losing color details. A test is added to verify this. Change-Id: I1267e109caddcff0c01d726cb5c1c1e9fa5f7996 Reviewed-by: Gunnar Sletta --- src/gui/painting/qdrawhelper_p.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 3c945338a6..0f98b07229 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -699,7 +699,8 @@ static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) { return 0; // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256. const uint invAlpha = 0x00ff00ffU / alpha; - return qRgba((qRed(p)*invAlpha)>>16, (qGreen(p)*invAlpha)>>16, (qBlue(p)*invAlpha)>>16, alpha); + // We add 0x8000 to get even rounding. The rounding also ensures that PREMUL(INV_PREMUL(p)) == p for all p. + return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha); } struct quint24 { -- cgit v1.2.3 From a7b8ef08415b8056661c3db5950842ee546891b9 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 29 Jan 2014 11:41:31 +0100 Subject: Export optimized premultiply and unpremultiply methods This patch optimizes the unpremultiply method further by using a lookup table to avoid any divisions at all. The opportunity is taken to export both premultiply and unpremultiply since they are commonly used methods relevant to the exported QRgb type that can be both premultiplied and unpremultiplied ARGB. [ChangeLog][QtGui][QColor] Exported highly optimized methods for premultiply and unpremultiply of QRgb values. 
Change-Id: I658bcf57b0bc73c34c1765b64617d43b63ae820b Reviewed-by: Thiago Macieira Reviewed-by: Gunnar Sletta --- src/gui/painting/qdrawhelper_p.h | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 0f98b07229..bbeb73f0af 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -599,14 +599,6 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) { return (uint(t)) | (uint(t >> 24)); } -static Q_ALWAYS_INLINE uint PREMUL(uint x) { - uint a = x >> 24; - quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a; - t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8; - t &= 0x000000ff00ff00ff; - return (uint(t)) | (uint(t >> 24)) | (a << 24); -} - #else // 32-bit versions static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) { @@ -639,20 +631,9 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) { # pragma pop #endif -static Q_ALWAYS_INLINE uint PREMUL(uint x) { - uint a = x >> 24; - uint t = (x & 0xff00ff) * a; - t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8; - t &= 0xff00ff; - - x = ((x >> 8) & 0xff) * a; - x = (x + ((x >> 8) & 0xff) + 0x80); - x &= 0xff00; - x |= t | (a << 24); - return x; -} #endif + #if Q_BYTE_ORDER == Q_BIG_ENDIAN static Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) { quint32 rgb = x >> 8; @@ -691,17 +672,9 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) { return t; } -static Q_ALWAYS_INLINE uint INV_PREMUL(uint p) { - const uint alpha = qAlpha(p); - if (alpha == 255) - return p; - if (alpha == 0) - return 0; - // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256. - const uint invAlpha = 0x00ff00ffU / alpha; - // We add 0x8000 to get even rounding. The rounding also ensures that PREMUL(INV_PREMUL(p)) == p for all p. 
- return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha); -} +// FIXME: Remove when all Qt modules have stopped using PREMUL and INV_PREMUL +#define PREMUL(x) qPremultiply(x) +#define INV_PREMUL(p) qUnpremultiply(p) struct quint24 { quint24(uint value); -- cgit v1.2.3