From 3ce88e13b9cc56e00bc3f075e9649c6b291ae01c Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Mon, 9 Mar 2015 10:16:18 +0100 Subject: Add AVX2 autovectorized versions of premultiply Following up on using GCC's autovectorization for faster SSE4.1 premultiply, this patch adds specialized autovectorized versions of premultiply for AVX2, giving almost another doubling in speed. To make the speedup for AVX2 and SSE4.1 available to non-GCC compilers, the target-specific methods have been moved to separate files. Change-Id: I97ce05be67f4adeeb9a096eef80fd5fb662099f3 Reviewed-by: Gunnar Sletta --- src/gui/painting/qdrawhelper_p.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/gui/painting/qdrawhelper_p.h') diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 08bc0776f7..51e51fd53f 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -893,6 +893,22 @@ inline int qBlue565(quint16 rgb) { return (b << 3) | (b >> 2); } + +static Q_ALWAYS_INLINE const uint *qt_convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qPremultiply(src[i]); + return buffer; +} + +static Q_ALWAYS_INLINE const uint *qt_convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qPremultiply(RGBA2ARGB(src[i])); + return buffer; +} + + const uint qt_bayer_matrix[16][16] = { { 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc, 0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff}, -- cgit v1.2.3