diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/painting/qblendfunctions.cpp | 187 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 67 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 26 |
3 files changed, 62 insertions, 218 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp index dbdd82e432..0898a20998 100644 --- a/src/gui/painting/qblendfunctions.cpp +++ b/src/gui/painting/qblendfunctions.cpp @@ -395,169 +395,6 @@ void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, } } -template<QtPixelOrder PixelOrder> -static void qt_blend_argb32pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_argb32pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = qConvertArgb32ToA2rgb30<PixelOrder>(s) + BYTE_MUL_RGB30(dst[x], 255 - qAlpha(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const_alpha = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = BYTE_MUL_RGB30(qConvertArgb32ToA2rgb30<PixelOrder>(s), const_alpha) + BYTE_MUL_RGB30(dst[x], 255 - qt_div_255(qAlpha(s) * const_alpha)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - -template<QtPixelOrder PixelOrder> -static void qt_blend_rgb32_on_rgb30(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_rgb32_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - if (const_alpha != 256) { - qt_blend_argb32pm_on_a2rgb30pm<PixelOrder>(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - return; - } - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - dst[x] = qConvertRgb32ToRgb30<PixelOrder>(src[x]); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } -} - -static void qt_blend_a2rgb30pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_a2rgb30pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const uint const_alpha255 = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8; - uint s = BYTE_MUL_RGB30(src[x], const_alpha255); - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - a); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - - -static void qt_blend_rgb30_on_rgb30(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_rgb30_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - if (const_alpha != 256) { - qt_blend_a2rgb30pm_on_a2rgb30pm(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - return; - } - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - int len = w * 4; - for (int y=0; y<h; ++y) { - memcpy(dst, src, len); - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } -} - -static void qt_blend_a2bgr30pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_a2bgr30pm_on_a2rgb32pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = qRgbSwapRgb30(src[x]); - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const uint const_alpha255 = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8; - uint s = BYTE_MUL_RGB30(src[x], const_alpha255); - dst[x] = qRgbSwapRgb30(s) + BYTE_MUL_RGB30(dst[x], 255 - a); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - struct Blend_RGB32_on_RGB32_NoAlpha { inline void write(quint32 *dst, quint32 src) { *dst = src; } @@ -772,30 +609,6 @@ void qInitBlendFunctions() qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32; qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32; #endif - qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; qTransformFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_transform_image_rgb32_on_rgb32; qTransformFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_argb32; diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 07e5a3d19b..0cf7e20605 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1252,25 +1252,54 @@ static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *b const __m128i cmask = _mm_set1_epi32(0x000003ff); int i = 0; __m128i vr, vg, vb, va; - for (; i < count-1; i += 2) { - __m128i vs = _mm_loadu_si128((const __m128i*)buffer); - buffer += 2; - vr = _mm_srli_epi64(vs, 6); - vg = _mm_srli_epi64(vs, 16 + 6 - 10); - vb = _mm_srli_epi64(vs, 32 + 6); - vr = _mm_and_si128(vr, cmask); - vg = _mm_and_si128(vg, gmask); - vb = _mm_and_si128(vb, cmask); - va = _mm_srli_epi64(vs, 48 + 14); - if (PixelOrder == PixelOrderRGB) - vr = _mm_slli_epi32(vr, 20); - else - vb = _mm_slli_epi32(vb, 20); - va = _mm_slli_epi32(va, 30); - __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); - vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); - _mm_storel_epi64((__m128i*)dest, vd); - dest += 2; + if (i < count && (const uintptr_t)buffer & 0x8) { + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + ++i; + } + + for (; i < count-15; i += 16) { + // Repremultiplying is really expensive and hard to do in SIMD without AVX2, + // so we try to avoid it by checking if it is needed 16 samples at a time. + __m128i vOr = _mm_set1_epi32(0); + __m128i vAnd = _mm_set1_epi32(0xffffffff); + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)(buffer + j)); + vOr = _mm_or_si128(vOr, vs); + vAnd = _mm_and_si128(vAnd, vs); + } + const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7)); + const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7)); + + if (andAlpha == 0xffff) { + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)buffer); + buffer += 2; + vr = _mm_srli_epi64(vs, 6); + vg = _mm_srli_epi64(vs, 16 + 6 - 10); + vb = _mm_srli_epi64(vs, 32 + 6); + vr = _mm_and_si128(vr, cmask); + vg = _mm_and_si128(vg, gmask); + vb = _mm_and_si128(vb, cmask); + va = _mm_srli_epi64(vs, 48 + 14); + if (PixelOrder == PixelOrderRGB) + vr = _mm_slli_epi32(vr, 20); + else + vb = _mm_slli_epi32(vb, 20); + va = _mm_slli_epi32(va, 30); + __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); + vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); + _mm_storel_epi64((__m128i*)dest, vd); + dest += 2; + } + } else if (orAlpha == 0) { + for (int j = 0; j < 16; ++j) { + *dest++ = 0; + buffer++; + } + } else { + for (int j = 0; j < 16; ++j) + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + } } for (; i < count; ++i) diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 2c222b97c2..0034bfdf91 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -712,18 +712,6 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) { static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; } static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; } -static Q_ALWAYS_INLINE uint BYTE_MUL_RGB30(uint x, uint a) { - uint xa = x >> 30; - uint xr = (x >> 20) & 0x3ff; - uint xg = (x >> 10) & 0x3ff; - uint xb = x & 0x3ff; - xa = qt_div_255(xa * a); - xr = qt_div_255(xr * a); - xg = qt_div_255(xg * a); - xb = qt_div_255(xb * a); - return (xa << 30) | (xr << 20) | (xg << 10) | xb; -} - static Q_ALWAYS_INLINE uint qAlphaRgb30(uint c) { uint a = c >> 30; @@ -897,6 +885,18 @@ inline QRgb qRepremultiply(QRgb p) return qPremultiply(p); } +template<unsigned int Shift> +inline QRgba64 qRepremultiply(QRgba64 p) +{ + const uint alpha = p.alpha(); + if (alpha == 65535 || alpha == 0) + return p; + p = p.unpremultiplied(); + Q_CONSTEXPR uint mult = 65535 / (65535 >> Shift); + p.setAlpha(mult * (alpha >> Shift)); + return p.premultiplied(); +} + template<> inline uint qConvertArgb32ToA2rgb30<PixelOrderBGR>(QRgb c) { @@ -1000,6 +1000,7 @@ template<enum QtPixelOrder> inline unsigned int qConvertRgb64ToRgb30(QRgba64); template<> inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c) { + c = qRepremultiply<14>(c); const uint a = c.alpha() >> 14; const uint r = c.red() >> 6; const uint g = c.green() >> 6; @@ -1010,6 +1011,7 @@ inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c) template<> inline unsigned int qConvertRgb64ToRgb30<PixelOrderRGB>(QRgba64 c) { + c = qRepremultiply<14>(c); const uint a = c.alpha() >> 14; const uint r = c.red() >> 6; const uint g = c.green() >> 6; |