diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-06-30 13:26:25 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-08-11 09:33:06 +0000 |
commit | 55512950281260b85ea40c08239e459c76b696d2 (patch) | |
tree | 98faefc98c2d7e81b65517d36ccfecfed9a79240 | |
parent | 49ea02f435876d514898d587700a21e1a8972f09 (diff) |
Fix repremultiply from RGB64 to RGB30
Just like from RGB32 to RGB30 we must also repremultiply when converting
from RGB64 because the alpha channel loses more precision than the other
color channels.
Since this is not approximated accurately in the simple blending
functions and the functions are no longer needed now the main render
engine supports higher accuracy, the simple blending routines for RGB30
have been removed.
Change-Id: I2b7b8eb015e330a487848fc4370ad3a1e966be91
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
-rw-r--r-- | src/gui/painting/qblendfunctions.cpp | 187 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 67 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 26 | ||||
-rw-r--r-- | tests/auto/gui/painting/qpainter/tst_qpainter.cpp | 4 |
4 files changed, 66 insertions, 218 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp index dbdd82e432..0898a20998 100644 --- a/src/gui/painting/qblendfunctions.cpp +++ b/src/gui/painting/qblendfunctions.cpp @@ -395,169 +395,6 @@ void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, } } -template<QtPixelOrder PixelOrder> -static void qt_blend_argb32pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_argb32pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = qConvertArgb32ToA2rgb30<PixelOrder>(s) + BYTE_MUL_RGB30(dst[x], 255 - qAlpha(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const_alpha = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = BYTE_MUL_RGB30(qConvertArgb32ToA2rgb30<PixelOrder>(s), const_alpha) + BYTE_MUL_RGB30(dst[x], 255 - qt_div_255(qAlpha(s) * const_alpha)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - -template<QtPixelOrder PixelOrder> -static void qt_blend_rgb32_on_rgb30(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_rgb32_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - if (const_alpha != 256) { - qt_blend_argb32pm_on_a2rgb30pm<PixelOrder>(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - return; - } - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - dst[x] = qConvertRgb32ToRgb30<PixelOrder>(src[x]); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } -} - -static void qt_blend_a2rgb30pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_a2rgb30pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const uint const_alpha255 = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8; - uint s = BYTE_MUL_RGB30(src[x], const_alpha255); - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - a); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - - -static void qt_blend_rgb30_on_rgb30(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_rgb30_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - if (const_alpha != 256) { - qt_blend_a2rgb30pm_on_a2rgb30pm(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - return; - } - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - int len = w * 4; - for (int y=0; y<h; ++y) { - memcpy(dst, src, len); - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } -} - -static void qt_blend_a2bgr30pm_on_a2rgb30pm(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha) -{ -#ifdef QT_DEBUG_DRAW - fprintf(stdout, "qt_blend_a2bgr30pm_on_a2rgb32pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", - destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); - fflush(stdout); -#endif - - const uint *src = (const uint *) srcPixels; - uint *dst = (uint *) destPixels; - if (const_alpha == 256) { - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = qRgbSwapRgb30(src[x]); - dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s)); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } else if (const_alpha != 0) { - const uint const_alpha255 = (const_alpha * 255) >> 8; - for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8; - uint s = BYTE_MUL_RGB30(src[x], const_alpha255); - dst[x] = qRgbSwapRgb30(s) + BYTE_MUL_RGB30(dst[x], 255 - a); - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - } - } -} - struct Blend_RGB32_on_RGB32_NoAlpha { inline void write(quint32 *dst, quint32 src) { *dst = src; } @@ -772,30 +609,6 @@ void qInitBlendFunctions() qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32; qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32; #endif - qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30; - qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm; qTransformFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_transform_image_rgb32_on_rgb32; qTransformFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_argb32; diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 07e5a3d19b..0cf7e20605 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1252,25 +1252,54 @@ static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *b const __m128i cmask = _mm_set1_epi32(0x000003ff); int i = 0; __m128i vr, vg, vb, va; - for (; i < count-1; i += 2) { - __m128i vs = _mm_loadu_si128((const __m128i*)buffer); - buffer += 2; - vr = _mm_srli_epi64(vs, 6); - vg = _mm_srli_epi64(vs, 16 + 6 - 10); - vb = _mm_srli_epi64(vs, 32 + 6); - vr = _mm_and_si128(vr, cmask); - vg = _mm_and_si128(vg, gmask); - vb = _mm_and_si128(vb, cmask); - va = _mm_srli_epi64(vs, 48 + 14); - if (PixelOrder == PixelOrderRGB) - vr = _mm_slli_epi32(vr, 20); - else - vb = _mm_slli_epi32(vb, 20); - va = _mm_slli_epi32(va, 30); - __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); - vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); - _mm_storel_epi64((__m128i*)dest, vd); - dest += 2; + if (i < count && (const uintptr_t)buffer & 0x8) { + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + ++i; + } + + for (; i < count-15; i += 16) { + // Repremultiplying is really expensive and hard to do in SIMD without AVX2, + // so we try to avoid it by checking if it is needed 16 samples at a time. + __m128i vOr = _mm_set1_epi32(0); + __m128i vAnd = _mm_set1_epi32(0xffffffff); + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)(buffer + j)); + vOr = _mm_or_si128(vOr, vs); + vAnd = _mm_and_si128(vAnd, vs); + } + const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7)); + const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7)); + + if (andAlpha == 0xffff) { + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)buffer); + buffer += 2; + vr = _mm_srli_epi64(vs, 6); + vg = _mm_srli_epi64(vs, 16 + 6 - 10); + vb = _mm_srli_epi64(vs, 32 + 6); + vr = _mm_and_si128(vr, cmask); + vg = _mm_and_si128(vg, gmask); + vb = _mm_and_si128(vb, cmask); + va = _mm_srli_epi64(vs, 48 + 14); + if (PixelOrder == PixelOrderRGB) + vr = _mm_slli_epi32(vr, 20); + else + vb = _mm_slli_epi32(vb, 20); + va = _mm_slli_epi32(va, 30); + __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); + vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); + _mm_storel_epi64((__m128i*)dest, vd); + dest += 2; + } + } else if (orAlpha == 0) { + for (int j = 0; j < 16; ++j) { + *dest++ = 0; + buffer++; + } + } else { + for (int j = 0; j < 16; ++j) + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + } } for (; i < count; ++i) diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 2c222b97c2..0034bfdf91 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -712,18 +712,6 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) { static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; } static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; } -static Q_ALWAYS_INLINE uint BYTE_MUL_RGB30(uint x, uint a) { - uint xa = x >> 30; - uint xr = (x >> 20) & 0x3ff; - uint xg = (x >> 10) & 0x3ff; - uint xb = x & 0x3ff; - xa = qt_div_255(xa * a); - xr = qt_div_255(xr * a); - xg = qt_div_255(xg * a); - xb = qt_div_255(xb * a); - return (xa << 30) | (xr << 20) | (xg << 10) | xb; -} - static Q_ALWAYS_INLINE uint qAlphaRgb30(uint c) { uint a = c >> 30; @@ -897,6 +885,18 @@ inline QRgb qRepremultiply(QRgb p) return qPremultiply(p); } +template<unsigned int Shift> +inline QRgba64 qRepremultiply(QRgba64 p) +{ + const uint alpha = p.alpha(); + if (alpha == 65535 || alpha == 0) + return p; + p = p.unpremultiplied(); + Q_CONSTEXPR uint mult = 65535 / (65535 >> Shift); + p.setAlpha(mult * (alpha >> Shift)); + return p.premultiplied(); +} + template<> inline uint qConvertArgb32ToA2rgb30<PixelOrderBGR>(QRgb c) { @@ -1000,6 +1000,7 @@ template<enum QtPixelOrder> inline unsigned int qConvertRgb64ToRgb30(QRgba64); template<> inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c) { + c = qRepremultiply<14>(c); const uint a = c.alpha() >> 14; const uint r = c.red() >> 6; const uint g = c.green() >> 6; @@ -1010,6 +1011,7 @@ inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c) template<> inline unsigned int qConvertRgb64ToRgb30<PixelOrderRGB>(QRgba64 c) { + c = qRepremultiply<14>(c); const uint a = c.alpha() >> 14; const uint r = c.red() >> 6; const uint g = c.green() >> 6; diff --git a/tests/auto/gui/painting/qpainter/tst_qpainter.cpp b/tests/auto/gui/painting/qpainter/tst_qpainter.cpp index 3e98e630c2..e8d90edd2d 100644 --- a/tests/auto/gui/painting/qpainter/tst_qpainter.cpp +++ b/tests/auto/gui/painting/qpainter/tst_qpainter.cpp @@ -4882,8 +4882,12 @@ void tst_QPainter::blendARGBonRGB_data() << QPainter::CompositionMode_SourceOver << qRgba(85, 0, 0, 85) << 85; QTest::newRow("ARGB source RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32 << QPainter::CompositionMode_Source << qRgba(255, 0, 0, 85) << 85; + QTest::newRow("ARGB source RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32 + << QPainter::CompositionMode_Source << qRgba(255, 0, 0, 120) << 85; QTest::newRow("ARGB_PM source RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32_Premultiplied << QPainter::CompositionMode_Source << qRgba(85, 0, 0, 85) << 85; + QTest::newRow("ARGB_PM source RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32_Premultiplied + << QPainter::CompositionMode_Source << qRgba(180, 0, 0, 180) << 170; QTest::newRow("ARGB source-in RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32 << QPainter::CompositionMode_SourceIn << qRgba(255, 0, 0, 85) << 85; QTest::newRow("ARGB_PM source-in RGB30") << QImage::Format_RGB30 << QImage::Format_ARGB32_Premultiplied |