summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gui/painting/qblendfunctions.cpp187
-rw-r--r--src/gui/painting/qdrawhelper.cpp67
-rw-r--r--src/gui/painting/qdrawhelper_p.h26
3 files changed, 62 insertions, 218 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp
index dbdd82e432..0898a20998 100644
--- a/src/gui/painting/qblendfunctions.cpp
+++ b/src/gui/painting/qblendfunctions.cpp
@@ -395,169 +395,6 @@ void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
}
}
-template<QtPixelOrder PixelOrder>
-static void qt_blend_argb32pm_on_a2rgb30pm(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
-{
-#ifdef QT_DEBUG_DRAW
- fprintf(stdout, "qt_blend_argb32pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
- destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- fflush(stdout);
-#endif
-
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- if (const_alpha == 256) {
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint s = src[x];
- dst[x] = qConvertArgb32ToA2rgb30<PixelOrder>(s) + BYTE_MUL_RGB30(dst[x], 255 - qAlpha(s));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- } else if (const_alpha != 0) {
- const_alpha = (const_alpha * 255) >> 8;
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint s = src[x];
- dst[x] = BYTE_MUL_RGB30(qConvertArgb32ToA2rgb30<PixelOrder>(s), const_alpha) + BYTE_MUL_RGB30(dst[x], 255 - qt_div_255(qAlpha(s) * const_alpha));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- }
-}
-
-template<QtPixelOrder PixelOrder>
-static void qt_blend_rgb32_on_rgb30(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
-{
-#ifdef QT_DEBUG_DRAW
- fprintf(stdout, "qt_blend_rgb32_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
- destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- fflush(stdout);
-#endif
-
- if (const_alpha != 256) {
- qt_blend_argb32pm_on_a2rgb30pm<PixelOrder>(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- return;
- }
-
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- dst[x] = qConvertRgb32ToRgb30<PixelOrder>(src[x]);
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
-}
-
-static void qt_blend_a2rgb30pm_on_a2rgb30pm(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
-{
-#ifdef QT_DEBUG_DRAW
- fprintf(stdout, "qt_blend_a2rgb30pm_on_a2rgb30pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
- destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- fflush(stdout);
-#endif
-
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- if (const_alpha == 256) {
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint s = src[x];
- dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- } else if (const_alpha != 0) {
- const uint const_alpha255 = (const_alpha * 255) >> 8;
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8;
- uint s = BYTE_MUL_RGB30(src[x], const_alpha255);
- dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - a);
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- }
-}
-
-
-static void qt_blend_rgb30_on_rgb30(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
-{
-#ifdef QT_DEBUG_DRAW
- fprintf(stdout, "qt_blend_rgb30_on_rgb30: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
- destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- fflush(stdout);
-#endif
-
- if (const_alpha != 256) {
- qt_blend_a2rgb30pm_on_a2rgb30pm(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- return;
- }
-
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- int len = w * 4;
- for (int y=0; y<h; ++y) {
- memcpy(dst, src, len);
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
-}
-
-static void qt_blend_a2bgr30pm_on_a2rgb30pm(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
-{
-#ifdef QT_DEBUG_DRAW
- fprintf(stdout, "qt_blend_a2bgr30pm_on_a2rgb32pm: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
- destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
- fflush(stdout);
-#endif
-
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- if (const_alpha == 256) {
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint s = qRgbSwapRgb30(src[x]);
- dst[x] = s + BYTE_MUL_RGB30(dst[x], 255 - qAlphaRgb30(s));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- } else if (const_alpha != 0) {
- const uint const_alpha255 = (const_alpha * 255) >> 8;
- for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint a = (qAlphaRgb30(src[x]) * const_alpha) >> 8;
- uint s = BYTE_MUL_RGB30(src[x], const_alpha255);
- dst[x] = qRgbSwapRgb30(s) + BYTE_MUL_RGB30(dst[x], 255 - a);
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
- }
-}
-
struct Blend_RGB32_on_RGB32_NoAlpha {
inline void write(quint32 *dst, quint32 src) { *dst = src; }
@@ -772,30 +609,6 @@ void qInitBlendFunctions()
qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32;
qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32;
#endif
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>;
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>;
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30;
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_BGR30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderBGR>;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderBGR>;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_BGR30] = qt_blend_rgb30_on_rgb30;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_RGB30] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2BGR30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30;
- qBlendFunctions[QImage::Format_RGB30][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb30<PixelOrderRGB>;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32pm_on_a2rgb30pm<PixelOrderRGB>;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_BGR30] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2BGR30_Premultiplied] = qt_blend_a2bgr30pm_on_a2rgb30pm;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_RGB30] = qt_blend_rgb30_on_rgb30;
- qBlendFunctions[QImage::Format_A2RGB30_Premultiplied][QImage::Format_A2RGB30_Premultiplied] = qt_blend_a2rgb30pm_on_a2rgb30pm;
qTransformFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_transform_image_rgb32_on_rgb32;
qTransformFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_argb32;
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 07e5a3d19b..0cf7e20605 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1252,25 +1252,54 @@ static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *b
const __m128i cmask = _mm_set1_epi32(0x000003ff);
int i = 0;
__m128i vr, vg, vb, va;
- for (; i < count-1; i += 2) {
- __m128i vs = _mm_loadu_si128((const __m128i*)buffer);
- buffer += 2;
- vr = _mm_srli_epi64(vs, 6);
- vg = _mm_srli_epi64(vs, 16 + 6 - 10);
- vb = _mm_srli_epi64(vs, 32 + 6);
- vr = _mm_and_si128(vr, cmask);
- vg = _mm_and_si128(vg, gmask);
- vb = _mm_and_si128(vb, cmask);
- va = _mm_srli_epi64(vs, 48 + 14);
- if (PixelOrder == PixelOrderRGB)
- vr = _mm_slli_epi32(vr, 20);
- else
- vb = _mm_slli_epi32(vb, 20);
- va = _mm_slli_epi32(va, 30);
- __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
- vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
- _mm_storel_epi64((__m128i*)dest, vd);
- dest += 2;
+ if (i < count && (const uintptr_t)buffer & 0x8) {
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+ ++i;
+ }
+
+ for (; i < count-15; i += 16) {
+ // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
+ // so we try to avoid it by checking if it is needed 16 samples at a time.
+ __m128i vOr = _mm_set1_epi32(0);
+ __m128i vAnd = _mm_set1_epi32(0xffffffff);
+ for (int j = 0; j < 16; j += 2) {
+ __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
+ vOr = _mm_or_si128(vOr, vs);
+ vAnd = _mm_and_si128(vAnd, vs);
+ }
+ const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
+ const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));
+
+ if (andAlpha == 0xffff) {
+ for (int j = 0; j < 16; j += 2) {
+ __m128i vs = _mm_load_si128((const __m128i*)buffer);
+ buffer += 2;
+ vr = _mm_srli_epi64(vs, 6);
+ vg = _mm_srli_epi64(vs, 16 + 6 - 10);
+ vb = _mm_srli_epi64(vs, 32 + 6);
+ vr = _mm_and_si128(vr, cmask);
+ vg = _mm_and_si128(vg, gmask);
+ vb = _mm_and_si128(vb, cmask);
+ va = _mm_srli_epi64(vs, 48 + 14);
+ if (PixelOrder == PixelOrderRGB)
+ vr = _mm_slli_epi32(vr, 20);
+ else
+ vb = _mm_slli_epi32(vb, 20);
+ va = _mm_slli_epi32(va, 30);
+ __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
+ vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
+ _mm_storel_epi64((__m128i*)dest, vd);
+ dest += 2;
+ }
+ } else if (orAlpha == 0) {
+ for (int j = 0; j < 16; ++j) {
+ *dest++ = 0;
+ buffer++;
+ }
+ } else {
+ for (int j = 0; j < 16; ++j)
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+ }
}
for (; i < count; ++i)
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 2c222b97c2..0034bfdf91 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -712,18 +712,6 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) {
static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; }
static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; }
-static Q_ALWAYS_INLINE uint BYTE_MUL_RGB30(uint x, uint a) {
- uint xa = x >> 30;
- uint xr = (x >> 20) & 0x3ff;
- uint xg = (x >> 10) & 0x3ff;
- uint xb = x & 0x3ff;
- xa = qt_div_255(xa * a);
- xr = qt_div_255(xr * a);
- xg = qt_div_255(xg * a);
- xb = qt_div_255(xb * a);
- return (xa << 30) | (xr << 20) | (xg << 10) | xb;
-}
-
static Q_ALWAYS_INLINE uint qAlphaRgb30(uint c)
{
uint a = c >> 30;
@@ -897,6 +885,18 @@ inline QRgb qRepremultiply(QRgb p)
return qPremultiply(p);
}
+template<unsigned int Shift>
+inline QRgba64 qRepremultiply(QRgba64 p)
+{
+ const uint alpha = p.alpha();
+ if (alpha == 65535 || alpha == 0)
+ return p;
+ p = p.unpremultiplied();
+ Q_CONSTEXPR uint mult = 65535 / (65535 >> Shift);
+ p.setAlpha(mult * (alpha >> Shift));
+ return p.premultiplied();
+}
+
template<>
inline uint qConvertArgb32ToA2rgb30<PixelOrderBGR>(QRgb c)
{
@@ -1000,6 +1000,7 @@ template<enum QtPixelOrder> inline unsigned int qConvertRgb64ToRgb30(QRgba64);
template<>
inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c)
{
+ c = qRepremultiply<14>(c);
const uint a = c.alpha() >> 14;
const uint r = c.red() >> 6;
const uint g = c.green() >> 6;
@@ -1010,6 +1011,7 @@ inline unsigned int qConvertRgb64ToRgb30<PixelOrderBGR>(QRgba64 c)
template<>
inline unsigned int qConvertRgb64ToRgb30<PixelOrderRGB>(QRgba64 c)
{
+ c = qRepremultiply<14>(c);
const uint a = c.alpha() >> 14;
const uint r = c.red() >> 6;
const uint g = c.green() >> 6;