diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-06-30 13:26:25 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-08-11 09:33:06 +0000 |
commit | 55512950281260b85ea40c08239e459c76b696d2 (patch) | |
tree | 98faefc98c2d7e81b65517d36ccfecfed9a79240 /src/gui/painting/qdrawhelper.cpp | |
parent | 49ea02f435876d514898d587700a21e1a8972f09 (diff) |
Fix repremultiply from RGB64 to RGB30
Just like from RGB32 to RGB30 we must also repremultiply when converting
from RGB64 because the alpha channel loses more precision than the other
color channels.
Since this is not approximated accurately in the simple blending
functions and the functions are no longer needed now the main render
engine supports higher accuracy, the simple blending routines for RGB30
have been removed.
Change-Id: I2b7b8eb015e330a487848fc4370ad3a1e966be91
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 67 |
1 files changed, 48 insertions, 19 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 07e5a3d19b..0cf7e20605 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1252,25 +1252,54 @@ static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *b const __m128i cmask = _mm_set1_epi32(0x000003ff); int i = 0; __m128i vr, vg, vb, va; - for (; i < count-1; i += 2) { - __m128i vs = _mm_loadu_si128((const __m128i*)buffer); - buffer += 2; - vr = _mm_srli_epi64(vs, 6); - vg = _mm_srli_epi64(vs, 16 + 6 - 10); - vb = _mm_srli_epi64(vs, 32 + 6); - vr = _mm_and_si128(vr, cmask); - vg = _mm_and_si128(vg, gmask); - vb = _mm_and_si128(vb, cmask); - va = _mm_srli_epi64(vs, 48 + 14); - if (PixelOrder == PixelOrderRGB) - vr = _mm_slli_epi32(vr, 20); - else - vb = _mm_slli_epi32(vb, 20); - va = _mm_slli_epi32(va, 30); - __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); - vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); - _mm_storel_epi64((__m128i*)dest, vd); - dest += 2; + if (i < count && (const uintptr_t)buffer & 0x8) { + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + ++i; + } + + for (; i < count-15; i += 16) { + // Repremultiplying is really expensive and hard to do in SIMD without AVX2, + // so we try to avoid it by checking if it is needed 16 samples at a time. + __m128i vOr = _mm_set1_epi32(0); + __m128i vAnd = _mm_set1_epi32(0xffffffff); + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)(buffer + j)); + vOr = _mm_or_si128(vOr, vs); + vAnd = _mm_and_si128(vAnd, vs); + } + const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7)); + const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7)); + + if (andAlpha == 0xffff) { + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)buffer); + buffer += 2; + vr = _mm_srli_epi64(vs, 6); + vg = _mm_srli_epi64(vs, 16 + 6 - 10); + vb = _mm_srli_epi64(vs, 32 + 6); + vr = _mm_and_si128(vr, cmask); + vg = _mm_and_si128(vg, gmask); + vb = _mm_and_si128(vb, cmask); + va = _mm_srli_epi64(vs, 48 + 14); + if (PixelOrder == PixelOrderRGB) + vr = _mm_slli_epi32(vr, 20); + else + vb = _mm_slli_epi32(vb, 20); + va = _mm_slli_epi32(va, 30); + __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); + vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); + _mm_storel_epi64((__m128i*)dest, vd); + dest += 2; + } + } else if (orAlpha == 0) { + for (int j = 0; j < 16; ++j) { + *dest++ = 0; + buffer++; + } + } else { + for (int j = 0; j < 16; ++j) + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + } } for (; i < count; ++i) |