diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-11-22 16:41:15 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-12-01 19:22:11 +0000 |
commit | d133bbf02a87080ccb0bbfa0ebdd96e51d2bc733 (patch) | |
tree | fbf9b26b18932c8380500fe46385394c34e0ecc4 /src/gui/painting/qrgba64_p.h | |
parent | 5d35eea3e15023d326694b3cf08795480b175773 (diff) |
Optimized LCD text blending
Make a QRgba64 helper function for LCD blending following our standard
form.
Change-Id: Ib29cf8e780a042ed46554c55c67a698a0e3eddcd
Reviewed-by: Erik Verbruggen <erik.verbruggen@qt.io>
Diffstat (limited to 'src/gui/painting/qrgba64_p.h')
-rw-r--r-- | src/gui/painting/qrgba64_p.h | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h index 08e73942df..91b5926e43 100644 --- a/src/gui/painting/qrgba64_p.h +++ b/src/gui/painting/qrgba64_p.h @@ -248,6 +248,52 @@ Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b) } #endif +inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) +{ + QRgba64 blend; +#ifdef __SSE2__ + __m128i vd = _mm_loadl_epi64((const __m128i *)&d); + __m128i vs = _mm_loadl_epi64((const __m128i *)&s); + __m128i va = _mm_cvtsi32_si128(rgbAlpha); + va = _mm_unpacklo_epi8(va, va); + __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va); + + vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); + vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb)); + vd = _mm_add_epi32(vd, vs); + vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16)); + vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000)); + vd = _mm_srai_epi32(vd, 16); + vd = _mm_packs_epi32(vd, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i *)&blend, vd); +#elif defined(__ARM_NEON__) + uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d)); + uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s)); + uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(rgbAlpha)); + uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]); + uint16x4_t vb = vdup_n_u16(0xffff); + vb = vsub_u16(vb, va); + + uint32x4_t vs32 = vmull_u16(vs, va); + uint32x4_t vd32 = vmull_u16(vd, vb); + vd32 = vaddq_u32(vd32, vs32); + vd32 = vsraq_n_u32(vd32, vd32, 16); + vd = vrshrn_n_u32(vd32, 16); + vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd)); +#else + const int mr = qRed(rgbAlpha); + const int mg = qGreen(rgbAlpha); + const int mb = qBlue(rgbAlpha); + blend.setRed (qt_div_255(s.red() * mr + d.red() * (255 - mr))); + blend.setGreen(qt_div_255(s.green() * mg + d.green() * (255 - mg))); + blend.setBlue (qt_div_255(s.blue() * mb + d.blue() * (255 - mb))); + blend.setAlpha(s.alpha()); +#endif + return blend; +} + + 
QT_END_NAMESPACE #endif // QRGBA64_P_H |