diff options
Diffstat (limited to 'src/gui/painting/qrgba64_p.h')
-rw-r--r-- | src/gui/painting/qrgba64_p.h | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h index 0dadc038fa..7776a5b08a 100644 --- a/src/gui/painting/qrgba64_p.h +++ b/src/gui/painting/qrgba64_p.h @@ -185,6 +185,60 @@ inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b) qMin(a.alpha() + b.alpha(), 65535)); } +#if defined __SSE2__ +Q_ALWAYS_INLINE uint toArgb32(__m128i v) +{ + v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); + v = _mm_add_epi32(v, _mm_set1_epi32(128)); + v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8)); + v = _mm_srli_epi32(v, 8); + v = _mm_packs_epi32(v, v); + v = _mm_packus_epi16(v, v); + return _mm_cvtsi128_si32(v); +} +#elif defined __ARM_NEON__ +Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v) +{ + v = vsub_u16(v, vrshr_n_u16(v, 8)); + v = vrshr_n_u16(v, 8); + uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v)); + return vget_lane_u32(vreinterpret_u32_u8(v8), 0); +} +#endif + +inline uint toArgb32(QRgba64 rgba64) +{ +#if defined __SSE2__ + __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2)); + return toArgb32(v); +#elif defined __ARM_NEON__ + uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 }; + v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask)); +#else + v = vext_u16(v, v, 3); +#endif + return toArgb32(v); +#else + return rgba64.toArgb32(); +#endif +} + +inline uint toRgba8888(QRgba64 rgba64) +{ +#if defined __SSE2__ + __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + return toArgb32(v); +#elif defined __ARM_NEON__ + uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); + return toArgb32(v); +#else + return ARGB2RGBA(toArgb32(rgba64)); +#endif +} + #if defined(__SSE2__) Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b) { @@ -199,6 +253,53 @@ Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b) } #endif +inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) +{ + QRgba64 blend; +#if defined(__SSE2__) + __m128i vd = _mm_loadl_epi64((const __m128i *)&d); + __m128i vs = _mm_loadl_epi64((const __m128i *)&s); + __m128i va = _mm_cvtsi32_si128(rgbAlpha); + va = _mm_unpacklo_epi8(va, va); + va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); + __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va); + + vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); + vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb)); + vd = _mm_add_epi32(vd, vs); + vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16)); + vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000)); + vd = _mm_srai_epi32(vd, 16); + vd = _mm_packs_epi32(vd, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i *)&blend, vd); +#elif defined(__ARM_NEON__) + uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d)); + uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s)); + uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha))); + uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]); + uint16x4_t vb = vdup_n_u16(0xffff); + vb = vsub_u16(vb, va); + + uint32x4_t vs32 = vmull_u16(vs, va); + uint32x4_t vd32 = vmull_u16(vd, vb); + vd32 = vaddq_u32(vd32, vs32); + vd32 = vsraq_n_u32(vd32, vd32, 16); + vd = vrshrn_n_u32(vd32, 16); + vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd)); +#else + const int mr = qRed(rgbAlpha); + const int mg = qGreen(rgbAlpha); + const int mb = qBlue(rgbAlpha); + blend.setRed (qt_div_255(s.red() * mr + d.red() * (255 - mr))); + blend.setGreen(qt_div_255(s.green() * mg + d.green() * (255 - mg))); + blend.setBlue (qt_div_255(s.blue() * mb + d.blue() * (255 - mb))); + blend.setAlpha(s.alpha()); +#endif + return blend; +} + + QT_END_NAMESPACE #endif // QRGBA64_P_H |