summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qrgba64_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/painting/qrgba64_p.h')
-rw-r--r--src/gui/painting/qrgba64_p.h95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 0dadc038fa..91b5926e43 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -185,6 +185,55 @@ inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
qMin(a.alpha() + b.alpha(), 65535));
}
+#if defined __SSE2__
+Q_ALWAYS_INLINE uint toArgb32(__m128i v)
+{
+ v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
+ v = _mm_add_epi32(v, _mm_set1_epi32(128));
+ v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8));
+ v = _mm_srli_epi32(v, 8);
+ v = _mm_packs_epi32(v, v);
+ v = _mm_packus_epi16(v, v);
+ return _mm_cvtsi128_si32(v);
+}
+#elif defined __ARM_NEON__
+Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
+{
+ v = vsub_u16(v, vrshr_n_u16(v, 8));
+ v = vrshr_n_u16(v, 8);
+ uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
+ return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
+}
+#endif
+
+inline uint toArgb32(QRgba64 rgba64)
+{
+#if defined __SSE2__
+ __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
+ v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2));
+ return toArgb32(v);
+#elif defined __ARM_NEON__
+ uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
+ v = vext_u16(v, v, 1);
+ return toArgb32(v);
+#else
+ return rgba64.toArgb32();
+#endif
+}
+
+inline uint toRgba8888(QRgba64 rgba64)
+{
+#if defined __SSE2__
+ __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
+ return toArgb32(v);
+#elif defined __ARM_NEON__
+ uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
+ return toArgb32(v);
+#else
+ return ARGB2RGBA(toArgb32(rgba64));
+#endif
+}
+
#if defined(__SSE2__)
Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b)
{
@@ -199,6 +248,52 @@ Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b)
}
#endif
+inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
+{
+ QRgba64 blend;
+#ifdef __SSE2__
+ __m128i vd = _mm_loadl_epi64((const __m128i *)&d);
+ __m128i vs = _mm_loadl_epi64((const __m128i *)&s);
+ __m128i va = _mm_cvtsi32_si128(rgbAlpha);
+ va = _mm_unpacklo_epi8(va, va);
+ __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va);
+
+ vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
+ vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb));
+ vd = _mm_add_epi32(vd, vs);
+ vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16));
+ vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000));
+ vd = _mm_srai_epi32(vd, 16);
+ vd = _mm_packs_epi32(vd, _mm_setzero_si128());
+
+ _mm_storel_epi64((__m128i *)&blend, vd);
+#elif defined(__ARM_NEON__)
+ uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
+ uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
+ uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(rgbAlpha));
+ uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]);
+ uint16x4_t vb = vdup_n_u16(0xffff);
+ vb = vsub_u16(vb, va);
+
+ uint32x4_t vs32 = vmull_u16(vs, va);
+ uint32x4_t vd32 = vmull_u16(vd, vb);
+ vd32 = vaddq_u32(vd32, vs32);
+ vd32 = vsraq_n_u32(vd32, vd32, 16);
+ vd = vrshrn_n_u32(vd32, 16);
+ vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
+#else
+ const int mr = qRed(rgbAlpha);
+ const int mg = qGreen(rgbAlpha);
+ const int mb = qBlue(rgbAlpha);
+ blend.setRed (qt_div_255(s.red() * mr + d.red() * (255 - mr)));
+ blend.setGreen(qt_div_255(s.green() * mg + d.green() * (255 - mg)));
+ blend.setBlue (qt_div_255(s.blue() * mb + d.blue() * (255 - mb)));
+ blend.setAlpha(s.alpha());
+#endif
+ return blend;
+}
+
+
QT_END_NAMESPACE
#endif // QRGBA64_P_H