summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qrgba64_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/painting/qrgba64_p.h')
-rw-r--r--src/gui/painting/qrgba64_p.h101
1 files changed, 84 insertions, 17 deletions
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index cf3dad5f90..0dadc038fa 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -51,6 +51,7 @@
// We mean it.
//
+#include <QtGui/private/qtguiglobal_p.h>
#include <QtGui/qrgba64.h>
#include <QtGui/private/qdrawhelper_p.h>
#include <private/qsimd_p.h>
@@ -72,27 +73,49 @@ inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256)
inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
{
+ return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
+ qt_div_65535(rgba64.green() * alpha65535),
+ qt_div_65535(rgba64.blue() * alpha65535),
+ qt_div_65535(rgba64.alpha() * alpha65535));
+}
+
#ifdef __SSE2__
- const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
- __m128i vs = _mm_loadl_epi64((__m128i*)&rgba64);
+Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
+{
+ __m128i vs = rgba64;
vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16));
vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000));
vs = _mm_srai_epi32(vs, 16);
vs = _mm_packs_epi32(vs, _mm_setzero_si128());
- _mm_storel_epi64((__m128i*)&rgba64, vs);
- return rgba64;
-#else
- return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
- qt_div_65535(rgba64.green() * alpha65535),
- qt_div_65535(rgba64.blue() * alpha65535),
- qt_div_65535(rgba64.alpha() * alpha65535));
+ return vs;
+}
+Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
+{
+ const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
+ return multiplyAlpha65535(rgba64, va);
+}
#endif
+
+#if defined(__ARM_NEON__)
+Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
+{
+ uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
+ vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
+ return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
+Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
+{
+ uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
+ vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
+ return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
+}
+#endif
-inline QRgba64 multiplyAlpha255(QRgba64 rgba64, uint alpha255)
+template<typename T>
+inline T multiplyAlpha255(T rgba64, uint alpha255)
{
-#ifdef __SSE2__
+#if defined(__SSE2__) || defined(__ARM_NEON__)
return multiplyAlpha65535(rgba64, alpha255 * 257);
#else
return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
@@ -112,25 +135,69 @@ inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
}
+#if defined __SSE2__
+Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
+}
+#endif
+
+#if defined __ARM_NEON__
+Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+{
+ return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
+}
+#endif
+
inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2));
}
+#if defined __SSE2__
+Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+// alpha2 below is const-ref because otherwise MSVC2013 complains that it can't 16-byte align the argument.
+Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+#endif
+
+#if defined __ARM_NEON__
+Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+{
+ return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
+{
+ return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+#endif
+
inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
{
-#if defined(__SSE2__) && defined(Q_PROCESSOR_X86_64)
- __m128i va = _mm_cvtsi64_si128((quint64)a);
- __m128i vb = _mm_cvtsi64_si128((quint64)b);
- va = _mm_adds_epu16(va, vb);
- return QRgba64::fromRgba64(_mm_cvtsi128_si64(va));
-#else
return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535),
qMin(a.green() + b.green(), 65535),
qMin(a.blue() + b.blue(), 65535),
qMin(a.alpha() + b.alpha(), 65535));
+}
+
+#if defined(__SSE2__)
+Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b)
+{
+ return _mm_adds_epu16(a, b);
+}
#endif
+
+#if defined(__ARM_NEON__)
+Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b)
+{
+ return vqmovn_u32(vaddl_u16(a, b));
}
+#endif
QT_END_NAMESPACE