Diffstat (limited to 'src/gui')
-rw-r--r--  src/gui/painting/qcompositionfunctions.cpp | 100
-rw-r--r--  src/gui/painting/qrgba64_p.h               | 100
2 files changed, 153 insertions(+), 47 deletions(-)
diff --git a/src/gui/painting/qcompositionfunctions.cpp b/src/gui/painting/qcompositionfunctions.cpp
index 9312ee9540..641550766a 100644
--- a/src/gui/painting/qcompositionfunctions.cpp
+++ b/src/gui/painting/qcompositionfunctions.cpp
@@ -87,6 +87,32 @@ QT_BEGIN_NAMESPACE
}\
}
+#if defined __SSE2__
+# define LOAD(ptr) _mm_loadl_epi64((__m128i*)(ptr))
+# define CONVERT(value) _mm_shufflelo_epi16(_mm_cvtsi32_si128(value), _MM_SHUFFLE(0, 0, 0, 0))
+# define STORE(ptr, value) _mm_storel_epi64((__m128i*)(ptr), value)
+# define ADD(p, q) _mm_add_epi32(p, q)
+# define ALPHA(c) _mm_shufflelo_epi16(c, _MM_SHUFFLE(3, 3, 3, 3))
+# define CONST(n) CONVERT(n)
+# define INVALPHA(c) _mm_sub_epi32(CONST(65535), ALPHA(c))
+#elif defined __ARM_NEON__
+# define LOAD(ptr) vreinterpret_u16_u64(vld1_u64((quint64*)(ptr)))
+# define CONVERT(value) vreinterpret_u16_u64(vmov_n_u64(value))
+# define STORE(ptr, value) vst1_u64((quint64*)(ptr), vreinterpret_u64_u16(value))
+# define ADD(p, q) vadd_u16(p, q)
+# define ALPHA(c) vdup_lane_u16(c, 3)
+# define CONST(n) vdup_n_u16(n)
+# define INVALPHA(c) vmvn_u16(ALPHA(c))
+#else
+# define LOAD(ptr) *ptr
+# define CONVERT(value) value
+# define STORE(ptr, value) *ptr = value
+# define ADD(p, q) (p + q)
+# define ALPHA(c) (c).alpha()
+# define CONST(n) n
+# define INVALPHA(c) (65535 - ALPHA(c))
+#endif
+
void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
{
comp_func_Clear_impl(dest, length, const_alpha);
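
[Note, not part of the patch: the LOAD/CONVERT/STORE/ADD/ALPHA/CONST/INVALPHA macros give each composition loop a single spelling that compiles to SSE2 intrinsics, NEON intrinsics, or plain QRgba64 arithmetic. On NEON, INVALPHA is vmvn_u16, a bitwise NOT, which equals 65535 - x for unsigned 16-bit lanes. As a sketch, the solid SourceOver body used further down (with c and cAlpha the hoisted color and inverse alpha) expands as follows:]

    // Scalar expansion of
    // STORE(&dest[i], ADD(c, multiplyAlpha65535(LOAD(&dest[i]), cAlpha))):
    dest[i] = c + multiplyAlpha65535(dest[i], cAlpha);

    // SSE2 expansion of the same source line:
    _mm_storel_epi64((__m128i *)&dest[i],
                     _mm_add_epi32(c, multiplyAlpha65535(
                         _mm_loadl_epi64((__m128i *)&dest[i]), cAlpha)));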
@@ -99,7 +125,7 @@ void QT_FASTCALL comp_func_solid_Clear_rgb64(QRgba64 *dest, int length, QRgba64,
else {
int ialpha = 255 - const_alpha;
for (int i = 0; i < length; ++i) {
- dest[i] = multiplyAlpha255(dest[i], ialpha);
+ STORE(&dest[i], multiplyAlpha255(LOAD(&dest[i]), ialpha));
}
}
}
@@ -116,7 +142,7 @@ void QT_FASTCALL comp_func_Clear_rgb64(QRgba64 *dest, const QRgba64 *, int lengt
else {
int ialpha = 255 - const_alpha;
for (int i = 0; i < length; ++i) {
- dest[i] = multiplyAlpha255(dest[i], ialpha);
+ STORE(&dest[i], multiplyAlpha255(LOAD(&dest[i]), ialpha));
}
}
}
@@ -146,9 +172,9 @@ void QT_FASTCALL comp_func_solid_Source_rgb64(QRgba64 *dest, int length, QRgba64
qt_memfill64((quint64*)dest, color, length);
else {
int ialpha = 255 - const_alpha;
- color = multiplyAlpha255(color, const_alpha);
+ auto c = multiplyAlpha255(CONVERT(color), const_alpha);
for (int i = 0; i < length; ++i) {
- dest[i] = color + multiplyAlpha255(dest[i], ialpha);
+ STORE(&dest[i], ADD(c, multiplyAlpha255(LOAD(&dest[i]), ialpha)));
}
}
}
@@ -174,7 +200,7 @@ void QT_FASTCALL comp_func_Source_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const QRg
else {
int ialpha = 255 - const_alpha;
for (int i = 0; i < length; ++i) {
- dest[i] = interpolate255(src[i], const_alpha, dest[i], ialpha);
+ STORE(&dest[i], interpolate255(LOAD(&src[i]), const_alpha, LOAD(&dest[i]), ialpha));
}
}
}
@@ -221,10 +247,12 @@ void QT_FASTCALL comp_func_solid_SourceOver_rgb64(QRgba64 *dest, int length, QRg
if (const_alpha == 255 && color.isOpaque()) {
qt_memfill64((quint64*)dest, color, length);
} else {
+ auto c = CONVERT(color);
if (const_alpha != 255)
- color = multiplyAlpha255(color, const_alpha);
+ c = multiplyAlpha255(c, const_alpha);
+ auto cAlpha = INVALPHA(c);
for (int i = 0; i < length; ++i) {
- dest[i] = color + multiplyAlpha65535(dest[i], 65535 - color.alpha());
+ STORE(&dest[i], ADD(c, multiplyAlpha65535(LOAD(&dest[i]), cAlpha)));
}
}
}
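
[Note, not part of the patch: the loop above is the premultiplied Porter-Duff source-over equation, d' = s + d * (1 - s.alpha), with alpha in 16-bit fixed point. A minimal scalar sketch using the QRgba64 helpers from qrgba64_p.h; the function name is hypothetical:]

    // Solid source-over, scalar form; color must already be premultiplied.
    static void solidSourceOverSketch(QRgba64 *dest, int length, QRgba64 color)
    {
        const uint invAlpha = 65535 - color.alpha(); // hoisted, like INVALPHA(c)
        for (int i = 0; i < length; ++i)
            dest[i] = color + multiplyAlpha65535(dest[i], invAlpha);
    }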
@@ -258,12 +286,12 @@ void QT_FASTCALL comp_func_SourceOver_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const
if (s.isOpaque())
dest[i] = s;
else if (!s.isTransparent())
- dest[i] = s + multiplyAlpha65535(dest[i], 65535 - s.alpha());
+ STORE(&dest[i], ADD(CONVERT(s), multiplyAlpha65535(LOAD(&dest[i]), 65535 - s.alpha())));
}
} else {
for (int i = 0; i < length; ++i) {
- QRgba64 s = multiplyAlpha255(src[i], const_alpha);
- dest[i] = s + multiplyAlpha65535(dest[i], 65535 - s.alpha());
+ auto s = multiplyAlpha255(LOAD(&src[i]), const_alpha);
+ STORE(&dest[i], ADD(s, multiplyAlpha65535(LOAD(&dest[i]), INVALPHA(s))));
}
}
}
@@ -287,11 +315,12 @@ void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint co
void QT_FASTCALL comp_func_solid_DestinationOver_rgb64(QRgba64 *dest, int length, QRgba64 color, uint const_alpha)
{
+ auto c = CONVERT(color);
if (const_alpha != 255)
- color = multiplyAlpha255(color, const_alpha);
+ c = multiplyAlpha255(c, const_alpha);
for (int i = 0; i < length; ++i) {
- QRgba64 d = dest[i];
- dest[i] = d + multiplyAlpha65535(color, 65535 - d.alpha());
+ auto d = LOAD(&dest[i]);
+ STORE(&dest[i], ADD(d, multiplyAlpha65535(c, INVALPHA(d))));
}
}
@@ -318,14 +347,14 @@ void QT_FASTCALL comp_func_DestinationOver_rgb64(QRgba64 *Q_DECL_RESTRICT dest,
{
if (const_alpha == 255) {
for (int i = 0; i < length; ++i) {
- QRgba64 d = dest[i];
- dest[i] = d + multiplyAlpha65535(src[i], 65535 - d.alpha());
+ auto d = LOAD(&dest[i]);
+ STORE(&dest[i], ADD(d, multiplyAlpha65535(LOAD(&src[i]), INVALPHA(d))));
}
} else {
for (int i = 0; i < length; ++i) {
- QRgba64 d = dest[i];
- QRgba64 s = multiplyAlpha255(src[i], const_alpha);
- dest[i] = d + multiplyAlpha65535(s, 65535 - d.alpha());
+ auto d = LOAD(&dest[i]);
+ auto s = multiplyAlpha255(LOAD(&src[i]), const_alpha);
+ STORE(&dest[i], ADD(d, multiplyAlpha65535(s, INVALPHA(d))));
}
}
}
@@ -393,15 +422,15 @@ void QT_FASTCALL comp_func_SourceIn_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const Q
{
if (const_alpha == 255) {
for (int i = 0; i < length; ++i) {
- dest[i] = multiplyAlpha65535(src[i], dest[i].alpha());
+ STORE(&dest[i], multiplyAlpha65535(LOAD(&src[i]), dest[i].alpha()));
}
} else {
uint ca = const_alpha * 257;
- uint cia = 65535 - ca;
+ auto cia = CONST(65535 - ca);
for (int i = 0; i < length; ++i) {
- QRgba64 d = dest[i];
- QRgba64 s = multiplyAlpha65535(src[i], ca);
- dest[i] = interpolate65535(s, d.alpha(), d, cia);
+ auto d = LOAD(&dest[i]);
+ auto s = multiplyAlpha65535(LOAD(&src[i]), ca);
+ STORE(&dest[i], interpolate65535(s, ALPHA(d), d, cia));
}
}
}
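
[Note, not part of the patch: SourceIn keeps the source only where the destination has coverage, d' = s * d.alpha. The ca * 257 scaling converts an 8-bit const_alpha to a 16-bit factor exactly, since 255 * 257 == 65535. A scalar sketch of the partial-alpha branch, mirroring the removed lines above:]

    // SourceIn with const_alpha < 255, scalar form.
    uint ca  = const_alpha * 257;   // 0..255 -> 0..65535 (255 * 257 == 65535)
    uint cia = 65535 - ca;
    for (int i = 0; i < length; ++i) {
        QRgba64 d = dest[i];
        QRgba64 s = multiplyAlpha65535(src[i], ca);
        dest[i] = interpolate65535(s, d.alpha(), d, cia);
    }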
@@ -431,7 +460,7 @@ void QT_FASTCALL comp_func_solid_DestinationIn_rgb64(QRgba64 *dest, int length,
if (const_alpha != 255)
a = qt_div_65535(a * ca64k) + 65535 - ca64k;
for (int i = 0; i < length; ++i) {
- dest[i] = multiplyAlpha65535(dest[i], a);
+ STORE(&dest[i], multiplyAlpha65535(LOAD(&dest[i]), a));
}
}
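
[Note, not part of the patch: the single multiplier above folds the partial-coverage blend into one factor. Keeping d * a fully would use a' = a; leaving d untouched would use a' = 65535; blending the two by const_alpha gives a' = (a * ca + 65535 * (65535 - ca)) / 65535 = a * ca / 65535 + (65535 - ca), which is exactly the expression in the loop. A scalar illustration; destinationInFactor is a hypothetical name:]

    // DestinationIn factor: blends the solid alpha toward full coverage.
    static uint destinationInFactor(uint a, uint ca64k) // ca64k = const_alpha * 257
    {
        return qt_div_65535(a * ca64k) + 65535 - ca64k;
    }
    // const_alpha == 128 gives ca64k == 32896:
    //   a == 65535 -> 65535 (destination fully kept)
    //   a == 0     -> 32639 (destination kept at the uncovered fraction)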
@@ -885,14 +914,19 @@ void QT_FASTCALL comp_func_solid_Plus(uint *dest, int length, uint color, uint c
void QT_FASTCALL comp_func_solid_Plus_rgb64(QRgba64 *dest, int length, QRgba64 color, uint const_alpha)
{
+ auto b = CONVERT(color);
if (const_alpha == 255) {
for (int i = 0; i < length; ++i) {
- dest[i] = addWithSaturation(dest[i], color);
+ auto a = LOAD(&dest[i]);
+ a = addWithSaturation(a, b);
+ STORE(&dest[i], a);
}
} else {
for (int i = 0; i < length; ++i) {
- QRgba64 d = addWithSaturation(dest[i], color);
- dest[i] = interpolate255(d, const_alpha, dest[i], 255 - const_alpha);
+ auto a = LOAD(&dest[i]);
+ auto d = addWithSaturation(a, b);
+ a = interpolate255(d, const_alpha, a, 255 - const_alpha);
+ STORE(&dest[i], a);
}
}
}
@@ -924,12 +958,18 @@ void QT_FASTCALL comp_func_Plus_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const QRgba
{
if (const_alpha == 255) {
for (int i = 0; i < length; ++i) {
- dest[i] = addWithSaturation(dest[i], src[i]);
+ auto a = LOAD(&dest[i]);
+ auto b = LOAD(&src[i]);
+ a = addWithSaturation(a, b);
+ STORE(&dest[i], a);
}
} else {
for (int i = 0; i < length; ++i) {
- QRgba64 d = addWithSaturation(dest[i], src[i]);
- dest[i] = interpolate255(d, const_alpha, dest[i], 255 - const_alpha);
+ auto a = LOAD(&dest[i]);
+ auto b = LOAD(&src[i]);
+ auto d = addWithSaturation(a, b);
+ a = interpolate255(d, const_alpha, a, 255 - const_alpha);
+ STORE(&dest[i], a);
}
}
}
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index cf3dad5f90..b88a5d4c92 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -72,27 +72,49 @@ inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256)
inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
{
+ return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
+ qt_div_65535(rgba64.green() * alpha65535),
+ qt_div_65535(rgba64.blue() * alpha65535),
+ qt_div_65535(rgba64.alpha() * alpha65535));
+}
+
#ifdef __SSE2__
- const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
- __m128i vs = _mm_loadl_epi64((__m128i*)&rgba64);
+Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
+{
+ __m128i vs = rgba64;
vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16));
vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000));
vs = _mm_srai_epi32(vs, 16);
vs = _mm_packs_epi32(vs, _mm_setzero_si128());
- _mm_storel_epi64((__m128i*)&rgba64, vs);
- return rgba64;
-#else
- return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
- qt_div_65535(rgba64.green() * alpha65535),
- qt_div_65535(rgba64.blue() * alpha65535),
- qt_div_65535(rgba64.alpha() * alpha65535));
+ return vs;
+}
+Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
+{
+ const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
+ return multiplyAlpha65535(rgba64, va);
+}
#endif
+
+#if defined(__ARM_NEON__)
+Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
+{
+ uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
+ vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
+ return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
}
+Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
+{
+ uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
+ vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
+ return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
+}
+#endif
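
[Note, not part of the patch: both SIMD bodies above implement the same fixed-point division by 65535 as Qt's scalar qt_div_65535(): t = x + (x >> 16), then (t + 0x8000) >> 16. On NEON, vsraq_n_u32 performs the shift-and-accumulate and vrshrn_n_u32 performs the +0x8000 rounding and the narrow in one step. A standalone scalar check, illustration only, that the sequence is exact at the alpha endpoints and stays within one of the exact quotient:]

    #include <cassert>
    #include <cstdint>

    // Scalar model of the SIMD sequence: x / 65535 in fixed point.
    static uint32_t div65535(uint32_t x)
    {
        return (x + (x >> 16) + 0x8000u) >> 16;
    }

    int main()
    {
        for (uint32_t c = 0; c <= 65535; ++c) {
            assert(div65535(c * 65535u) == c);   // alpha == 65535: identity
            assert(div65535(c * 0u) == 0);       // alpha == 0: clears
            uint32_t x = c * 33333u;             // an arbitrary alpha value
            uint32_t q = x / 65535u;
            assert(div65535(x) == q || div65535(x) == q + 1); // within 1 of floor
        }
    }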
-inline QRgba64 multiplyAlpha255(QRgba64 rgba64, uint alpha255)
+template<typename T>
+inline T multiplyAlpha255(T rgba64, uint alpha255)
{
-#ifdef __SSE2__
+#if defined(__SSE2__) || defined(__ARM_NEON__)
return multiplyAlpha65535(rgba64, alpha255 * 257);
#else
return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
@@ -112,25 +134,69 @@ inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
}
+#if defined __SSE2__
+Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
+}
+#endif
+
+#if defined __ARM_NEON__
+Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+{
+ return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
+}
+#endif
+
inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
{
return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2));
}
+#if defined __SSE2__
+Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+// alpha2 below is const-ref because otherwise MSVC2013 complains that it can't 16-byte align the argument.
+Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
+{
+ return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+#endif
+
+#if defined __ARM_NEON__
+Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+{
+ return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
+{
+ return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
+}
+#endif
+
inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
{
-#if defined(__SSE2__) && defined(Q_PROCESSOR_X86_64)
- __m128i va = _mm_cvtsi64_si128((quint64)a);
- __m128i vb = _mm_cvtsi64_si128((quint64)b);
- va = _mm_adds_epu16(va, vb);
- return QRgba64::fromRgba64(_mm_cvtsi128_si64(va));
-#else
return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535),
qMin(a.green() + b.green(), 65535),
qMin(a.blue() + b.blue(), 65535),
qMin(a.alpha() + b.alpha(), 65535));
+}
+
+#if defined(__SSE2__)
+Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b)
+{
+ return _mm_adds_epu16(a, b);
+}
#endif
+
+#if defined(__ARM_NEON__)
+Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b)
+{
+ return vqmovn_u32(vaddl_u16(a, b));
}
+#endif
QT_END_NAMESPACE
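
[Note, not part of the patch: all three addWithSaturation overloads clamp each 16-bit channel at 65535, which is what the Plus operator requires. _mm_adds_epu16 saturates directly; the NEON variant widens with vaddl_u16 and narrows back with unsigned saturation via vqmovn_u32. A scalar model of one channel, with a hypothetical name:]

    // Per-channel saturating add, scalar model of addWithSaturation().
    static inline uint16_t addSat16(uint16_t a, uint16_t b)
    {
        const uint32_t s = uint32_t(a) + b;       // widen, like vaddl_u16
        return uint16_t(s > 65535u ? 65535u : s); // clamp, like vqmovn_u32
    }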