summary | refs | log | tree | commit | diff | stats
path: root/src/gui
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2014-02-02 16:23:00 -0800
committer: The Qt Project <gerrit-noreply@qt-project.org> 2014-02-12 20:12:17 +0100
commit: ab1ec81f5875c7a78eb0543b562b50058ec251fa (patch)
tree: 6ae14e546508eb064731794ecfedc0ec6ede7815 /src/gui
parent: af8c35bda4fd245061e3a43df32f705903e855dd (diff)
Optimize qt_memfill32 a little
Benchmarking shows that qt_memfill32 took up to 3.5% of Qt Creator's initialization cost.

Optimize by modifying only one variable per loop iteration: instead of updating both n and dst128, we only update dst128. Removing the Duff's Device also improves the code, since the compiler won't try to update dst128 four times per loop, only once.

Moving the epilogue close to the prologue was just to make the code a little cleaner.

Change-Id: I5b74e27d520ca821f380aef0533c244805f003b7
Reviewed-by: Gunnar Sletta <gunnar.sletta@jollamobile.com>
Diffstat (limited to 'src/gui')
-rw-r--r-- src/gui/painting/qdrawhelper_sse2.cpp 30
1 file changed, 18 insertions, 12 deletions
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index d11ba0b26c..f5523f7113 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -259,26 +259,32 @@ void qt_memfill32(quint32 *dest, quint32 value, int count)
case 12: *dest++ = value; --count;
}
+ const int rest = count & 0x3;
+ if (rest) {
+ switch (rest) {
+ case 3: dest[count - 3] = value;
+ case 2: dest[count - 2] = value;
+ case 1: dest[count - 1] = value;
+ }
+ }
+
int count128 = count / 4;
__m128i *dst128 = reinterpret_cast<__m128i*>(dest);
+ __m128i *end128 = dst128 + count128;
const __m128i value128 = _mm_set_epi32(value, value, value, value);
- int n = (count128 + 3) / 4;
+ while (dst128 + 3 < end128) {
+ _mm_stream_si128(dst128 + 0, value128);
+ _mm_stream_si128(dst128 + 1, value128);
+ _mm_stream_si128(dst128 + 2, value128);
+ _mm_stream_si128(dst128 + 3, value128);
+ dst128 += 4;
+ }
+
switch (count128 & 0x3) {
- case 0: do { _mm_stream_si128(dst128++, value128);
case 3: _mm_stream_si128(dst128++, value128);
case 2: _mm_stream_si128(dst128++, value128);
case 1: _mm_stream_si128(dst128++, value128);
- } while (--n > 0);
- }
-
- const int rest = count & 0x3;
- if (rest) {
- switch (rest) {
- case 3: dest[count - 3] = value;
- case 2: dest[count - 2] = value;
- case 1: dest[count - 1] = value;
- }
}
}