summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd_p.h
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2013-12-12 22:41:04 -0800
committer: The Qt Project <gerrit-noreply@qt-project.org> 2014-01-31 21:51:11 +0100
commit: f32a5b158f3929a8f391240b4f21dde1db294637 (patch)
tree: 21b41aa36fc01c50ced58f1a34ba6064162b46cc /src/corelib/tools/qsimd_p.h
parent: 34821e226a94858480e57bb25ac7655bfd19f1e6 (diff)
Improve ucstrncmp with SSE2
The benchmarks showed that the basic SSE2-based building block improves performance by about 50% with data extracted from a Qt Creator run. None of the other alternatives provided clearly better results -- the best gained 3.8%, and only with one compiler. Change-Id: I77314785afecfacaf21c41fd79c97cadf357f895 Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- src/corelib/tools/qsimd_p.h 13
1 files changed, 13 insertions, 0 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 1e428b6aeb..dd720f88da 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -256,6 +256,12 @@ static __forceinline unsigned long _bit_scan_reverse(uint val)
_BitScanReverse(&result, val);
return result;
}
+static __forceinline unsigned long _bit_scan_forward(uint val) // Wrapper around the _BitScanForward intrinsic: returns the index it stores for val.
+{
+ unsigned long result; // no initializer; written only through the pointer passed to the intrinsic below
+ _BitScanForward(&result, val); // the intrinsic's own return value is discarded here. NOTE(review): per Microsoft's docs the stored index is undefined for a zero mask -- presumably callers guarantee val != 0; confirm
+ return result;
+}
# elif (defined(Q_CC_CLANG) || (defined(Q_CC_GNU) && __GNUC__ * 100 + __GNUC_MINOR__ < 405)) \
&& !defined(Q_CC_INTEL)
// Clang is missing the intrinsic for _bit_scan_reverse
@@ -267,6 +273,13 @@ unsigned _bit_scan_reverse(unsigned val)
asm("bsr %1, %0" : "=r" (result) : "r" (val));
return result;
}
+static inline __attribute__((always_inline)) // forced-inline helper implemented with GCC-style extended asm
+unsigned _bit_scan_forward(unsigned val)
+{
+ unsigned result; // no initializer; set only as the asm statement's output operand
+ asm("bsf %1, %0" : "=r" (result) : "r" (val)); // %0 = result (write-only register operand), %1 = val (register input). NOTE(review): the Intel SDM leaves bsf's destination undefined for a zero source -- presumably callers guarantee val != 0; confirm
+ return result;
+}
# endif
#endif // Q_PROCESSOR_X86