diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-12-12 22:41:04 -0800 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-31 21:51:11 +0100 |
commit | f32a5b158f3929a8f391240b4f21dde1db294637 (patch) | |
tree | 21b41aa36fc01c50ced58f1a34ba6064162b46cc /src/corelib/tools/qsimd_p.h | |
parent | 34821e226a94858480e57bb25ac7655bfd19f1e6 (diff) |
Improve ucstrncmp with SSE2
The benchmarks showed that the basic SSE2-based building block
improves performance by about 50% with data extracted from a Qt
Creator run. None of the other alternatives provided clearly better
results -- the best was only 3.8% better, and with only one compiler.
Change-Id: I77314785afecfacaf21c41fd79c97cadf357f895
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 1e428b6aeb..dd720f88da 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -256,6 +256,12 @@ static __forceinline unsigned long _bit_scan_reverse(uint val) _BitScanReverse(&result, val); return result; } +static __forceinline unsigned long _bit_scan_forward(uint val) +{ + unsigned long result; + _BitScanForward(&result, val); + return result; +} # elif (defined(Q_CC_CLANG) || (defined(Q_CC_GNU) && __GNUC__ * 100 + __GNUC_MINOR__ < 405)) \ && !defined(Q_CC_INTEL) // Clang is missing the intrinsic for _bit_scan_reverse @@ -267,6 +273,13 @@ unsigned _bit_scan_reverse(unsigned val) asm("bsr %1, %0" : "=r" (result) : "r" (val)); return result; } +static inline __attribute__((always_inline)) +unsigned _bit_scan_forward(unsigned val) +{ + unsigned result; + asm("bsf %1, %0" : "=r" (result) : "r" (val)); + return result; +} # endif #endif // Q_PROCESSOR_X86 |