From e70324f8dd1f191556599cf60100dd0ad0b16708 Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Thu, 3 Dec 2015 12:50:44 +0100 Subject: Remove _bit_scan_{forward,reverse} Use qCountTrailingZeroBits and qCountLeadingZeroBits from qalgorithms.h instead. Also extended these versions for MSVC. The _bit_scan_* versions stem from a time before the glorious days of qalgorithms.h. A big advantage is that these functions can be used on all platforms. Change-Id: I5a1b886371520310a7fe16e617635ea335046beb Reviewed-by: Simon Hausmann --- src/corelib/tools/qalgorithms.h | 139 +++++++++++++++++++++++++++++++++------- src/corelib/tools/qsimd_p.h | 53 --------------- src/corelib/tools/qstring.cpp | 8 +-- 3 files changed, 119 insertions(+), 81 deletions(-) (limited to 'src/corelib/tools') diff --git a/src/corelib/tools/qalgorithms.h b/src/corelib/tools/qalgorithms.h index 854276d150..568b9cc95c 100644 --- a/src/corelib/tools/qalgorithms.h +++ b/src/corelib/tools/qalgorithms.h @@ -516,6 +516,105 @@ QT_DEPRECATED_X("Use std::binary_search") Q_OUTOFLINE_TEMPLATE RandomAccessItera #endif // QT_DEPRECATED_SINCE(5, 2) +// Clang had a bug where __builtin_ctz/clz is not marked as constexpr. 
+#if defined Q_CC_CLANG && defined __apple_build_version__ && __clang_major__ < 7 +# undef QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ +#else +# define QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ +#endif + +#if defined QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ +#if defined(Q_CC_GNU) +# define QT_HAS_BUILTIN_CTZS +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzs(quint16 v) Q_DECL_NOTHROW +{ +# if QT_HAS_BUILTIN(__builtin_ctzs) || defined(__BMI__) + return __builtin_ctzs(v); +# else + return __builtin_ctz(v); +# endif +} +#define QT_HAS_BUILTIN_CLZS +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzs(quint16 v) Q_DECL_NOTHROW +{ +# if QT_HAS_BUILTIN(__builtin_clzs) || defined(__BMI__) + return __builtin_clzs(v); +# else + return __builtin_clz(v) - 16U; +# endif +} +#define QT_HAS_BUILTIN_CTZ +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctz(quint32 v) Q_DECL_NOTHROW +{ + return __builtin_ctz(v); +} +#define QT_HAS_BUILTIN_CLZ +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clz(quint32 v) Q_DECL_NOTHROW +{ + return __builtin_clz(v); +} +#define QT_HAS_BUILTIN_CTZLL +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzll(quint64 v) Q_DECL_NOTHROW +{ + return __builtin_ctzll(v); +} +#define QT_HAS_BUILTIN_CLZLL +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzll(quint64 v) Q_DECL_NOTHROW +{ + return __builtin_clzll(v); +} +#elif defined(Q_CC_MSVC) && !defined(Q_OS_WINCE) +#define QT_HAS_BUILTIN_CTZ +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_ctz(quint32 val) +{ + unsigned long result; + _BitScanForward(&result, val); + return result; +} +#define QT_HAS_BUILTIN_CLZ +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_clz(quint32 val) +{ + unsigned long result; + _BitScanReverse(&result, val); + // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31 + // and the lsb index is 0. The result for the index when counting up: msb index is 0 (because it + // starts there), and the lsb index is 31. 
+ result ^= sizeof(quint32) * 8 - 1; + return result; +} +#if Q_PROCESSOR_WORDSIZE == 8 +// These are only defined for 64bit builds. +#define QT_HAS_BUILTIN_CTZLL +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_ctzll(quint64 val) +{ + unsigned long result; + _BitScanForward64(&result, val); + return result; +} +// MSVC calls it _BitScanReverse64 and returns a flag telling whether any bit was set, which we don't need +#define QT_HAS_BUILTIN_CLZLL +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_clzll(quint64 val) +{ + unsigned long result; + _BitScanReverse64(&result, val); + // see qt_builtin_clz + result ^= sizeof(quint64) * 8 - 1; + return result; +} +#endif +# define QT_HAS_BUILTIN_CTZS +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzs(quint16 v) Q_DECL_NOTHROW +{ + return qt_builtin_ctz(v); +} +#define QT_HAS_BUILTIN_CLZS +Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzs(quint16 v) Q_DECL_NOTHROW +{ + return qt_builtin_clz(v) - 16U; +} +#endif +#endif // QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ + } //namespace QAlgorithmsPrivate @@ -586,8 +685,8 @@ Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline uint qPopulationCount(long unsigne Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint32 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_ctz(v) : 32U; +#if defined(QT_HAS_BUILTIN_CTZ) + return v ? QAlgorithmsPrivate::qt_builtin_ctz(v) : 32U; #else // see http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel unsigned int c = 32; // c will be the number of zero bits on the right @@ -604,8 +703,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint32 v) Q_DECL_NO Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_ctz(v) : 8U; +#if defined(QT_HAS_BUILTIN_CTZ) + return v ?
QAlgorithmsPrivate::qt_builtin_ctz(v) : 8U; #else unsigned int c = 8; // c will be the number of zero bits on the right v &= -signed(v); @@ -619,12 +718,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOT Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) -# if QT_HAS_BUILTIN(__builtin_ctzs) || defined(__BMI__) - return v ? __builtin_ctzs(v) : 16U; -# else - return v ? __builtin_ctz(v) : 16U; -# endif +#if defined(QT_HAS_BUILTIN_CTZS) + return v ? QAlgorithmsPrivate::qt_builtin_ctzs(v) : 16U; #else unsigned int c = 16; // c will be the number of zero bits on the right v &= -signed(v); @@ -639,8 +734,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NO Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint64 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_ctzll(v) : 64; +#if defined(QT_HAS_BUILTIN_CTZLL) + return v ? QAlgorithmsPrivate::qt_builtin_ctzll(v) : 64; #else quint32 x = static_cast<quint32>(v); return x ? qCountTrailingZeroBits(x) @@ -655,8 +750,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(unsigned long v) Q_D Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint32 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_clz(v) : 32U; +#if defined(QT_HAS_BUILTIN_CLZ) + return v ? QAlgorithmsPrivate::qt_builtin_clz(v) : 32U; #else // Hacker's Delight, 2nd ed. Fig 5-16, p. 102 v = v | (v >> 1); @@ -670,8 +765,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint32 v) Q_DECL_NOT Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_clz(v)-24U : 8U; +#if defined(QT_HAS_BUILTIN_CLZ) + return v ?
QAlgorithmsPrivate::qt_builtin_clz(v)-24U : 8U; #else v = v | (v >> 1); v = v | (v >> 2); @@ -682,12 +777,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTH Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) -# if QT_HAS_BUILTIN(__builtin_clzs) || defined(__BMI__) - return v ? __builtin_clzs(v) : 16U; -# else - return v ? __builtin_clz(v)-16U : 16U; -# endif +#if defined(QT_HAS_BUILTIN_CLZS) + return v ? QAlgorithmsPrivate::qt_builtin_clzs(v) : 16U; #else v = v | (v >> 1); v = v | (v >> 2); @@ -699,8 +790,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOT Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint64 v) Q_DECL_NOTHROW { -#if defined(Q_CC_GNU) - return v ? __builtin_clzll(v) : 64U; +#if defined(QT_HAS_BUILTIN_CLZLL) + return v ? QAlgorithmsPrivate::qt_builtin_clzll(v) : 64U; #else v = v | (v >> 1); v = v | (v >> 2); diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 48ef686bbd..8cf0c5a4d2 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -465,59 +465,6 @@ static inline quint64 qCpuFeatures() #define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \ || (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature))) -#if QT_HAS_BUILTIN(__builtin_clz) && QT_HAS_BUILTIN(__builtin_ctz) && defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) -static Q_ALWAYS_INLINE unsigned _bit_scan_reverse(unsigned val) -{ - Q_ASSERT(val != 0); // if val==0, the result is undefined. - unsigned result = static_cast<unsigned>(__builtin_clz(val)); // Count Leading Zeros - // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31 - // and the lsb inde is 0. The result for _bit_scan_reverse is expected to be the index when - // counting up: msb index is 0 (because it starts there), and the lsb index is 31.
- result ^= sizeof(unsigned) * 8 - 1; - return result; -} -static Q_ALWAYS_INLINE unsigned _bit_scan_forward(unsigned val) -{ - Q_ASSERT(val != 0); // if val==0, the result is undefined. - return static_cast<unsigned>(__builtin_ctz(val)); // Count Trailing Zeros -} -#elif defined(Q_PROCESSOR_X86) -// Bit scan functions for x86 -# if defined(Q_CC_MSVC) -// MSVC calls it _BitScanReverse and returns the carry flag, which we don't need -static __forceinline unsigned long _bit_scan_reverse(uint val) -{ - unsigned long result; - _BitScanReverse(&result, val); - return result; -} -static __forceinline unsigned long _bit_scan_forward(uint val) -{ - unsigned long result; - _BitScanForward(&result, val); - return result; -} -# elif (defined(Q_CC_CLANG) || (defined(Q_CC_GNU) && Q_CC_GNU < 405)) \ - && !defined(Q_CC_INTEL) -// Clang is missing the intrinsic for _bit_scan_reverse -// GCC only added it in version 4.5 -static inline __attribute__((always_inline)) -unsigned _bit_scan_reverse(unsigned val) -{ - unsigned result; - asm("bsr %1, %0" : "=r" (result) : "r" (val)); - return result; -} -static inline __attribute__((always_inline)) -unsigned _bit_scan_forward(unsigned val) -{ - unsigned result; - asm("bsf %1, %0" : "=r" (result) : "r" (val)); - return result; -} -# endif -#endif // Q_PROCESSOR_X86 - #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 21f3e34c6f..7a13f2087b 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -468,7 +468,7 @@ static int ucstrncmp(const QChar *a, const QChar *b, int l) uint mask = ~_mm_movemask_epi8(result); if (ushort(mask)) { // found a different byte - uint idx = uint(_bit_scan_forward(mask)); + uint idx = qCountTrailingZeroBits(mask); return reinterpret_cast<const QChar *>(ptr + idx)->unicode() - reinterpret_cast<const QChar *>(ptr + distance + idx)->unicode();
} @@ -571,7 +571,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) # endif if (mask) { // found a different character - uint idx = uint(_bit_scan_forward(mask)); + uint idx = qCountTrailingZeroBits(mask); return uc[offset + idx / 2] - c[offset + idx / 2]; } } @@ -589,7 +589,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) uint mask = ~_mm_movemask_epi8(result); if (ushort(mask)) { // found a different character - uint idx = uint(_bit_scan_forward(mask)); + uint idx = qCountTrailingZeroBits(mask); return uc[offset + idx / 2] - c[offset + idx / 2]; } @@ -683,7 +683,7 @@ static int findChar(const QChar *str, int len, QChar ch, int from, // found a match // same as: return n - s + _bit_scan_forward(mask) / 2 return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s) - + _bit_scan_forward(mask)) >> 1; + + qCountTrailingZeroBits(mask)) >> 1; } } -- cgit v1.2.3