diff options
Diffstat (limited to 'src/corelib/global/qsimd_p.h')
-rw-r--r-- | src/corelib/global/qsimd_p.h | 79 |
1 files changed, 22 insertions, 57 deletions
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 1d12902a0a..012eb6cf4f 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -218,29 +218,31 @@ asm( // x86-64 sub-architecture version 3 // // The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2, -// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a -// sub-target for us. The first AMD processor with AVX2 support (Zen) has the -// same features, but had already introduced BMI1 in the previous generation. -// This feature set was chosen as the version 3 of the x86-64 ISA (x86-64-v3) -// and is supported by GCC and Clang. -// -// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc -// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). -# define ARCH_HASWELL_MACROS (__AVX2__ + __BMI2__ + __FMA__ + __LZCNT__) -# if ARCH_HASWELL_MACROS != 0 -# if ARCH_HASWELL_MACROS != 4 +// BMI1, BMI2, FMA, LZCNT, MOVBE. This feature set was chosen as the version 3 +// of the x86-64 ISA (x86-64-v3) and is supported by GCC and Clang. On systems +// with the GNU libc, libraries with this feature can be installed on a +// "glibc-hwcaps/x86-64-v3" subdir. macOS's fat binaries support the "x86_64h" +// sub-architecture too. 
+ +# if defined(__AVX2__) +// List of features present with -march=x86-64-v3 and not architecturally +// implied by __AVX2__ +# define ARCH_HASWELL_MACROS \ + (__AVX2__ + __BMI__ + __BMI2__ + __F16C__ + __FMA__ + __LZCNT__ + __POPCNT__) +# if ARCH_HASWELL_MACROS != 7 # error "Please enable all x86-64-v3 extensions; you probably want to use -march=haswell or -march=x86-64-v3 instead of -mavx2" # endif static_assert(ARCH_HASWELL_MACROS, "Undeclared identifiers indicate which features are missing."); # define __haswell__ 1 +# undef ARCH_HASWELL_MACROS # endif -# undef ARCH_HASWELL_MACROS // x86-64 sub-architecture version 4 // // Similar to the above, x86-64-v4 matches the AVX512 variant of the Intel Core // 6th generation (codename "Skylake"). AMD Zen4 is their first processor -// with AVX512 support and it includes all of these too. +// with AVX512 support and it includes all of these too. The GNU libc subdir for +// this is "glibc-hwcaps/x86-64-v4". // # define ARCH_SKX_MACROS (__AVX512F__ + __AVX512BW__ + __AVX512CD__ + __AVX512DQ__ + __AVX512VL__) # if ARCH_SKX_MACROS != 0 @@ -323,12 +325,19 @@ static const uint64_t qCompilerCpuFeatures = 0 #if defined __ARM_NEON__ | CpuFeatureNEON #endif +#if !(defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64)) + // Yocto Project recipes enable the Crypto extension for all ARMv8 configs, + // even for targets without the Crypto extension. That's wrong, but as + // the compiler never generates the code for them on its own, most + // code never notices the problem. But we would. By not setting the + // bits here, we force a runtime detection. 
#if defined __ARM_FEATURE_CRC32 | CpuFeatureCRC32 #endif #if defined __ARM_FEATURE_CRYPTO | CpuFeatureAES #endif +#endif // !(Q_OS_LINUX && Q_PROCESSOR_ARM_64) #if defined __mips_dsp | CpuFeatureDSP #endif @@ -378,50 +387,6 @@ static inline uint64_t qCpuFeatures() #define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \ || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) -/* - Small wrapper around x86's PAUSE and ARM's YIELD instructions. - - This is completely different from QThread::yieldCurrentThread(), which is - an OS-level operation that takes the whole thread off the CPU. - - This is just preventing one SMT thread from filling a core's pipeline with - speculated further loop iterations (which need to be expensively flushed on - final success) when it could just give those pipeline slots to a second SMT - thread that can do something useful with the core, such as unblocking this - SMT thread :) - - So, instead of - - while (!condition) - ; - - it's better to use - - while (!condition) - qYieldCpu(); -*/ -static inline void qYieldCpu() -{ -#if defined(Q_PROCESSOR_X86) - _mm_pause(); -#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 /* yield was added in ARMv7 */ -# if __has_builtin(__builtin_arm_yield) /* e.g. Clang */ - __builtin_arm_yield(); -# elif defined(Q_OS_INTEGRITY) || \ - (defined(Q_CC_GNU) && !defined(Q_CC_CLANG)) - /* - - Integrity is missing the arm_acle.h header - - GCC doesn't have __yield() in arm_acle.h - https://stackoverflow.com/a/70076751/134841 - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105416 - */ - asm volatile("yield"); /* this works everywhere */ -# else - __yield(); /* this is what should work everywhere */ -# endif -#endif -} - #ifdef __cplusplus } // extern "C" |