diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2023-08-23 09:51:10 -0700 |
---|---|---|
committer | Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> | 2023-09-14 15:52:19 +0000 |
commit | 0290ee0d6f59caa6d7a6eccebc9f66edd5722e62 (patch) | |
tree | d20f90e1bc2f5066f933318bea55e96640d8452e | |
parent | 2f04d66ce0996bf3e8b15a736eeb7f38217bcdb9 (diff) |
qsimd_p.h: rework the check for x86-64-v3 features
Instead of checking that all the features be present if any of them are
-- which is not an acceptable proposition, because each of them was
productized in at least one processor before -- let's simply insist that
they all be present if AVX2 is. That's what we can guarantee: all AVX2-
capable processors are capable of:
- AVX and earlier SSE (architecturally implied, so not checked)
- BMI1
- BMI2
- F16C
- FMA
- LZCNT
- POPCNT
This restores the original set of features that were checked in commit
ad65bbe4c061c4c1521b928a18ef9d68b7c69cbb when this was introduced, but
only if AVX2 is set. It also adds POPCNT, which was introduced with the
Nehalem architecture (which matches x86-64-v2) but isn't implied by
AVX.
GCC's -march=x86-64-v3 implies CRC32, but -march=haswell does not
because there were SKUs lacking CRC32, AES and PCLMULQDQ. This is
probably a bug in GCC.
Fixes: QTBUG-116357
Task-number: QTBUG-111698
Task-number: QTBUG-107072
Change-Id: Ifa1111900d6945ea8e05fffd177e113eaa506dde
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
(cherry picked from commit 39d3af0c29147ff319e89c0be602d99af0af0996)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
(cherry picked from commit cd8667cb17f34e6ba3b0d0b94d7cd05be839f48e)
-rw-r--r-- | src/corelib/global/qsimd_p.h | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 5d53ec7dd4..9f1833c93e 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -218,29 +218,31 @@ asm( // x86-64 sub-architecture version 3 // // The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2, -// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a -// sub-target for us. The first AMD processor with AVX2 support (Zen) has the -// same features, but had already introduced BMI1 in the previous generation. -// This feature set was chosen as the version 3 of the x86-64 ISA (x86-64-v3) -// and is supported by GCC and Clang. -// -// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc -// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). -# define ARCH_HASWELL_MACROS (__AVX2__ + __FMA__) -# if ARCH_HASWELL_MACROS != 0 -# if ARCH_HASWELL_MACROS != 2 +// BMI1, BMI2, FMA, LZCNT, MOVBE. This feature set was chosen as the version 3 +// of the x86-64 ISA (x86-64-v3) and is supported by GCC and Clang. On systems +// with the GNU libc, libraries with this feature can be installed on a +// "glibc-hwcaps/x86-64-v3" subdir. macOS's fat binaries support the "x86_64h" +// sub-architecture too. 
+ +# if defined(__AVX2__) +// List of features present with -march=x86-64-v3 and not architecturally +// implied by __AVX2__ +# define ARCH_HASWELL_MACROS \ + (__AVX2__ && __BMI__ && __BMI2__ && __F16C__ && __FMA__ && __LZCNT__ && __POPCNT__) +# if ARCH_HASWELL_MACROS == 0 # error "Please enable all x86-64-v3 extensions; you probably want to use -march=haswell or -march=x86-64-v3 instead of -mavx2" # endif static_assert(ARCH_HASWELL_MACROS, "Undeclared identifiers indicate which features are missing."); # define __haswell__ 1 +# undef ARCH_HASWELL_MACROS # endif -# undef ARCH_HASWELL_MACROS // x86-64 sub-architecture version 4 // // Similar to the above, x86-64-v4 matches the AVX512 variant of the Intel Core // 6th generation (codename "Skylake"). AMD Zen4 is the their first processor -// with AVX512 support and it includes all of these too. +// with AVX512 support and it includes all of these too. The GNU libc subdir for +// this is "glibc-hwcaps/x86-64-v4". // # define ARCH_SKX_MACROS (__AVX512F__ + __AVX512BW__ + __AVX512CD__ + __AVX512DQ__ + __AVX512VL__) # if ARCH_SKX_MACROS != 0 |