summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2023-08-23 09:51:10 -0700
committerQt Cherry-pick Bot <cherrypick_bot@qt-project.org>2023-09-14 15:52:19 +0000
commit0290ee0d6f59caa6d7a6eccebc9f66edd5722e62 (patch)
treed20f90e1bc2f5066f933318bea55e96640d8452e
parent2f04d66ce0996bf3e8b15a736eeb7f38217bcdb9 (diff)
qsimd_p.h: rework the check for x86-64-v3 features
Instead of checking that all the features be present if any of them are -- which is not an acceptable proposition, because each of them was productized in at least one processor before -- let's simply insist that they all be present if AVX2 is. That's what we can guarantee: all AVX2-capable processors are capable of: - AVX and earlier SSE (architecturally implied, so not checked) - BMI1 - BMI2 - F16C - FMA - LZCNT - POPCNT This restores the original set of features that were checked in commit ad65bbe4c061c4c1521b928a18ef9d68b7c69cbb when this was introduced, but only if AVX2 is set. It also adds POPCNT, which was introduced with the Nehalem architecture (which matches x86-64-v2) but isn't implied by AVX. GCC's -march=x86-64-v3 implies CRC32, but -march=haswell does not because there were SKUs lacking CRC32, AES and PCLMULQDQ. This is probably a bug in GCC. Fixes: QTBUG-116357 Task-number: QTBUG-111698 Task-number: QTBUG-107072 Change-Id: Ifa1111900d6945ea8e05fffd177e113eaa506dde Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> (cherry picked from commit 39d3af0c29147ff319e89c0be602d99af0af0996) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> (cherry picked from commit cd8667cb17f34e6ba3b0d0b94d7cd05be839f48e)
-rw-r--r--src/corelib/global/qsimd_p.h28
1 files changed, 15 insertions, 13 deletions
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index 5d53ec7dd4..9f1833c93e 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -218,29 +218,31 @@ asm(
// x86-64 sub-architecture version 3
//
// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
-// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
-// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
-// same features, but had already introduced BMI1 in the previous generation.
-// This feature set was chosen as the version 3 of the x86-64 ISA (x86-64-v3)
-// and is supported by GCC and Clang.
-//
-// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
-// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
-# define ARCH_HASWELL_MACROS (__AVX2__ + __FMA__)
-# if ARCH_HASWELL_MACROS != 0
-# if ARCH_HASWELL_MACROS != 2
+// BMI1, BMI2, FMA, LZCNT, MOVBE. This feature set was chosen as the version 3
+// of the x86-64 ISA (x86-64-v3) and is supported by GCC and Clang. On systems
+// with the GNU libc, libraries with this feature can be installed on a
+// "glibc-hwcaps/x86-64-v3" subdir. macOS's fat binaries support the "x86_64h"
+// sub-architecture too.
+
+# if defined(__AVX2__)
+// List of features present with -march=x86-64-v3 and not architecturally
+// implied by __AVX2__
+# define ARCH_HASWELL_MACROS \
+ (__AVX2__ && __BMI__ && __BMI2__ && __F16C__ && __FMA__ && __LZCNT__ && __POPCNT__)
+# if ARCH_HASWELL_MACROS == 0
# error "Please enable all x86-64-v3 extensions; you probably want to use -march=haswell or -march=x86-64-v3 instead of -mavx2"
# endif
static_assert(ARCH_HASWELL_MACROS, "Undeclared identifiers indicate which features are missing.");
# define __haswell__ 1
+# undef ARCH_HASWELL_MACROS
# endif
-# undef ARCH_HASWELL_MACROS
// x86-64 sub-architecture version 4
//
// Similar to the above, x86-64-v4 matches the AVX512 variant of the Intel Core
// 6th generation (codename "Skylake"). AMD Zen4 is their first processor
-// with AVX512 support and it includes all of these too.
+// with AVX512 support and it includes all of these too. The GNU libc subdir for
+// this is "glibc-hwcaps/x86-64-v4".
//
# define ARCH_SKX_MACROS (__AVX512F__ + __AVX512BW__ + __AVX512CD__ + __AVX512DQ__ + __AVX512VL__)
# if ARCH_SKX_MACROS != 0