summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2023-08-23 09:51:10 -0700
committerThiago Macieira <thiago.macieira@intel.com>2023-09-12 19:05:47 -0700
commit39d3af0c29147ff319e89c0be602d99af0af0996 (patch)
treead216c2fb54cacea840232e36a80692fef1f4d80
parent6da6a17de9ccfcd5458ea72507b131660e0ab948 (diff)
qsimd_p.h: rework the check for x86-64-v3 features
Instead of checking that all the features be present if any of them are -- which is not an acceptable proposition, because each of them was productized in at least one processor before -- let's simply insist that they all be present if AVX2 is. That's what we can guarantee: all AVX2-capable processors are capable of: - AVX and earlier SSE (architecturally implied, so not checked) - BMI1 - BMI2 - F16C - FMA - LZCNT - POPCNT This restores the original set of features that were checked in commit ad65bbe4c061c4c1521b928a18ef9d68b7c69cbb when this was introduced, but only if AVX2 is set. It also adds POPCNT, which was introduced with the Nehalem architecture (which matches x86-64-v2) but isn't implied by AVX. GCC's -march=x86-64-v3 implies CRC32, but -march=haswell does not because there were SKUs lacking CRC32, AES and PCLMULQDQ. This is probably a bug in GCC. Fixes: QTBUG-116357 Task-number: QTBUG-111698 Task-number: QTBUG-107072 Pick-to: 6.5 6.6 Change-Id: Ifa1111900d6945ea8e05fffd177e113eaa506dde Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/global/qsimd_p.h28
1 files changed, 15 insertions, 13 deletions
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index 55adcef659..2136b390d6 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -218,29 +218,31 @@ asm(
// x86-64 sub-architecture version 3
//
// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
-// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
-// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
-// same features, but had already introduced BMI1 in the previous generation.
-// This feature set was chosen as the version 3 of the x86-64 ISA (x86-64-v3)
-// and is supported by GCC and Clang.
-//
-// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
-// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
-# define ARCH_HASWELL_MACROS (__AVX2__ + __FMA__)
-# if ARCH_HASWELL_MACROS != 0
-# if ARCH_HASWELL_MACROS != 2
+// BMI1, BMI2, FMA, LZCNT, MOVBE. This feature set was chosen as the version 3
+// of the x86-64 ISA (x86-64-v3) and is supported by GCC and Clang. On systems
+// with the GNU libc, libraries with this feature can be installed in a
+// "glibc-hwcaps/x86-64-v3" subdir. macOS's fat binaries support the "x86_64h"
+// sub-architecture too.
+
+# if defined(__AVX2__)
+// List of features present with -march=x86-64-v3 and not architecturally
+// implied by __AVX2__
+# define ARCH_HASWELL_MACROS \
+ (__AVX2__ && __BMI__ && __BMI2__ && __F16C__ && __FMA__ && __LZCNT__ && __POPCNT__)
+# if ARCH_HASWELL_MACROS == 0
# error "Please enable all x86-64-v3 extensions; you probably want to use -march=haswell or -march=x86-64-v3 instead of -mavx2"
# endif
static_assert(ARCH_HASWELL_MACROS, "Undeclared identifiers indicate which features are missing.");
# define __haswell__ 1
+# undef ARCH_HASWELL_MACROS
# endif
-# undef ARCH_HASWELL_MACROS
// x86-64 sub-architecture version 4
//
// Similar to the above, x86-64-v4 matches the AVX512 variant of the Intel Core
// 6th generation (codename "Skylake"). AMD Zen4 is their first processor
-// with AVX512 support and it includes all of these too.
+// with AVX512 support and it includes all of these too. The GNU libc subdir for
+// this is "glibc-hwcaps/x86-64-v4".
//
# define ARCH_SKX_MACROS (__AVX512F__ + __AVX512BW__ + __AVX512CD__ + __AVX512DQ__ + __AVX512VL__)
# if ARCH_SKX_MACROS != 0