summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qsimd.cpp
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com>, 2020-07-13 17:09:54 -0700
committer: Thiago Macieira <thiago.macieira@intel.com>, 2020-07-22 10:46:09 -0700
commit: 034427a45a0f220d6a7e38bac4c70cfd60b07982 (patch)
tree: c94f645324040d32e839fb8f7ae9d53ca77e26b2 /src/corelib/global/qsimd.cpp
parent: 7284384eda22457adc1503c4f4d78f4f2772c45e (diff)
Deal with macOS not enabling AVX512 state in XCR0
All Mac Pros launched since 2019 have AVX512 support (CLX processor) but the Darwin kernel does not enable the state bits in the XCR0 register for all processes by default. Instead, it traps the #UD exception and decodes the instruction to find out if it was an AVX512 instruction. If so, it adds memory to the thread's context switch space. See [1].

Good solution, but forces us to have OS-specific code to determine if the OS really supports AVX512. No other OS appears to require this.

For future features (namely, Intel Advanced Matrix Extensions), there's an official way of implementing this, via the eXtended Feature Disable register.

Qt has no AVX512 code yet, so this is just being future-proof. Apple has yet to announce when or if their Mac Pros will switch from Intel Xeon to ARM.

[1] https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/fpu.c#L174

Change-Id: Ieffc3453b88c4517a1dbfffd162175ead343e622
Reviewed-by: Tor Arne Vestbø <tor.arne.vestbo@qt.io>
Diffstat (limited to 'src/corelib/global/qsimd.cpp')
-rw-r--r-- src/corelib/global/qsimd.cpp 68
1 files changed, 50 insertions, 18 deletions
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp
index 6ad6795fec..d91c1542c2 100644
--- a/src/corelib/global/qsimd.cpp
+++ b/src/corelib/global/qsimd.cpp
@@ -323,23 +323,49 @@ static void xgetbv(uint in, uint &eax, uint &edx)
#endif
}
+// Flags from the XCR0 state register: each enumerator is the mask of one state-component bit, followed by combined masks used when testing for SSE/AVX/AVX512 state
+enum XCR0Flags {
+ X87 = 1 << 0, // bit 0
+ XMM0_15 = 1 << 1, // bit 1; on its own this is SSEState
+ YMM0_15Hi128 = 1 << 2, // bit 2; required together with XMM0_15 for AVXState
+ BNDRegs = 1 << 3, // bit 3; not included in any combined mask below
+ BNDCSR = 1 << 4, // bit 4; not included in any combined mask below
+ OpMask = 1 << 5, // bit 5; part of AVX512State
+ ZMM0_15Hi256 = 1 << 6, // bit 6; part of AVX512State
+ ZMM16_31 = 1 << 7, // bit 7; part of AVX512State
+ // Combined masks: callers compare (xcr0 & mask) against the full mask, so every bit listed must be set
+ SSEState = XMM0_15,
+ AVXState = XMM0_15 | YMM0_15Hi128,
+ AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 // AVX state plus the mask-register and ZMM components
+};
+
+static quint64 adjustedXcr0(quint64 xcr0)
+{
+ /*
+ * Returns the XCR0 value to use for feature detection: the value passed
+ * in, with all AVX512State bits additionally set when the OS advertises
+ * AVX512 support through a channel other than XCR0. When no OS-specific
+ * case below is compiled in, the input is returned unchanged.
+ *
+ * Some OSes hide their capability of context-switching the AVX512 state in
+ * the XCR0 register. They do that so the first time we execute an
+ * instruction that may access the AVX512 state (requiring the EVEX prefix)
+ * they allocate the necessary context switch space.
+ *
+ * This behavior is deprecated with the XFD (Extended Feature Disable)
+ * register, but we can't change existing OSes.
+ */
+#ifdef Q_OS_DARWIN
+ // from <machine/cpu_capabilities.h> in xnu
+ // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
+ constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000); // capability bit tested below; per its name, AVX512F support
+ constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000; // fixed base address, chosen by pointer size (64-bit vs 32-bit build); presumably the xnu comm page -- confirm against the header above
+ constexpr quintptr cpu_capabilities64 = commpage + 0x10; // address of the 64-bit capabilities word: base + 0x10
+ quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); // plain memory read from that hard-coded address
+ if (capab & kHasAVX512F)
+ xcr0 |= AVX512State; // report the full AVX512 state as enabled even though the XCR0 value passed in did not
+#endif
+
+ return xcr0;
+}
+
static quint64 detectProcessorFeatures()
{
- // Flags from the CR0 / XCR0 state register
- enum XCR0Flags {
- X87 = 1 << 0,
- XMM0_15 = 1 << 1,
- YMM0_15Hi128 = 1 << 2,
- BNDRegs = 1 << 3,
- BNDCSR = 1 << 4,
- OpMask = 1 << 5,
- ZMM0_15Hi256 = 1 << 6,
- ZMM16_31 = 1 << 7,
-
- SSEState = XMM0_15,
- AVXState = XMM0_15 | YMM0_15Hi128,
- AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
- };
static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
@@ -367,16 +393,22 @@ static quint64 detectProcessorFeatures()
}
// now check the AVX state
- uint xgetbvA = 0, xgetbvD = 0;
+ quint64 xcr0 = 0;
if (results[Leaf1ECX] & (1u << 27)) {
// XGETBV enabled
+ uint xgetbvA = 0, xgetbvD = 0;
xgetbv(0, xgetbvA, xgetbvD);
+
+ xcr0 = xgetbvA;
+ if (sizeof(XCR0Flags) > sizeof(xgetbvA))
+ xcr0 |= quint64(xgetbvD) << 32;
+ xcr0 = adjustedXcr0(xcr0);
}
- if ((xgetbvA & AVXState) != AVXState) {
+ if ((xcr0 & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
- } else if ((xgetbvA & AVX512State) != AVX512State) {
+ } else if ((xcr0 & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
}