summary | refs | log | tree | commit | diff | stats
path: root/src/corelib
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2011-12-25 20:36:14 -0200
committer: Qt by Nokia <qt-info@nokia.com> 2012-04-25 14:58:36 +0200
commit: dc6d1c6c3f877ea7f3fd0fde35a5916964093d5c (patch)
tree: 408d00effc7c63f8d10ee91677d36861845064bb /src/corelib
parent: 01ee0985319a0f41be12c3c8ff3813a74b43835e (diff)
Update the x86 feature-detection code, unifying x86 and x86-64
Most of it is the same for both of them, so let's avoid doing everything twice. Or more, since we may support x32 soon.

For Windows, use the intrinsics. For GCC, we'd like to use cpuid.h, but it only exists since GCC 4.3, so we can't.

And properly detect AVX support: it's not enough to detect that the processor supports AVX, we also need to check that the OS enabled support for the 256-bit registers.

Change-Id: Ibb4872cdb774de5701b18c40f4e612330a266214
Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/tools/qsimd.cpp | 238
1 file changed, 113 insertions, 125 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 75d9f77119..6a790a432d 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
return features;
}
-#elif defined(Q_PROCESSOR_X86_32)
-static inline uint detectProcessorFeatures()
+#elif defined(Q_PROCESSOR_X86)
+
+#ifdef Q_PROCESSOR_X86_32
+# define PICreg "%%ebx"
+#else
+# define PICreg "%%rbx"
+#endif
+
+static int maxBasicCpuidSupported()
{
- uint features = 0;
+#ifdef Q_OS_WIN
+ // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
+ int info[4];
+ __cpuid(info, 0);
+ return info[0];
+#elif defined(Q_CC_GNU)
+ long tmp1;
- unsigned int extended_result = 0;
- unsigned int feature_result = 0;
- uint result = 0;
- /* see p. 118 of amd64 instruction set manual Vol3 */
-#if defined(Q_CC_GNU)
- long cpuid_supported, tmp1;
+# ifdef Q_PROCESSOR_X86_32
+ // check if the CPUID instruction is supported
+ long cpuid_supported;
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
@@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
- if (cpuid_supported) {
- asm ("xchg %%ebx, %2\n"
- "cpuid\n"
- "xchg %%ebx, %2\n"
- : "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
- : "a" (1));
-
- asm ("xchg %%ebx, %1\n"
- "cpuid\n"
- "cmp $0x80000000, %%eax\n"
- "jnbe 1f\n"
- "xor %0, %0\n"
- "jmp 2f\n"
- "1:\n"
- "mov $0x80000001, %%eax\n"
- "cpuid\n"
- "2:\n"
- "xchg %%ebx, %1\n"
- : "=&d" (extended_result), "=&r" (tmp1)
- : "a" (0x80000000)
- : "%ecx"
- );
- }
+ if (!cpuid_supported)
+ return 0;
+# endif
-#elif defined (Q_OS_WIN)
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip
-
- mov eax, 1
- cpuid
- mov result, edx
- mov feature_result, ecx
- skip:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+ int result;
+ asm ("xchg "PICreg", %1\n"
+ "cpuid\n"
+ "xchg "PICreg", %1\n"
+ : "=&a" (result), "=&r" (tmp1)
+ : "0" (0)
+ : "ecx", "edx");
+ return result;
+#else
+ return 0;
+#endif
+}
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip2
-
- mov eax, 80000000h
- cpuid
- cmp eax, 80000000h
- jbe skip2
- mov eax, 80000001h
- cpuid
- mov extended_result, edx
- skip2:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+static void cpuidFeatures01(uint &ecx, uint &edx)
+{
+#ifdef Q_OS_WIN
+ int info[4];
+ __cpuid(info, 1);
+ ecx = info[2];
+ edx = info[3];
+#elif defined(Q_CC_GNU)
+ long tmp1;
+ asm ("xchg "PICreg", %2\n"
+ "cpuid\n"
+ "xchg "PICreg", %2\n"
+ : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
+ : "a" (1));
#endif
+}
+static void cpuidFeatures07_00(uint &ebx)
+{
+#ifdef Q_OS_WIN
+ int info[4];
+ __cpuidex(info, 7, 0);
+ ebx = info[1];
+#elif defined(Q_CC_GNU)
+ unsigned long rbx; // in case it's 64-bit
+ asm ("xchg "PICreg", %0\n"
+ "cpuid\n"
+ "xchg "PICreg", %0\n"
+ : "=&r" (rbx)
+ : "a" (7), "c" (0)
+ : "%edx");
+ ebx = rbx;
+#endif
+}
- // result now contains the standard feature bits
- if (result & (1u << 26))
- features |= SSE2;
- if (feature_result & (1u))
- features |= SSE3;
- if (feature_result & (1u << 9))
- features |= SSSE3;
- if (feature_result & (1u << 19))
- features |= SSE4_1;
- if (feature_result & (1u << 20))
- features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+#ifdef Q_OS_WIN
+namespace QtXgetbvHack {
+ inline quint64 _xgetbv(int) { return 0; }
+}
+using namespace QtXgetbvHack;
+#endif
- return features;
+static void xgetbv(int in, uint &eax, uint &edx)
+{
+#ifdef Q_OS_WIN
+ quint64 result = _xgetbv(in);
+ eax = result;
+ edx = result >> 32;
+#elif defined(Q_CC_GNU)
+ asm ("xgetbv"
+ : "=a" (eax), "=d" (edx)
+ : "c" (in));
+#endif
}
-#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
static inline uint detectProcessorFeatures()
{
- uint features = SSE2;
- uint feature_result = 0;
-
-#if defined (Q_OS_WIN64)
- {
- int info[4];
- __cpuid(info, 1);
- feature_result = info[2];
- }
-#elif defined(Q_CC_GNU)
- quint64 tmp;
- asm ("xchg %%rbx, %1\n"
- "cpuid\n"
- "xchg %%rbx, %1\n"
- : "=&c" (feature_result), "=&r" (tmp)
- : "a" (1)
- : "%edx"
- );
+ uint features = 0;
+ if (maxBasicCpuidSupported() < 1)
+ return 0;
+
+#if defined(Q_PROCESSOR_X86_32)
+ unsigned int feature_result = 0;
+ uint result = 0;
+ cpuidFeatures01(feature_result, result);
+
+ // result now contains the standard feature bits
+ if (result & (1u << 26))
+ features |= SSE2;
+#else
+ // x86-64 or x32
+ features = SSE2;
+ uint feature_result = 0, tmp;
+ cpuidFeatures01(feature_result, tmp);
#endif
+ // common part between 32- and 64-bit
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
@@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+ uint xgetbvA = 0, xgetbvD = 0;
+ if (feature_result & (1u << 27)) {
+ // XGETBV enabled
+ xgetbv(0, xgetbvA, xgetbvD);
+ }
+
+ if ((xgetbvA & 6) == 6) {
+ // support for YMM and XMM registers is enabled
+ if (feature_result & (1u << 28))
+ features |= AVX;
+ }
return features;
}