summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/tools/qsimd.cpp | 238
1 file changed, 113 insertions, 125 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 75d9f77119..6a790a432d 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
return features;
}
-#elif defined(Q_PROCESSOR_X86_32)
-static inline uint detectProcessorFeatures()
+#elif defined(Q_PROCESSOR_X86)
+
+#ifdef Q_PROCESSOR_X86_32
+# define PICreg "%%ebx"
+#else
+# define PICreg "%%rbx"
+#endif
+
+static int maxBasicCpuidSupported()
{
- uint features = 0;
+#ifdef Q_OS_WIN
+ // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
+ int info[4];
+ __cpuid(info, 0);
+ return info[0];
+#elif defined(Q_CC_GNU)
+ long tmp1;
- unsigned int extended_result = 0;
- unsigned int feature_result = 0;
- uint result = 0;
- /* see p. 118 of amd64 instruction set manual Vol3 */
-#if defined(Q_CC_GNU)
- long cpuid_supported, tmp1;
+# ifdef Q_PROCESSOR_X86_32
+ // check if the CPUID instruction is supported
+ long cpuid_supported;
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
@@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
- if (cpuid_supported) {
- asm ("xchg %%ebx, %2\n"
- "cpuid\n"
- "xchg %%ebx, %2\n"
- : "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
- : "a" (1));
-
- asm ("xchg %%ebx, %1\n"
- "cpuid\n"
- "cmp $0x80000000, %%eax\n"
- "jnbe 1f\n"
- "xor %0, %0\n"
- "jmp 2f\n"
- "1:\n"
- "mov $0x80000001, %%eax\n"
- "cpuid\n"
- "2:\n"
- "xchg %%ebx, %1\n"
- : "=&d" (extended_result), "=&r" (tmp1)
- : "a" (0x80000000)
- : "%ecx"
- );
- }
+ if (!cpuid_supported)
+ return 0;
+# endif
-#elif defined (Q_OS_WIN)
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip
-
- mov eax, 1
- cpuid
- mov result, edx
- mov feature_result, ecx
- skip:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+ int result;
+ asm ("xchg "PICreg", %1\n"
+ "cpuid\n"
+ "xchg "PICreg", %1\n"
+ : "=&a" (result), "=&r" (tmp1)
+ : "0" (0)
+ : "ecx", "edx");
+ return result;
+#else
+ return 0;
+#endif
+}
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip2
-
- mov eax, 80000000h
- cpuid
- cmp eax, 80000000h
- jbe skip2
- mov eax, 80000001h
- cpuid
- mov extended_result, edx
- skip2:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+static void cpuidFeatures01(uint &ecx, uint &edx)
+{
+#ifdef Q_OS_WIN
+ int info[4];
+ __cpuid(info, 1);
+ ecx = info[2];
+ edx = info[3];
+#elif defined(Q_CC_GNU)
+ long tmp1;
+ asm ("xchg "PICreg", %2\n"
+ "cpuid\n"
+ "xchg "PICreg", %2\n"
+ : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
+ : "a" (1));
#endif
+}
+static void cpuidFeatures07_00(uint &ebx)
+{
+#ifdef Q_OS_WIN
+ int info[4];
+ __cpuidex(info, 7, 0);
+ ebx = info[1];
+#elif defined(Q_CC_GNU)
+ unsigned long rbx; // in case it's 64-bit
+ asm ("xchg "PICreg", %0\n"
+ "cpuid\n"
+ "xchg "PICreg", %0\n"
+ : "=&r" (rbx)
+ : "a" (7), "c" (0)
+ : "%edx");
+ ebx = rbx;
+#endif
+}
- // result now contains the standard feature bits
- if (result & (1u << 26))
- features |= SSE2;
- if (feature_result & (1u))
- features |= SSE3;
- if (feature_result & (1u << 9))
- features |= SSSE3;
- if (feature_result & (1u << 19))
- features |= SSE4_1;
- if (feature_result & (1u << 20))
- features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+#ifdef Q_OS_WIN
+namespace QtXgetbvHack {
+ inline quint64 _xgetbv(int) { return 0; }
+}
+using namespace QtXgetbvHack;
+#endif
- return features;
+static void xgetbv(int in, uint &eax, uint &edx)
+{
+#ifdef Q_OS_WIN
+ quint64 result = _xgetbv(in);
+ eax = result;
+ edx = result >> 32;
+#elif defined(Q_CC_GNU)
+ asm ("xgetbv"
+ : "=a" (eax), "=d" (edx)
+ : "c" (in));
+#endif
}
-#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
static inline uint detectProcessorFeatures()
{
- uint features = SSE2;
- uint feature_result = 0;
-
-#if defined (Q_OS_WIN64)
- {
- int info[4];
- __cpuid(info, 1);
- feature_result = info[2];
- }
-#elif defined(Q_CC_GNU)
- quint64 tmp;
- asm ("xchg %%rbx, %1\n"
- "cpuid\n"
- "xchg %%rbx, %1\n"
- : "=&c" (feature_result), "=&r" (tmp)
- : "a" (1)
- : "%edx"
- );
+ uint features = 0;
+ if (maxBasicCpuidSupported() < 1)
+ return 0;
+
+#if defined(Q_PROCESSOR_X86_32)
+ unsigned int feature_result = 0;
+ uint result = 0;
+ cpuidFeatures01(feature_result, result);
+
+ // result now contains the standard feature bits
+ if (result & (1u << 26))
+ features |= SSE2;
+#else
+ // x86-64 or x32
+ features = SSE2;
+ uint feature_result = 0, tmp;
+ cpuidFeatures01(feature_result, tmp);
#endif
+ // common part between 32- and 64-bit
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
@@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+ uint xgetbvA = 0, xgetbvD = 0;
+ if (feature_result & (1u << 27)) {
+ // XGETBV enabled
+ xgetbv(0, xgetbvA, xgetbvD);
+ }
+
+ if ((xgetbvA & 6) == 6) {
+ // support for YMM and XMM registers is enabled
+ if (feature_result & (1u << 28))
+ features |= AVX;
+ }
return features;
}