summary refs log tree commit diff stats
path: root/src/corelib/tools/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- src/corelib/tools/qsimd.cpp 278
1 files changed, 146 insertions, 132 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index fb6219273f..ac21e5beb9 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -46,9 +47,12 @@
#if defined(Q_OS_WIN)
# if defined(Q_OS_WINCE)
# include <qt_windows.h>
+# include <cmnintrin.h>
# endif
# if !defined(Q_CC_GNU)
-# include <intrin.h>
+# ifndef Q_OS_WINCE
+# include <intrin.h>
+# endif
# endif
#elif defined(Q_OS_LINUX) && defined(__arm__)
#include "private/qcore_unix_p.h"
@@ -98,7 +102,7 @@ static inline uint detectProcessorFeatures()
return features;
}
-#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
+#elif defined(Q_PROCESSOR_ARM) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
static inline uint detectProcessorFeatures()
{
uint features = 0;
@@ -142,17 +146,22 @@ static inline uint detectProcessorFeatures()
return features;
}
-#elif defined(__i386__) || defined(_M_IX86)
-static inline uint detectProcessorFeatures()
-{
- uint features = 0;
+#elif defined(Q_PROCESSOR_X86)
- unsigned int extended_result = 0;
- unsigned int feature_result = 0;
- uint result = 0;
- /* see p. 118 of amd64 instruction set manual Vol3 */
+#ifdef Q_PROCESSOR_X86_32
+# define PICreg "%%ebx"
+#else
+# define PICreg "%%rbx"
+#endif
+
+static int maxBasicCpuidSupported()
+{
#if defined(Q_CC_GNU)
- long cpuid_supported, tmp1;
+ qintptr tmp1;
+
+# ifdef Q_PROCESSOR_X86_32
+ // check if the CPUID instruction is supported
+ long cpuid_supported;
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
@@ -164,146 +173,141 @@ static inline uint detectProcessorFeatures()
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
- if (cpuid_supported) {
- asm ("xchg %%ebx, %2\n"
- "cpuid\n"
- "xchg %%ebx, %2\n"
- : "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
- : "a" (1));
-
- asm ("xchg %%ebx, %1\n"
- "cpuid\n"
- "cmp $0x80000000, %%eax\n"
- "jnbe 1f\n"
- "xor %0, %0\n"
- "jmp 2f\n"
- "1:\n"
- "mov $0x80000001, %%eax\n"
- "cpuid\n"
- "2:\n"
- "xchg %%ebx, %1\n"
- : "=&d" (extended_result), "=&r" (tmp1)
- : "a" (0x80000000)
- : "%ecx"
- );
- }
+ if (!cpuid_supported)
+ return 0;
+# endif
-#elif defined (Q_OS_WIN)
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip
-
- mov eax, 1
- cpuid
- mov result, edx
- mov feature_result, ecx
- skip:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+ int result;
+ asm ("xchg " PICreg", %1\n"
+ "cpuid\n"
+ "xchg " PICreg", %1\n"
+ : "=&a" (result), "=&r" (tmp1)
+ : "0" (0)
+ : "ecx", "edx");
+ return result;
+#elif defined(Q_OS_WIN)
+ // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
+ int info[4];
+ __cpuid(info, 0);
+ return info[0];
+#else
+ return 0;
+#endif
+}
- _asm {
- push eax
- push ebx
- push ecx
- push edx
- pushfd
- pop eax
- mov ebx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ebx
- jz skip2
-
- mov eax, 80000000h
- cpuid
- cmp eax, 80000000h
- jbe skip2
- mov eax, 80000001h
- cpuid
- mov extended_result, edx
- skip2:
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
+static void cpuidFeatures01(uint &ecx, uint &edx) // runs CPUID with EAX=1 and stores the resulting ECX/EDX in the out-parameters
+{
+#if defined(Q_CC_GNU)
+ qintptr tmp1; // scratch operand that is swapped with the PIC register around CPUID
+ asm ("xchg " PICreg", %2\n" // park the PIC register's value in tmp1
+ "cpuid\n"
+ "xchg " PICreg", %2\n" // swap back so the PIC register is restored
+ : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
+ : "a" (1)); // NOTE(review): CPUID also writes EAX, which is declared input-only here -- confirm this is safe
+#elif defined(Q_OS_WIN)
+ int info[4]; // filled by __cpuid; slots 2 and 3 are copied out as ECX and EDX below
+ __cpuid(info, 1);
+ ecx = info[2];
+ edx = info[3];
#endif
+}
+static void cpuidFeatures07_00(uint &ebx) // runs CPUID with EAX=7, ECX=0 and stores the resulting EBX in 'ebx'
+{
+#if defined(Q_CC_GNU)
+ quintptr rbx, rax = 7, rcx = 0; // register-sized (may be 64-bit); rax/rcx carry the leaf/sub-leaf in and are overwritten by CPUID
+ asm ("xchg " PICreg", %0\n" // park the PIC register's value in rbx
+ "cpuid\n"
+ "xchg " PICreg", %0\n" // rbx now holds CPUID's EBX and the PIC register is restored
+ : "=&r" (rbx), "+a" (rax), "+c" (rcx) // read-write operands: the compiler must not assume EAX/ECX survive CPUID
+ : // no input-only operands
+ : "%edx");
+ ebx = rbx;
+#elif defined(Q_OS_WIN)
+ int info[4]; // filled by __cpuidex; slot 1 is copied out as EBX below
+ __cpuidex(info, 7, 0);
+ ebx = info[1];
+#endif
+}
- // result now contains the standard feature bits
- if (result & (1u << 26))
- features |= SSE2;
- if (feature_result & (1u))
- features |= SSE3;
- if (feature_result & (1u << 9))
- features |= SSSE3;
- if (feature_result & (1u << 19))
- features |= SSE4_1;
- if (feature_result & (1u << 20))
- features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+#ifdef Q_OS_WIN
+namespace QtXgetbvHack { // Windows-only helper namespace, pulled into scope by the using-directive below
+ inline quint64 _xgetbv(int) { return 0; } // stand-in that always reports 0; presumably only chosen when the compiler has no _xgetbv of its own -- TODO confirm
+}
+using namespace QtXgetbvHack; // lets the unqualified _xgetbv(in) call in xgetbv() find the stand-in
+#endif
- return features;
+static void xgetbv(int in, uint &eax, uint &edx) // queries XGETBV with selector 'in'; results go to the eax/edx out-parameters
+{
+#ifdef Q_OS_WIN
+ quint64 result = _xgetbv(in); // 64-bit value, split into two halves below
+ eax = result; // low 32 bits
+ edx = result >> 32; // high 32 bits
+#elif defined(Q_CC_GNU)
+ asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction, emitted as raw opcode bytes
+ : "=a" (eax), "=d" (edx)
+ : "c" (in)); // selector is passed in ECX
+#endif
}
-#elif defined(__x86_64) || defined(Q_OS_WIN64)
static inline uint detectProcessorFeatures()
{
- uint features = SSE2;
- uint feature_result = 0;
-
-#if defined (Q_OS_WIN64)
- {
- int info[4];
- __cpuid(info, 1);
- feature_result = info[2];
- }
-#elif defined(Q_CC_GNU)
- quint64 tmp;
- asm ("xchg %%rbx, %1\n"
- "cpuid\n"
- "xchg %%rbx, %1\n"
- : "=&c" (feature_result), "=&r" (tmp)
- : "a" (1)
- : "%edx"
- );
+ uint features = 0; // bitmask of detected features; this is the return value
+ int cpuidLevel = maxBasicCpuidSupported();
+ if (cpuidLevel < 1)
+ return 0; // CPUID leaf 1 is unavailable, so no feature can be reported
+
+ uint cpuid01ECX = 0, cpuid01EDX = 0;
+ cpuidFeatures01(cpuid01ECX, cpuid01EDX);
+#if defined(Q_PROCESSOR_X86_32)
+ // 32-bit x86 might not have SSE2 support, so test the CPUID bit
+ if (cpuid01EDX & (1u << 26))
+ features |= SSE2;
+#else
+ // x86-64 or x32: SSE2 is set unconditionally
+ features = SSE2;
#endif
- if (feature_result & (1u))
+ // common part between 32- and 64-bit
+ if (cpuid01ECX & (1u))
features |= SSE3;
- if (feature_result & (1u << 9))
+ if (cpuid01ECX & (1u << 9))
features |= SSSE3;
- if (feature_result & (1u << 19))
+ if (cpuid01ECX & (1u << 19))
features |= SSE4_1;
- if (feature_result & (1u << 20))
+ if (cpuid01ECX & (1u << 20))
features |= SSE4_2;
- if (feature_result & (1u << 28))
- features |= AVX;
+ if (cpuid01ECX & (1u << 25))
+ features |= 0; // AES: deliberately a no-op for now, enable if needed
+
+ uint xgetbvA = 0, xgetbvD = 0; // stay 0 unless the XGETBV query below runs
+ if (cpuid01ECX & (1u << 27)) {
+ // XGETBV enabled, so it can be queried
+ xgetbv(0, xgetbvA, xgetbvD);
+ }
+
+ uint cpuid0700EBX = 0; // stays 0 when CPUID leaf 7 is not available
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(cpuid0700EBX);
+
+ if ((xgetbvA & 6) == 6) {
+ // support for YMM and XMM registers is enabled; only then report AVX/AVX2
+ if (cpuid01ECX & (1u << 28))
+ features |= AVX;
+
+ if (cpuid0700EBX & (1u << 5))
+ features |= AVX2;
+ }
+
+ if (cpuid0700EBX & (1u << 4))
+ features |= HLE; // Hardware Lock Elision
+ if (cpuid0700EBX & (1u << 11))
+ features |= RTM; // Restricted Transactional Memory
return features;
}
+
#else
static inline uint detectProcessorFeatures()
{
@@ -322,6 +326,9 @@ static inline uint detectProcessorFeatures()
sse4.1
sse4.2
avx
+ avx2
+ hle
+ rtm
*/
// begin generated
@@ -334,11 +341,14 @@ static const char features_string[] =
" sse4.1\0"
" sse4.2\0"
" avx\0"
+ " avx2\0"
+ " hle\0"
+ " rtm\0"
"\0";
static const int features_indices[] = {
0, 8, 14, 20, 26, 33, 41, 49,
- -1
+ 54, 60, 65, -1
};
// end generated
@@ -384,8 +394,12 @@ static const uint minFeature = None
#else
int ffs(int i)
{
+#ifndef Q_OS_WINCE // the _BitScanForward path is compiled everywhere except WinCE
unsigned long result;
return _BitScanForward(&result, i) ? result : 0;
+#else // WinCE: no bit scan is performed
+ return 0; // always reports 0 -- NOTE(review): confirm callers tolerate this on WinCE
+#endif
}
#endif
#endif // Q_OS_WIN