diff options
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 278 |
1 files changed, 146 insertions, 132 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index fb6219273f..ac21e5beb9 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). +** Copyright (C) 2012 Intel Corporation. ** Contact: http://www.qt-project.org/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -46,9 +47,12 @@ #if defined(Q_OS_WIN) # if defined(Q_OS_WINCE) # include <qt_windows.h> +# include <cmnintrin.h> # endif # if !defined(Q_CC_GNU) -# include <intrin.h> +# ifndef Q_OS_WINCE +# include <intrin.h> +# endif # endif #elif defined(Q_OS_LINUX) && defined(__arm__) #include "private/qcore_unix_p.h" @@ -98,7 +102,7 @@ static inline uint detectProcessorFeatures() return features; } -#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) +#elif defined(Q_PROCESSOR_ARM) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) static inline uint detectProcessorFeatures() { uint features = 0; @@ -142,17 +146,22 @@ static inline uint detectProcessorFeatures() return features; } -#elif defined(__i386__) || defined(_M_IX86) -static inline uint detectProcessorFeatures() -{ - uint features = 0; +#elif defined(Q_PROCESSOR_X86) - unsigned int extended_result = 0; - unsigned int feature_result = 0; - uint result = 0; - /* see p. 118 of amd64 instruction set manual Vol3 */ +#ifdef Q_PROCESSOR_X86_32 +# define PICreg "%%ebx" +#else +# define PICreg "%%rbx" +#endif + +static int maxBasicCpuidSupported() +{ #if defined(Q_CC_GNU) - long cpuid_supported, tmp1; + qintptr tmp1; + +# ifdef Q_PROCESSOR_X86_32 + // check if the CPUID instruction is supported + long cpuid_supported; asm ("pushf\n" "pop %0\n" "mov %0, %1\n" @@ -164,146 +173,141 @@ static inline uint detectProcessorFeatures() "xor %1, %0\n" // %eax is now 0 if CPUID is not supported : "=a" (cpuid_supported), "=r" (tmp1) ); - if (cpuid_supported) { - asm ("xchg %%ebx, %2\n" - "cpuid\n" - "xchg %%ebx, %2\n" - : "=&c" (feature_result), "=d" (result), "=&r" (tmp1) - : "a" (1)); - - asm ("xchg %%ebx, %1\n" - "cpuid\n" - "cmp $0x80000000, %%eax\n" - "jnbe 1f\n" - "xor %0, %0\n" - "jmp 2f\n" - "1:\n" - "mov $0x80000001, %%eax\n" - "cpuid\n" - "2:\n" - "xchg %%ebx, %1\n" - : "=&d" (extended_result), "=&r" (tmp1) - : "a" (0x80000000) - : "%ecx" - ); - } + if (!cpuid_supported) + return 0; +# endif -#elif defined (Q_OS_WIN) - _asm { - push eax - push ebx - push ecx - push edx - pushfd - pop eax - mov ebx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ebx - jz skip - - mov eax, 1 - cpuid - mov result, edx - mov feature_result, ecx - skip: - pop edx - pop ecx - pop ebx - pop eax - } + int result; + asm ("xchg " PICreg", %1\n" + "cpuid\n" + "xchg " PICreg", %1\n" + : "=&a" (result), "=&r" (tmp1) + : "0" (0) + : "ecx", "edx"); + return result; +#elif defined(Q_OS_WIN) + // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 + int info[4]; + __cpuid(info, 0); + return info[0]; +#else + return 0; +#endif +} - _asm { - push eax - push ebx - push ecx - push edx - pushfd - pop eax - mov ebx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ebx - jz skip2 - - mov eax, 80000000h - cpuid - cmp eax, 80000000h - jbe skip2 - mov eax, 80000001h - cpuid - mov extended_result, edx - skip2: - pop edx - pop ecx - pop ebx - pop eax - } +static void cpuidFeatures01(uint &ecx, uint &edx) +{ +#if defined(Q_CC_GNU) + qintptr tmp1; + asm ("xchg " PICreg", %2\n" + "cpuid\n" + "xchg " PICreg", %2\n" + : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) + : "a" (1)); +#elif defined(Q_OS_WIN) + int info[4]; + __cpuid(info, 1); + ecx = info[2]; + edx = info[3]; #endif +} +static void cpuidFeatures07_00(uint &ebx) +{ +#if defined(Q_CC_GNU) + quintptr rbx; // in case it's 64-bit + asm ("xchg " PICreg", %0\n" + "cpuid\n" + "xchg " PICreg", %0\n" + : "=&r" (rbx) + : "a" (7), "c" (0) + : "%edx"); + ebx = rbx; +#elif defined(Q_OS_WIN) + int info[4]; + __cpuidex(info, 7, 0); + ebx = info[1]; +#endif +} - // result now contains the standard feature bits - if (result & (1u << 26)) - features |= SSE2; - if (feature_result & (1u)) - features |= SSE3; - if (feature_result & (1u << 9)) - features |= SSSE3; - if (feature_result & (1u << 19)) - features |= SSE4_1; - if (feature_result & (1u << 20)) - features |= SSE4_2; - if (feature_result & (1u << 28)) - features |= AVX; +#ifdef Q_OS_WIN +namespace QtXgetbvHack { + inline quint64 _xgetbv(int) { return 0; } +} +using namespace QtXgetbvHack; +#endif - return features; +static void xgetbv(int in, uint &eax, uint &edx) +{ +#ifdef Q_OS_WIN + quint64 result = _xgetbv(in); + eax = result; + edx = result >> 32; +#elif defined(Q_CC_GNU) + asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction + : "=a" (eax), "=d" (edx) + : "c" (in)); +#endif } -#elif defined(__x86_64) || defined(Q_OS_WIN64) static inline uint detectProcessorFeatures() { - uint features = SSE2; - uint feature_result = 0; - -#if defined (Q_OS_WIN64) - { - int info[4]; - __cpuid(info, 1); - feature_result = info[2]; - } -#elif defined(Q_CC_GNU) - quint64 tmp; - asm ("xchg %%rbx, %1\n" - "cpuid\n" - "xchg %%rbx, %1\n" - : "=&c" (feature_result), "=&r" (tmp) - : "a" (1) - : "%edx" - ); + uint features = 0; + int cpuidLevel = maxBasicCpuidSupported(); + if (cpuidLevel < 1) + return 0; + + uint cpuid01ECX = 0, cpuid01EDX = 0; + cpuidFeatures01(cpuid01ECX, cpuid01EDX); +#if defined(Q_PROCESSOR_X86_32) + // x86 might not have SSE2 support + if (cpuid01EDX & (1u << 26)) + features |= SSE2; +#else + // x86-64 or x32 + features = SSE2; #endif - if (feature_result & (1u)) + // common part between 32- and 64-bit + if (cpuid01ECX & (1u)) features |= SSE3; - if (feature_result & (1u << 9)) + if (cpuid01ECX & (1u << 9)) features |= SSSE3; - if (feature_result & (1u << 19)) + if (cpuid01ECX & (1u << 19)) features |= SSE4_1; - if (feature_result & (1u << 20)) + if (cpuid01ECX & (1u << 20)) features |= SSE4_2; - if (feature_result & (1u << 28)) - features |= AVX; + if (cpuid01ECX & (1u << 25)) + features |= 0; // AES, enable if needed + + uint xgetbvA = 0, xgetbvD = 0; + if (cpuid01ECX & (1u << 27)) { + // XGETBV enabled + xgetbv(0, xgetbvA, xgetbvD); + } + + uint cpuid0700EBX = 0; + if (cpuidLevel >= 7) + cpuidFeatures07_00(cpuid0700EBX); + + if ((xgetbvA & 6) == 6) { + // support for YMM and XMM registers is enabled + if (cpuid01ECX & (1u << 28)) + features |= AVX; + + if (cpuid0700EBX & (1u << 5)) + features |= AVX2; + } + + if (cpuid0700EBX & (1u << 4)) + features |= HLE; // Hardware Lock Ellision + if (cpuid0700EBX & (1u << 11)) + features |= RTM; // Restricted Transactional Memory return features; } + #else static inline uint detectProcessorFeatures() { @@ -322,6 +326,9 @@ static inline uint detectProcessorFeatures() sse4.1 sse4.2 avx + avx2 + hle + rtm */ // begin generated @@ -334,11 +341,14 @@ static const char features_string[] = " sse4.1\0" " sse4.2\0" " avx\0" + " avx2\0" + " hle\0" + " rtm\0" "\0"; static const int features_indices[] = { 0, 8, 14, 20, 26, 33, 41, 49, - -1 + 54, 60, 65, -1 }; // end generated @@ -384,8 +394,12 @@ static const uint minFeature = None #else int ffs(int i) { +#ifndef Q_OS_WINCE unsigned long result; return _BitScanForward(&result, i) ? result : 0; +#else + return 0; +#endif } #endif #endif // Q_OS_WIN |