diff options
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 271 |
1 files changed, 75 insertions, 196 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 25340f2d02..07a8b022bc 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -80,6 +80,43 @@ QT_BEGIN_NAMESPACE +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note + * we remove the terminating -1 that the script adds. + */ + +// begin generated +#if defined(Q_PROCESSOR_ARM) +/* Data: + neon + crc32 + */ +static const char features_string[] = + " neon\0" + " crc32\0" + "\0"; +static const int features_indices[] = { 0, 6 }; +#elif defined(Q_PROCESSOR_MIPS) +/* Data: + dsp + dspr2 +*/ +static const char features_string[] = + " dsp\0" + " dspr2\0" + "\0"; + +static const int features_indices[] = { + 0, 5 +}; +#elif defined(Q_PROCESSOR_X86) +# include "qsimd_x86.cpp" // generated by util/x86simdgen +#else +static const char features_string[] = ""; +static const int features_indices[] = { }; +#endif +// end generated + #if defined (Q_OS_NACL) static inline uint detectProcessorFeatures() { @@ -153,7 +190,9 @@ static inline quint64 detectProcessorFeatures() static int maxBasicCpuidSupported() { -#if defined(Q_CC_GNU) +#if defined(Q_CC_EMSCRIPTEN) + return 6; // All features supported by Emscripten +#elif defined(Q_CC_GNU) qregisterint tmp1; # if Q_PROCESSOR_X86 < 5 @@ -198,7 +237,7 @@ static int maxBasicCpuidSupported() static void cpuidFeatures01(uint &ecx, uint &edx) { -#if defined(Q_CC_GNU) +#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) qregisterint tmp1; asm ("xchg " PICreg", %2\n" "cpuid\n" @@ -215,6 +254,9 @@ static void cpuidFeatures01(uint &ecx, uint &edx) __CPUID(1, info); ecx = info[2]; edx = info[3]; +#else + Q_UNUSED(ecx); + Q_UNUSED(edx); #endif } @@ -222,29 +264,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx) inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} #endif -static void cpuidFeatures07_00(uint &ebx, uint &ecx) +static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) { -#if defined(Q_CC_GNU) +#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) qregisteruint rbx; // in case it's 64-bit qregisteruint rcx = 0; + qregisteruint rdx = 0; asm ("xchg " PICreg", %0\n" "cpuid\n" "xchg " PICreg", %0\n" - : "=&r" (rbx), "+&c" (rcx) - : "a" (7) - : "%edx"); + : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) + : "a" (7)); ebx = rbx; ecx = rcx; + edx = rdx; #elif defined(Q_OS_WIN) int info[4]; __cpuidex(info, 7, 0); ebx = info[1]; ecx = info[2]; + edx = info[3]; #elif defined(Q_CC_GHS) unsigned int info[4]; __CPUIDEX(7, 0, info); ebx = info[1]; ecx = info[2]; + edx = info[3]; #endif } @@ -254,7 +299,7 @@ inline quint64 _xgetbv(__int64) { return 0; } #endif static void xgetbv(uint in, uint &eax, uint &edx) { -#if defined(Q_CC_GNU) || defined(Q_CC_GHS) +#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction : "=a" (eax), "=d" (edx) : "c" (in)); @@ -262,6 +307,10 @@ static void xgetbv(uint in, uint &eax, uint &edx) quint64 result = _xgetbv(in); eax = result; edx = result >> 32; +#else + Q_UNUSED(in); + Q_UNUSED(eax); + Q_UNUSED(edx); #endif } @@ -282,13 +331,8 @@ static quint64 detectProcessorFeatures() AVXState = XMM0_15 | YMM0_15Hi128, AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 }; - static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) | - (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) | - (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) | - (Q_UINT64_C(1) << CpuFeatureAVX512VL) | - (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI); - static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512; - static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2; + static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; + static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; quint64 features = 0; int cpuidLevel = maxBasicCpuidSupported(); @@ -299,52 +343,33 @@ static quint64 detectProcessorFeatures() Q_ASSERT(cpuidLevel >= 1); #endif - uint cpuid01ECX = 0, cpuid01EDX = 0; - cpuidFeatures01(cpuid01ECX, cpuid01EDX); - - // the low 32-bits of features is cpuid01ECX - // note: we need to check OS support for saving the AVX register state - features = cpuid01ECX; - -#if defined(Q_PROCESSOR_X86_32) - // x86 might not have SSE2 support - if (cpuid01EDX & (1u << 26)) - features |= Q_UINT64_C(1) << CpuFeatureSSE2; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2); - // we should verify that the OS enabled saving of the SSE state... -#else - // x86-64 or x32 - features |= Q_UINT64_C(1) << CpuFeatureSSE2; -#endif + uint results[X86CpuidMaxLeaf] = {}; + cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); + if (cpuidLevel >= 7) + cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + + // populate our feature list + for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + uint word = x86_locators[i] / 32; + uint bit = 1U << (x86_locators[i] % 32); + quint64 feature = Q_UINT64_C(1) << (i + 1); + if (results[word] & bit) + features |= feature; + } + // now check the AVX state uint xgetbvA = 0, xgetbvD = 0; - if (cpuid01ECX & (1u << 27)) { + if (results[Leaf1ECX] & (1u << 27)) { // XGETBV enabled xgetbv(0, xgetbvA, xgetbvD); } - uint cpuid0700EBX = 0; - uint cpuid0700ECX = 0; - if (cpuidLevel >= 7) { - cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX); - - // the high 32-bits of features is cpuid0700EBX - features |= quint64(cpuid0700EBX) << 32; - } - if ((xgetbvA & AVXState) != AVXState) { // support for YMM registers is disabled, disable all AVX features &= ~AllAVX; } else if ((xgetbvA & AVX512State) != AVX512State) { // support for ZMM registers or mask registers is disabled, disable all AVX512 features &= ~AllAVX512; - } else { - // this feature is out of order - if (cpuid0700ECX & (1u << 1)) - features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI); } return features; @@ -493,152 +518,6 @@ static inline uint detectProcessorFeatures() } #endif -/* - * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note - * that the x86 version has a lot of blanks that must be kept and that the - * offset table's type is changed to make the table smaller. We also remove the - * terminating -1 that the script adds. - */ - -// begin generated -#if defined(Q_PROCESSOR_ARM) -/* Data: - neon - crc32 - */ -static const char features_string[] = - " neon\0" - " crc32\0" - "\0"; -static const int features_indices[] = { 0, 6 }; -#elif defined(Q_PROCESSOR_MIPS) -/* Data: - dsp - dspr2 -*/ -static const char features_string[] = - " dsp\0" - " dspr2\0" - "\0"; - -static const int features_indices[] = { - 0, 5 -}; -#elif defined(Q_PROCESSOR_X86) -/* Data: - sse3 - sse2 - avx512vbmi - - - - - - - ssse3 - - - fma - cmpxchg16b - - - - - - sse4.1 - sse4.2 - - movbe - popcnt - - aes - - - avx - f16c - rdrand - - - - - bmi - hle - avx2 - - - bmi2 - - - rtm - - - - - avx512f - avx512dq - rdseed - - - avx512ifma - - - - - avx512pf - avx512er - avx512cd - sha - avx512bw - avx512vl - */ -static const char features_string[] = - " sse3\0" - " sse2\0" - " avx512vbmi\0" - " ssse3\0" - " fma\0" - " cmpxchg16b\0" - " sse4.1\0" - " sse4.2\0" - " movbe\0" - " popcnt\0" - " aes\0" - " avx\0" - " f16c\0" - " rdrand\0" - " bmi\0" - " hle\0" - " avx2\0" - " bmi2\0" - " rtm\0" - " avx512f\0" - " avx512dq\0" - " rdseed\0" - " avx512ifma\0" - " avx512pf\0" - " avx512er\0" - " avx512cd\0" - " sha\0" - " avx512bw\0" - " avx512vl\0" - "\0"; - -static const quint8 features_indices[] = { - 0, 6, 12, 5, 5, 5, 5, 5, - 5, 24, 5, 5, 31, 36, 5, 5, - 5, 5, 5, 48, 56, 5, 64, 71, - 5, 79, 5, 5, 84, 89, 95, 5, - 5, 5, 5, 103, 108, 113, 5, 5, - 119, 5, 5, 125, 5, 5, 5, 5, - 130, 139, 149, 5, 5, 157, 5, 5, - 5, 5, 169, 179, 189, 199, 204, 214 -}; -#else -static const char features_string[] = ""; -static const int features_indices[] = { }; -#endif -// end generated - static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); // record what CPU features were enabled by default in this Qt build |