diff options
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 259 |
1 files changed, 65 insertions, 194 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 25340f2d02..fd9c6a7079 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -80,6 +80,43 @@ QT_BEGIN_NAMESPACE +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note + * we remove the terminating -1 that the script adds. + */ + +// begin generated +#if defined(Q_PROCESSOR_ARM) +/* Data: + neon + crc32 + */ +static const char features_string[] = + " neon\0" + " crc32\0" + "\0"; +static const int features_indices[] = { 0, 6 }; +#elif defined(Q_PROCESSOR_MIPS) +/* Data: + dsp + dspr2 +*/ +static const char features_string[] = + " dsp\0" + " dspr2\0" + "\0"; + +static const int features_indices[] = { + 0, 5 +}; +#elif defined(Q_PROCESSOR_X86) +# include "qsimd_x86.cpp" // generated by util/x86simdgen +#else +static const char features_string[] = ""; +static const int features_indices[] = { }; +#endif +// end generated + #if defined (Q_OS_NACL) static inline uint detectProcessorFeatures() { @@ -222,29 +259,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx) inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} #endif -static void cpuidFeatures07_00(uint &ebx, uint &ecx) +static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) { #if defined(Q_CC_GNU) qregisteruint rbx; // in case it's 64-bit qregisteruint rcx = 0; + qregisteruint rdx = 0; asm ("xchg " PICreg", %0\n" "cpuid\n" "xchg " PICreg", %0\n" - : "=&r" (rbx), "+&c" (rcx) - : "a" (7) - : "%edx"); + : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) + : "a" (7)); ebx = rbx; ecx = rcx; + edx = rdx; #elif defined(Q_OS_WIN) int info[4]; __cpuidex(info, 7, 0); ebx = info[1]; ecx = info[2]; + edx = info[3]; #elif defined(Q_CC_GHS) unsigned int info[4]; __CPUIDEX(7, 0, info); ebx = info[1]; ecx = info[2]; + edx = info[3]; #endif } @@ -285,8 +325,11 @@ static quint64 detectProcessorFeatures() static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) | (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) | (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) | - (Q_UINT64_C(1) << CpuFeatureAVX512VL) | - (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI); + (Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | + (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) | + (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) | + (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) | + (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS); static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512; static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2; @@ -299,52 +342,33 @@ static quint64 detectProcessorFeatures() Q_ASSERT(cpuidLevel >= 1); #endif - uint cpuid01ECX = 0, cpuid01EDX = 0; - cpuidFeatures01(cpuid01ECX, cpuid01EDX); - - // the low 32-bits of features is cpuid01ECX - // note: we need to check OS support for saving the AVX register state - features = cpuid01ECX; - -#if defined(Q_PROCESSOR_X86_32) - // x86 might not have SSE2 support - if (cpuid01EDX & (1u << 26)) - features |= Q_UINT64_C(1) << CpuFeatureSSE2; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2); - // we should verify that the OS enabled saving of the SSE state... -#else - // x86-64 or x32 - features |= Q_UINT64_C(1) << CpuFeatureSSE2; -#endif + uint results[X86CpuidMaxLeaf] = {}; + cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); + if (cpuidLevel >= 7) + cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + + // populate our feature list + for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + uint word = x86_locators[i] / 32; + uint bit = 1U << (x86_locators[i] % 32); + quint64 feature = Q_UINT64_C(1) << (i + 1); + if (results[word] & bit) + features |= feature; + } + // now check the AVX state uint xgetbvA = 0, xgetbvD = 0; - if (cpuid01ECX & (1u << 27)) { + if (results[Leaf1ECX] & (1u << 27)) { // XGETBV enabled xgetbv(0, xgetbvA, xgetbvD); } - uint cpuid0700EBX = 0; - uint cpuid0700ECX = 0; - if (cpuidLevel >= 7) { - cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX); - - // the high 32-bits of features is cpuid0700EBX - features |= quint64(cpuid0700EBX) << 32; - } - if ((xgetbvA & AVXState) != AVXState) { // support for YMM registers is disabled, disable all AVX features &= ~AllAVX; } else if ((xgetbvA & AVX512State) != AVX512State) { // support for ZMM registers or mask registers is disabled, disable all AVX512 features &= ~AllAVX512; - } else { - // this feature is out of order - if (cpuid0700ECX & (1u << 1)) - features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI); } return features; @@ -493,162 +517,12 @@ static inline uint detectProcessorFeatures() } #endif -/* - * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note - * that the x86 version has a lot of blanks that must be kept and that the - * offset table's type is changed to make the table smaller. We also remove the - * terminating -1 that the script adds. - */ - -// begin generated -#if defined(Q_PROCESSOR_ARM) -/* Data: - neon - crc32 - */ -static const char features_string[] = - " neon\0" - " crc32\0" - "\0"; -static const int features_indices[] = { 0, 6 }; -#elif defined(Q_PROCESSOR_MIPS) -/* Data: - dsp - dspr2 -*/ -static const char features_string[] = - " dsp\0" - " dspr2\0" - "\0"; - -static const int features_indices[] = { - 0, 5 -}; -#elif defined(Q_PROCESSOR_X86) -/* Data: - sse3 - sse2 - avx512vbmi - - - - - - - ssse3 - - - fma - cmpxchg16b - - - - - - sse4.1 - sse4.2 - - movbe - popcnt - - aes - - - avx - f16c - rdrand - - - - - bmi - hle - avx2 - - - bmi2 - - - rtm - - - - - avx512f - avx512dq - rdseed - - - avx512ifma - - - - - avx512pf - avx512er - avx512cd - sha - avx512bw - avx512vl - */ -static const char features_string[] = - " sse3\0" - " sse2\0" - " avx512vbmi\0" - " ssse3\0" - " fma\0" - " cmpxchg16b\0" - " sse4.1\0" - " sse4.2\0" - " movbe\0" - " popcnt\0" - " aes\0" - " avx\0" - " f16c\0" - " rdrand\0" - " bmi\0" - " hle\0" - " avx2\0" - " bmi2\0" - " rtm\0" - " avx512f\0" - " avx512dq\0" - " rdseed\0" - " avx512ifma\0" - " avx512pf\0" - " avx512er\0" - " avx512cd\0" - " sha\0" - " avx512bw\0" - " avx512vl\0" - "\0"; - -static const quint8 features_indices[] = { - 0, 6, 12, 5, 5, 5, 5, 5, - 5, 24, 5, 5, 31, 36, 5, 5, - 5, 5, 5, 48, 56, 5, 64, 71, - 5, 79, 5, 5, 84, 89, 95, 5, - 5, 5, 5, 103, 108, 113, 5, 5, - 119, 5, 5, 125, 5, 5, 5, 5, - 130, 139, 149, 5, 5, 157, 5, 5, - 5, 5, 169, 179, 189, 199, 204, 214 -}; -#else -static const char features_string[] = ""; -static const int features_indices[] = { }; -#endif -// end generated - static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); // record what CPU features were enabled by default in this Qt build static const quint64 minFeature = qCompilerCpuFeatures; -#ifdef Q_ATOMIC_INT64_IS_SUPPORTED Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) }; -#else -Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) }; -#endif void qDetectCpuFeatures() { @@ -681,9 +555,6 @@ void qDetectCpuFeatures() } qt_cpu_features[0].store(f | quint32(QSimdInitialized)); -#ifndef Q_ATOMIC_INT64_IS_SUPPORTED - qt_cpu_features[1].store(f >> 32); -#endif } void qDumpCPUFeatures() |