diff options
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp new file mode 100644 index 0000000000..f96703d1e3 --- /dev/null +++ b/src/corelib/tools/qsimd.cpp @@ -0,0 +1,413 @@ +/**************************************************************************** +** +** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qsimd_p.h" +#include <QByteArray> +#include <stdio.h> + +#if defined(Q_OS_WINCE) +#include <windows.h> +#endif + +#if defined(Q_OS_WIN64) && !defined(Q_CC_GNU) +#include <intrin.h> +#endif + +#if defined(Q_OS_LINUX) && defined(__arm__) +#include "private/qcore_unix_p.h" + +// the kernel header definitions for HWCAP_* +// (the ones we need/may need anyway) + +// copied from <asm/hwcap.h> (ARM) +#define HWCAP_IWMMXT 512 +#define HWCAP_CRUNCH 1024 +#define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 +#define HWCAP_VFPv3 8192 +#define HWCAP_VFPv3D16 16384 + +// copied from <linux/auxvec.h> +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ + +#endif + +QT_BEGIN_NAMESPACE + +#if defined (Q_OS_NACL) +static inline uint detectProcessorFeatures() +{ + return 0; +} +#elif defined (Q_OS_WINCE) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + +#if defined (ARM) + if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { + features = IWMMXT; + return features; + } +#elif defined(_X86_) + features = 0; +#if defined QT_HAVE_MMX + if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) + features |= MMX; +#endif +#if defined QT_HAVE_3DNOW + if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE)) + features |= MMX3DNOW; +#endif + return features; +#endif + features = 0; + return features; +} + +#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + +#if defined(Q_OS_LINUX) + int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY); + if (auxv != -1) { + unsigned long vector[64]; + int nread; + while (features == 0) { + nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector); + if (nread <= 0) { + // EOF or error + break; + } + + int max = nread / (sizeof vector[0]); + for (int i = 0; i < max; i += 2) + if (vector[i] == AT_HWCAP) { + if (vector[i+1] & HWCAP_IWMMXT) + features |= IWMMXT; + if (vector[i+1] & HWCAP_NEON) + features |= NEON; + break; + } + } + + ::qt_safe_close(auxv); + return features; + } + // fall back if /proc/self/auxv wasn't found +#endif + +#if defined(QT_HAVE_IWMMXT) + // runtime detection only available when running as a previlegied process + features = IWMMXT; +#elif defined(QT_ALWAYS_HAVE_NEON) + features = NEON; +#endif + + return features; +} + +#elif defined(__i386__) || defined(_M_IX86) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + + unsigned int extended_result = 0; + unsigned int feature_result = 0; + uint result = 0; + /* see p. 118 of amd64 instruction set manual Vol3 */ +#if defined(Q_CC_GNU) + long cpuid_supported, tmp1; + asm ("pushf\n" + "pop %0\n" + "mov %0, %1\n" + "xor $0x00200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "pop %0\n" + "xor %1, %0\n" // %eax is now 0 if CPUID is not supported + : "=a" (cpuid_supported), "=r" (tmp1) + ); + if (cpuid_supported) { + asm ("xchg %%ebx, %2\n" + "cpuid\n" + "xchg %%ebx, %2\n" + : "=c" (feature_result), "=d" (result), "=&r" (tmp1) + : "a" (1)); + + asm ("xchg %%ebx, %1\n" + "cpuid\n" + "cmp $0x80000000, %%eax\n" + "jnbe 1f\n" + "xor %0, %0\n" + "jmp 2f\n" + "1:\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "2:\n" + "xchg %%ebx, %1\n" + : "=d" (extended_result), "=&r" (tmp1) + : "a" (0x80000000) + : "%ecx" + ); + } + +#elif defined (Q_OS_WIN) + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip + + mov eax, 1 + cpuid + mov result, edx + mov feature_result, ecx + skip: + pop edx + pop ecx + pop ebx + pop eax + } + + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip2 + + mov eax, 80000000h + cpuid + cmp eax, 80000000h + jbe skip2 + mov eax, 80000001h + cpuid + mov extended_result, edx + skip2: + pop edx + pop ecx + pop ebx + pop eax + } +#endif + + + // result now contains the standard feature bits + if (result & (1u << 15)) + features |= CMOV; + if (result & (1u << 23)) + features |= MMX; + if (extended_result & (1u << 22)) + features |= MMXEXT; + if (extended_result & (1u << 31)) + features |= MMX3DNOW; + if (extended_result & (1u << 30)) + features |= MMX3DNOWEXT; + if (result & (1u << 25)) + features |= SSE; + if (result & (1u << 26)) + features |= SSE2; + if (feature_result & (1u)) + features |= SSE3; + if (feature_result & (1u << 9)) + features |= SSSE3; + if (feature_result & (1u << 19)) + features |= SSE4_1; + if (feature_result & (1u << 20)) + features |= SSE4_2; + if (feature_result & (1u << 28)) + features |= AVX; + + return features; +} + +#elif defined(__x86_64) || defined(Q_OS_WIN64) +static inline uint detectProcessorFeatures() +{ + uint features = MMX|SSE|SSE2|CMOV; + uint feature_result = 0; + +#if defined (Q_OS_WIN64) + { + int info[4]; + __cpuid(info, 1); + feature_result = info[2]; + } +#elif defined(Q_CC_GNU) + quint64 tmp; + asm ("xchg %%rbx, %1\n" + "cpuid\n" + "xchg %%rbx, %1\n" + : "=c" (feature_result), "=&r" (tmp) + : "a" (1) + : "%edx" + ); +#endif + + if (feature_result & (1u)) + features |= SSE3; + if (feature_result & (1u << 9)) + features |= SSSE3; + if (feature_result & (1u << 19)) + features |= SSE4_1; + if (feature_result & (1u << 20)) + features |= SSE4_2; + if (feature_result & (1u << 28)) + features |= AVX; + + return features; +} + +#elif defined(__ia64__) +static inline uint detectProcessorFeatures() +{ + return MMX|SSE|SSE2; +} + +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} +#endif + +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. + * Here's the data (don't forget the ONE leading space): + mmx + mmxext + mmx3dnow + mmx3dnowext + sse + sse2 + cmov + iwmmxt + neon + sse3 + ssse3 + sse4.1 + sse4.2 + avx + */ + +// begin generated +static const char features_string[] = + " mmx\0" + " mmxext\0" + " mmx3dnow\0" + " mmx3dnowext\0" + " sse\0" + " sse2\0" + " cmov\0" + " iwmmxt\0" + " neon\0" + " sse3\0" + " ssse3\0" + " sse4.1\0" + " sse4.2\0" + " avx\0" + "\0"; + +static const int features_indices[] = { + 0, 5, 13, 23, 36, 41, 47, 53, + 61, 67, 73, 80, 88, 96, -1 +}; +// end generated + +const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]); + +uint qDetectCPUFeatures() +{ + static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); + if (features != -1) + return features; + + uint f = detectProcessorFeatures(); + QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); + if (!disable.isEmpty()) { + disable.prepend(' '); + for (int i = 0; i < features_count; ++i) { + if (disable.contains(features_string + features_indices[i])) + f &= ~(1 << i); + } + } + + features = f; + return features; +} + +void qDumpCPUFeatures() +{ + uint features = qDetectCPUFeatures(); + printf("Processor features: "); + for (int i = 0; i < features_count; ++i) { + if (features & (1 << i)) + printf("%s", features_string + features_indices[i]); + } + puts(""); +} + +QT_END_NAMESPACE |