summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r--src/corelib/tools/qsimd.cpp413
1 files changed, 413 insertions, 0 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
new file mode 100644
index 0000000000..f96703d1e3
--- /dev/null
+++ b/src/corelib/tools/qsimd.cpp
@@ -0,0 +1,413 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qsimd_p.h"
+#include <QByteArray>
+#include <stdio.h>
+
+#if defined(Q_OS_WINCE)
+#include <windows.h>
+#endif
+
+#if defined(Q_OS_WIN64) && !defined(Q_CC_GNU)
+#include <intrin.h>
+#endif
+
+#if defined(Q_OS_LINUX) && defined(__arm__)
+#include "private/qcore_unix_p.h"
+
+// the kernel header definitions for HWCAP_*
+// (the ones we need/may need anyway)
+
+// copied from <asm/hwcap.h> (ARM)
+#define HWCAP_IWMMXT 512
+#define HWCAP_CRUNCH 1024
+#define HWCAP_THUMBEE 2048
+#define HWCAP_NEON 4096
+#define HWCAP_VFPv3 8192
+#define HWCAP_VFPv3D16 16384
+
+// copied from <linux/auxvec.h>
+#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
+
+#endif
+
+QT_BEGIN_NAMESPACE
+
+#if defined (Q_OS_NACL)
+static inline uint detectProcessorFeatures()
+{
+ return 0;
+}
+#elif defined (Q_OS_WINCE)
+static inline uint detectProcessorFeatures()
+{
+ uint features = 0;
+
+#if defined (ARM)
+ if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
+ features = IWMMXT;
+ return features;
+ }
+#elif defined(_X86_)
+ features = 0;
+#if defined QT_HAVE_MMX
+ if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
+ features |= MMX;
+#endif
+#if defined QT_HAVE_3DNOW
+ if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
+ features |= MMX3DNOW;
+#endif
+ return features;
+#endif
+ features = 0;
+ return features;
+}
+
+#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
+static inline uint detectProcessorFeatures()
+{
+ uint features = 0;
+
+#if defined(Q_OS_LINUX)
+ int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
+ if (auxv != -1) {
+ unsigned long vector[64];
+ int nread;
+ while (features == 0) {
+ nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
+ if (nread <= 0) {
+ // EOF or error
+ break;
+ }
+
+ int max = nread / (sizeof vector[0]);
+ for (int i = 0; i < max; i += 2)
+ if (vector[i] == AT_HWCAP) {
+ if (vector[i+1] & HWCAP_IWMMXT)
+ features |= IWMMXT;
+ if (vector[i+1] & HWCAP_NEON)
+ features |= NEON;
+ break;
+ }
+ }
+
+ ::qt_safe_close(auxv);
+ return features;
+ }
+ // fall back if /proc/self/auxv wasn't found
+#endif
+
+#if defined(QT_HAVE_IWMMXT)
+ // runtime detection only available when running as a previlegied process
+ features = IWMMXT;
+#elif defined(QT_ALWAYS_HAVE_NEON)
+ features = NEON;
+#endif
+
+ return features;
+}
+
+#elif defined(__i386__) || defined(_M_IX86)
+static inline uint detectProcessorFeatures()
+{
+ uint features = 0;
+
+ unsigned int extended_result = 0;
+ unsigned int feature_result = 0;
+ uint result = 0;
+ /* see p. 118 of amd64 instruction set manual Vol3 */
+#if defined(Q_CC_GNU)
+ long cpuid_supported, tmp1;
+ asm ("pushf\n"
+ "pop %0\n"
+ "mov %0, %1\n"
+ "xor $0x00200000, %0\n"
+ "push %0\n"
+ "popf\n"
+ "pushf\n"
+ "pop %0\n"
+ "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
+ : "=a" (cpuid_supported), "=r" (tmp1)
+ );
+ if (cpuid_supported) {
+ asm ("xchg %%ebx, %2\n"
+ "cpuid\n"
+ "xchg %%ebx, %2\n"
+ : "=c" (feature_result), "=d" (result), "=&r" (tmp1)
+ : "a" (1));
+
+ asm ("xchg %%ebx, %1\n"
+ "cpuid\n"
+ "cmp $0x80000000, %%eax\n"
+ "jnbe 1f\n"
+ "xor %0, %0\n"
+ "jmp 2f\n"
+ "1:\n"
+ "mov $0x80000001, %%eax\n"
+ "cpuid\n"
+ "2:\n"
+ "xchg %%ebx, %1\n"
+ : "=d" (extended_result), "=&r" (tmp1)
+ : "a" (0x80000000)
+ : "%ecx"
+ );
+ }
+
+#elif defined (Q_OS_WIN)
+ _asm {
+ push eax
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ pop eax
+ mov ebx, eax
+ xor eax, 00200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov edx, 0
+ xor eax, ebx
+ jz skip
+
+ mov eax, 1
+ cpuid
+ mov result, edx
+ mov feature_result, ecx
+ skip:
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ }
+
+ _asm {
+ push eax
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ pop eax
+ mov ebx, eax
+ xor eax, 00200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov edx, 0
+ xor eax, ebx
+ jz skip2
+
+ mov eax, 80000000h
+ cpuid
+ cmp eax, 80000000h
+ jbe skip2
+ mov eax, 80000001h
+ cpuid
+ mov extended_result, edx
+ skip2:
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ }
+#endif
+
+
+ // result now contains the standard feature bits
+ if (result & (1u << 15))
+ features |= CMOV;
+ if (result & (1u << 23))
+ features |= MMX;
+ if (extended_result & (1u << 22))
+ features |= MMXEXT;
+ if (extended_result & (1u << 31))
+ features |= MMX3DNOW;
+ if (extended_result & (1u << 30))
+ features |= MMX3DNOWEXT;
+ if (result & (1u << 25))
+ features |= SSE;
+ if (result & (1u << 26))
+ features |= SSE2;
+ if (feature_result & (1u))
+ features |= SSE3;
+ if (feature_result & (1u << 9))
+ features |= SSSE3;
+ if (feature_result & (1u << 19))
+ features |= SSE4_1;
+ if (feature_result & (1u << 20))
+ features |= SSE4_2;
+ if (feature_result & (1u << 28))
+ features |= AVX;
+
+ return features;
+}
+
+#elif defined(__x86_64) || defined(Q_OS_WIN64)
+static inline uint detectProcessorFeatures()
+{
+ uint features = MMX|SSE|SSE2|CMOV;
+ uint feature_result = 0;
+
+#if defined (Q_OS_WIN64)
+ {
+ int info[4];
+ __cpuid(info, 1);
+ feature_result = info[2];
+ }
+#elif defined(Q_CC_GNU)
+ quint64 tmp;
+ asm ("xchg %%rbx, %1\n"
+ "cpuid\n"
+ "xchg %%rbx, %1\n"
+ : "=c" (feature_result), "=&r" (tmp)
+ : "a" (1)
+ : "%edx"
+ );
+#endif
+
+ if (feature_result & (1u))
+ features |= SSE3;
+ if (feature_result & (1u << 9))
+ features |= SSSE3;
+ if (feature_result & (1u << 19))
+ features |= SSE4_1;
+ if (feature_result & (1u << 20))
+ features |= SSE4_2;
+ if (feature_result & (1u << 28))
+ features |= AVX;
+
+ return features;
+}
+
+#elif defined(__ia64__)
+static inline uint detectProcessorFeatures()
+{
+ return MMX|SSE|SSE2;
+}
+
+#else
+static inline uint detectProcessorFeatures()
+{
+ return 0;
+}
+#endif
+
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below.
+ * Here's the data (don't forget the ONE leading space):
+ mmx
+ mmxext
+ mmx3dnow
+ mmx3dnowext
+ sse
+ sse2
+ cmov
+ iwmmxt
+ neon
+ sse3
+ ssse3
+ sse4.1
+ sse4.2
+ avx
+ */
+
+// begin generated
+static const char features_string[] =
+ " mmx\0"
+ " mmxext\0"
+ " mmx3dnow\0"
+ " mmx3dnowext\0"
+ " sse\0"
+ " sse2\0"
+ " cmov\0"
+ " iwmmxt\0"
+ " neon\0"
+ " sse3\0"
+ " ssse3\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " avx\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5, 13, 23, 36, 41, 47, 53,
+ 61, 67, 73, 80, 88, 96, -1
+};
+// end generated
+
+const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
+
+uint qDetectCPUFeatures()
+{
+ static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
+ if (features != -1)
+ return features;
+
+ uint f = detectProcessorFeatures();
+ QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
+ if (!disable.isEmpty()) {
+ disable.prepend(' ');
+ for (int i = 0; i < features_count; ++i) {
+ if (disable.contains(features_string + features_indices[i]))
+ f &= ~(1 << i);
+ }
+ }
+
+ features = f;
+ return features;
+}
+
+void qDumpCPUFeatures()
+{
+ uint features = qDetectCPUFeatures();
+ printf("Processor features: ");
+ for (int i = 0; i < features_count; ++i) {
+ if (features & (1 << i))
+ printf("%s", features_string + features_indices[i]);
+ }
+ puts("");
+}
+
+QT_END_NAMESPACE