summaryrefslogtreecommitdiffstats
path: root/src/corelib/global
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global')
-rw-r--r--src/corelib/global/global.pri2
-rw-r--r--src/corelib/global/qsimd.cpp718
-rw-r--r--src/corelib/global/qsimd_p.h396
-rw-r--r--src/corelib/global/qsimd_x86.cpp155
-rw-r--r--src/corelib/global/qsimd_x86_p.h261
5 files changed, 1532 insertions, 0 deletions
diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri
index 1da69aba9b..389e866987 100644
--- a/src/corelib/global/global.pri
+++ b/src/corelib/global/global.pri
@@ -20,6 +20,7 @@ HEADERS += \
global/qtypeinfo.h \
global/qsysinfo.h \
global/qisenum.h \
+ global/qsimd_p.h \
global/qtypetraits.h \
global/qflags.h \
global/qrandom.h \
@@ -38,6 +39,7 @@ SOURCES += \
global/qoperatingsystemversion.cpp \
global/qlogging.cpp \
global/qrandom.cpp \
+ global/qsimd.cpp \
global/qhooks.cpp
# To get listed in IDEs
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp
new file mode 100644
index 0000000000..75c380ee8a
--- /dev/null
+++ b/src/corelib/global/qsimd.cpp
@@ -0,0 +1,718 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2019 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qsimd_p.h"
+#include "qalgorithms.h"
+#include <QByteArray>
+#include <stdio.h>
+
+#ifdef Q_OS_LINUX
+# include "../testlib/3rdparty/valgrind_p.h"
+#endif
+
+#if defined(Q_OS_WIN)
+# if !defined(Q_CC_GNU)
+# include <intrin.h>
+# endif
+#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32))
+#include "private/qcore_unix_p.h"
+
+// the kernel header definitions for HWCAP_*
+// (the ones we need/may need anyway)
+
+// copied from <asm/hwcap.h> (ARM)
+#define HWCAP_CRUNCH 1024
+#define HWCAP_THUMBEE 2048
+#define HWCAP_NEON 4096
+#define HWCAP_VFPv3 8192
+#define HWCAP_VFPv3D16 16384
+
+// copied from <asm/hwcap.h> (ARM):
+#define HWCAP2_CRC32 (1 << 4)
+
+// copied from <asm/hwcap.h> (Aarch64)
+#define HWCAP_CRC32 (1 << 7)
+
+// copied from <linux/auxvec.h>
+#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
+#define AT_HWCAP2 26 /* extension of AT_HWCAP */
+
+#elif defined(Q_CC_GHS)
+#include <INTEGRITY_types.h>
+#endif
+
+QT_BEGIN_NAMESPACE
+
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * we remove the terminating -1 that the script adds.
+ */
+
+// begin generated
+//
+// features_indices[i] is the offset inside features_string of the name of
+// feature bit i, i.e. of mask (Q_UINT64_C(1) << i); that is how
+// qDetectCpuFeatures() and qDumpCPUFeatures() index these tables. Bit 0 is
+// QSimdInitialized rather than a CPU feature, so entry 0 is an empty
+// placeholder name; the real features start at entry 1, matching the
+// CpuFeature* mask values (2, 4, ...) declared in qsimd_p.h.
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
+ (empty placeholder for bit 0)
+ neon
+ crc32
+ */
+static const char features_string[] =
+    "\0"
+    " neon\0"
+    " crc32\0"
+    "\0";
+static const int features_indices[] = { 0, 1, 7 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ (empty placeholder for bit 0)
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+    "\0"
+    " dsp\0"
+    " dspr2\0"
+    "\0";
+
+static const int features_indices[] = {
+    0, 1, 6
+};
+#elif defined(Q_PROCESSOR_X86)
+# include "qsimd_x86.cpp" // generated by util/x86simdgen
+#else
+// No known features: only the bit-0 placeholder. A one-element table also
+// avoids declaring a zero-length array, which is not standard C++.
+static const char features_string[] = "";
+static const int features_indices[] = { 0 };
+#endif
+// end generated
+
+#if defined (Q_OS_NACL)
+// Native Client build: no runtime detection is attempted, so no feature bit is
+// reported. Returns quint64 like the other detectProcessorFeatures() variants
+// in this file, whose result qDetectCpuFeatures() stores in a quint64.
+static inline quint64 detectProcessorFeatures()
+{
+    return 0;
+}
+#elif defined(Q_PROCESSOR_ARM)
+// Detects the ARM CPU features known to this file and returns them as a mask
+// of CpuFeature* values (CpuFeatureNEON, CpuFeatureCRC32).
+//
+// On Linux the kernel's auxiliary vector is read from /proc/self/auxv as
+// (type, value) pairs of unsigned long and the AT_HWCAP / AT_HWCAP2 entries
+// are decoded. If that file cannot be opened (or on other systems), the
+// result falls back to what the compiler was told to target.
+//
+// The CpuFeature* enumerators are bit masks, not bit positions (the header
+// ORs them straight into qCompilerCpuFeatures), so they are ORed in directly
+// here as well.
+static inline quint64 detectProcessorFeatures()
+{
+    quint64 features = 0;
+
+#if defined(Q_OS_LINUX)
+#  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
+    features |= CpuFeatureNEON; // NEON is always available on ARMv8 64bit.
+#  endif
+    int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
+    if (auxv != -1) {
+        unsigned long vector[64];
+        // Read the whole vector. The loop must not depend on "features"
+        // being empty: on AArch64 NEON is already set above, and the HWCAP
+        // entry still has to be inspected for CRC32.
+        for (;;) {
+            int nread = qt_safe_read(auxv, (char *)vector, sizeof vector);
+            if (nread <= 0) {
+                // EOF or error
+                break;
+            }
+
+            int max = nread / (sizeof vector[0]);
+            // i + 1 < max: only look at complete (type, value) pairs
+            for (int i = 0; i + 1 < max; i += 2) {
+                if (vector[i] == AT_HWCAP) {
+#  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
+                    // For Aarch64:
+                    if (vector[i+1] & HWCAP_CRC32)
+                        features |= CpuFeatureCRC32;
+#  endif
+                    // Aarch32, or ARMv7 or before:
+                    if (vector[i+1] & HWCAP_NEON)
+                        features |= CpuFeatureNEON;
+                }
+#  if defined(Q_PROCESSOR_ARM_32)
+                // For Aarch32:
+                if (vector[i] == AT_HWCAP2) {
+                    if (vector[i+1] & HWCAP2_CRC32)
+                        features |= CpuFeatureCRC32;
+                }
+#  endif
+            }
+        }
+
+        qt_safe_close(auxv);
+        return features;
+    }
+    // fall back if /proc/self/auxv wasn't found
+#endif
+
+#if defined(__ARM_NEON__)
+    features |= CpuFeatureNEON;
+#endif
+#if defined(__ARM_FEATURE_CRC32)
+    features |= CpuFeatureCRC32;
+#endif
+
+    return features;
+}
+
+#elif defined(Q_PROCESSOR_X86)
+
+// Name of the register that the inline-asm CPUID wrappers below swap with a
+// scratch operand (xchg) immediately before and after the cpuid instruction,
+// so that the register holds its previous value again when the asm ends.
+// "%%" is how a literal '%' is written in a GCC extended-asm template.
+// NOTE(review): presumably needed because EBX/RBX may be reserved by the
+// compiler (e.g. as the PIC base register) and cannot be an asm output — confirm.
+#ifdef Q_PROCESSOR_X86_32
+# define PICreg "%%ebx"
+#else
+# define PICreg "%%rbx"
+#endif
+
+static bool checkRdrndWorks() noexcept;
+
+// Returns the value CPUID leaf 0 reports in EAX (the highest basic CPUID leaf
+// that may be queried), or 0 when CPUID is unavailable or the compiler/OS
+// combination has no way to issue it. Emscripten builds return a fixed 6.
+static int maxBasicCpuidSupported()
+{
+#if defined(Q_CC_EMSCRIPTEN)
+ return 6; // All features supported by Emscripten
+#elif defined(Q_CC_GNU)
+ qregisterint tmp1;
+
+# if Q_PROCESSOR_X86 < 5
+ // check if the CPUID instruction is supported
+ // (only compiled when targeting pre-Pentium x86: toggles bit 0x00200000 of
+ // the flags register and checks whether the change sticks)
+ long cpuid_supported;
+ asm ("pushf\n"
+ "pop %0\n"
+ "mov %0, %1\n"
+ "xor $0x00200000, %0\n"
+ "push %0\n"
+ "popf\n"
+ "pushf\n"
+ "pop %0\n"
+ "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
+ : "=a" (cpuid_supported), "=r" (tmp1)
+ );
+ if (!cpuid_supported)
+ return 0;
+# endif
+
+ // CPUID with EAX = 0; PICreg is parked in tmp1 around the instruction and
+ // restored afterwards, ECX/EDX are declared clobbered.
+ int result;
+ asm ("xchg " PICreg", %1\n"
+ "cpuid\n"
+ "xchg " PICreg", %1\n"
+ : "=&a" (result), "=&r" (tmp1)
+ : "0" (0)
+ : "ecx", "edx");
+ return result;
+#elif defined(Q_OS_WIN)
+ // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
+ int info[4];
+ __cpuid(info, 0);
+ return info[0];
+#elif defined(Q_CC_GHS)
+ unsigned int info[4];
+ __CPUID(0, info);
+ return info[0];
+#else
+ return 0;
+#endif
+}
+
+// Runs CPUID leaf 1 and stores the resulting ECX and EDX register values in
+// the two output parameters. On configurations with no way to issue CPUID the
+// outputs are left untouched, so callers should pre-initialise them.
+static void cpuidFeatures01(uint &ecx, uint &edx)
+{
+#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
+ // tmp1 temporarily holds PICreg, which is swapped back after cpuid
+ qregisterint tmp1;
+ asm ("xchg " PICreg", %2\n"
+ "cpuid\n"
+ "xchg " PICreg", %2\n"
+ : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
+ : "a" (1));
+#elif defined(Q_OS_WIN)
+ // info[] receives EAX, EBX, ECX, EDX in that order
+ int info[4];
+ __cpuid(info, 1);
+ ecx = info[2];
+ edx = info[3];
+#elif defined(Q_CC_GHS)
+ unsigned int info[4];
+ __CPUID(1, info);
+ ecx = info[2];
+ edx = info[3];
+#else
+ Q_UNUSED(ecx);
+ Q_UNUSED(edx);
+#endif
+}
+
+#ifdef Q_OS_WIN
+// Fallback overload that just zero-fills info[]. Its last parameter is
+// __int64, while cpuidFeatures07_00() passes an int.
+// NOTE(review): presumably a compiler-provided __cpuidex(int*, int, int) is
+// the better overload match whenever it exists, leaving this one as the
+// fallback for toolchains without the intrinsic — confirm.
+inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
+#endif
+
+// Runs CPUID leaf 7, sub-leaf 0, and stores the resulting EBX, ECX and EDX
+// register values in the output parameters. On configurations with no way to
+// issue CPUID the outputs are left untouched.
+static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
+{
+#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
+ qregisteruint rbx; // in case it's 64-bit
+ // rcx is an in/out operand: its initial 0 selects sub-leaf 0
+ qregisteruint rcx = 0;
+ qregisteruint rdx = 0;
+ // After the second xchg, rbx holds what cpuid wrote to EBX/RBX and PICreg
+ // holds its original value again.
+ asm ("xchg " PICreg", %0\n"
+ "cpuid\n"
+ "xchg " PICreg", %0\n"
+ : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
+ : "a" (7));
+ ebx = rbx;
+ ecx = rcx;
+ edx = rdx;
+#elif defined(Q_OS_WIN)
+ // info[] receives EAX, EBX, ECX, EDX in that order
+ int info[4];
+ __cpuidex(info, 7, 0);
+ ebx = info[1];
+ ecx = info[2];
+ edx = info[3];
+#elif defined(Q_CC_GHS)
+ unsigned int info[4];
+ __CPUIDEX(7, 0, info);
+ ebx = info[1];
+ ecx = info[2];
+ edx = info[3];
+#else
+ Q_UNUSED(ebx);
+ Q_UNUSED(ecx);
+ Q_UNUSED(edx);
+#endif
+}
+
+#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
+// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
+// (takes __int64 and reports 0, i.e. "no extended state enabled")
+inline quint64 _xgetbv(__int64) { return 0; }
+#endif
+// Executes XGETBV for extended control register number "in" and returns the
+// low 32 bits of the result in eax and the high 32 bits in edx. On
+// configurations with no way to issue the instruction the outputs are left
+// untouched, so callers should pre-initialise them.
+static void xgetbv(uint in, uint &eax, uint &edx)
+{
+#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
+ // emitted as raw opcode bytes rather than by mnemonic
+ asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
+ : "=a" (eax), "=d" (edx)
+ : "c" (in));
+#elif defined(Q_OS_WIN)
+ quint64 result = _xgetbv(in);
+ eax = result;
+ edx = result >> 32;
+#else
+ Q_UNUSED(in);
+ Q_UNUSED(eax);
+ Q_UNUSED(edx);
+#endif
+}
+
+// Detects the x86 CPU features and returns them as a bit mask.
+//
+// Steps: query CPUID leaf 1 (and leaf 7 when available), translate the raw
+// CPUID bits into feature bits through the x86_locators table, then remove
+// AVX / AVX-512 features whose register state is not enabled according to
+// XGETBV(0), and finally drop RDRND/RDSEED if checkRdrndWorks() rejects the
+// hardware generator.
+static quint64 detectProcessorFeatures()
+{
+ // Flags from the CR0 / XCR0 state register
+ enum XCR0Flags {
+ X87 = 1 << 0,
+ XMM0_15 = 1 << 1,
+ YMM0_15Hi128 = 1 << 2,
+ BNDRegs = 1 << 3,
+ BNDCSR = 1 << 4,
+ OpMask = 1 << 5,
+ ZMM0_15Hi256 = 1 << 6,
+ ZMM16_31 = 1 << 7,
+
+ // combinations that must all be set for the respective feature family
+ SSEState = XMM0_15,
+ AVXState = XMM0_15 | YMM0_15Hi128,
+ AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
+ };
+ // Masks of everything to clear when a register state is unavailable:
+ // AllAVX2 covers AVX2 plus AllAVX512, AllAVX additionally covers AVX.
+ static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
+ static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
+
+ quint64 features = 0;
+ int cpuidLevel = maxBasicCpuidSupported();
+#if Q_PROCESSOR_X86 < 5
+ if (cpuidLevel < 1)
+ return 0;
+#else
+ Q_ASSERT(cpuidLevel >= 1);
+#endif
+
+ // raw CPUID output words, zero-initialised so that leaves which are not
+ // queried contribute no feature bits
+ uint results[X86CpuidMaxLeaf] = {};
+ cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+
+ // populate our feature list
+ // x86_locators[i] encodes (word index * 32 + bit index) into results[];
+ // locator i maps to feature bit i + 1.
+ for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ uint word = x86_locators[i] / 32;
+ uint bit = 1U << (x86_locators[i] % 32);
+ quint64 feature = Q_UINT64_C(1) << (i + 1);
+ if (results[word] & bit)
+ features |= feature;
+ }
+
+ // now check the AVX state
+ // (xgetbvD receives the upper half of the XGETBV result; it is not
+ // examined further below)
+ uint xgetbvA = 0, xgetbvD = 0;
+ if (results[Leaf1ECX] & (1u << 27)) {
+ // XGETBV enabled
+ xgetbv(0, xgetbvA, xgetbvD);
+ }
+
+ if ((xgetbvA & AVXState) != AVXState) {
+ // support for YMM registers is disabled, disable all AVX
+ features &= ~AllAVX;
+ } else if ((xgetbvA & AVX512State) != AVX512State) {
+ // support for ZMM registers or mask registers is disabled, disable all AVX512
+ features &= ~AllAVX512;
+ }
+
+ // a generator that fails the sanity check disables both RDRND and RDSEED
+ if (features & CpuFeatureRDRND && !checkRdrndWorks())
+ features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
+
+ return features;
+}
+
+#elif defined(Q_PROCESSOR_MIPS_32)
+
+#if defined(Q_OS_LINUX)
+//
+// Do not use QByteArray: it could use SIMD instructions itself at
+// some point, thus creating a recursive dependency. Instead, use a
+// QSimpleBuffer, which has the bare minimum needed to use memory
+// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
+//
+// Minimal growable byte buffer built directly on realloc()/free().
+//
+//  data  - heap block (null until the first growth)
+//  alloc - number of bytes allocated at data
+//  size  - number of bytes currently in use (size <= alloc)
+//
+// Out-of-memory policy: a failed realloc() keeps the old block and contents;
+// the requested size is then clamped to what is actually allocated, so
+// callers may end up with less room than they asked for.
+struct QSimpleBuffer {
+    static const int chunk_size = 256;
+    char *data;
+    unsigned alloc;
+    unsigned size;
+
+    QSimpleBuffer(): data(0), alloc(0), size(0) {}
+    ~QSimpleBuffer() { ::free(data); }
+
+    // Sets the used size to newsize, growing the allocation in multiples of
+    // chunk_size when needed. Never shrinks the allocation.
+    void resize(unsigned newsize) {
+        if (newsize > alloc) {
+            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
+            if (newalloc < newsize) newalloc = newsize; // rounding overflowed
+            // Go through a temporary: assigning realloc()'s result straight
+            // to data would leak the old block and leave data null on failure.
+            char *grown = static_cast<char*>(::realloc(data, newalloc));
+            if (grown) {
+                data = grown;
+                alloc = newalloc;
+            } else {
+                newsize = alloc; // out of memory: keep what we have
+            }
+        }
+        size = newsize;
+    }
+    // Appends the first appendsize bytes of other (fewer if memory ran out).
+    void append(const QSimpleBuffer &other, unsigned appendsize) {
+        unsigned oldsize = size;
+        resize(oldsize + appendsize);
+        unsigned room = size - oldsize; // == appendsize unless growth failed
+        if (room)
+            ::memcpy(data + oldsize, other.data, room);
+    }
+    // Drops the first amount bytes, moving the remainder to the front.
+    void popleft(unsigned amount) {
+        if (amount >= size) return resize(0);
+        size -= amount;
+        ::memmove(data, data + amount, size);
+    }
+    // Returns the contents as a NUL-terminated string. The terminator is
+    // stored just past the used bytes and does not count towards size.
+    char* cString() {
+        if (size >= alloc) {
+            // no spare byte for the terminator: grow, but keep size as it was
+            const unsigned used = size;
+            resize(used + 1);
+            if (alloc > used) {
+                size = used;
+            } else if (alloc) {
+                size = alloc - 1; // growth failed: sacrifice the last byte
+            } else {
+                static char empty[1] = { '\0' }; // nothing was ever allocated
+                return empty;
+            }
+        }
+        data[size] = '\0';
+        return data;
+    }
+};
+
+//
+// Uses a scratch "buffer" (which must be used for all reads done in the
+// same file descriptor) to read chunks of data from a file, to read
+// one line at a time. The line is appended to "line" WITHOUT its trailing
+// newline character ('\n'). Data left over at EOF that is not terminated by
+// a newline is delivered as a final line. On EOF nothing is appended, so
+// line.size stays zero if the caller cleared it beforehand; note that an
+// empty line in the file looks exactly the same to the caller.
+//
+static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
+{
+    for (;;) {
+        // Only search once there is data: buffer.data is still null before
+        // the first read, and memchr() must not be given a null pointer.
+        char *newline = buffer.size
+                ? static_cast<char*>(::memchr(buffer.data, '\n', buffer.size))
+                : 0;
+        if (newline) {
+            unsigned piece_size = newline - buffer.data + 1;
+            line.append(buffer, piece_size);
+            buffer.popleft(piece_size);
+            if (line.size)
+                line.resize(line.size - 1); // strip the '\n'
+            return;
+        }
+
+        // make room for one more chunk without changing the used size
+        if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
+            unsigned oldsize = buffer.size;
+            buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
+            buffer.size = oldsize;
+        }
+
+        // only read if the room really exists (the allocation may have failed)
+        ssize_t read_bytes = 0;
+        if (buffer.alloc - buffer.size >= unsigned(QSimpleBuffer::chunk_size))
+            read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
+        if (read_bytes > 0) {
+            buffer.size += read_bytes;
+            continue;
+        }
+
+        // EOF or error: hand over whatever unterminated data is left
+        if (buffer.size) {
+            line.append(buffer, buffer.size);
+            buffer.resize(0);
+        }
+        return;
+    }
+}
+
+//
+// Checks if any line with a given prefix from /proc/cpuinfo contains
+// a certain string as a whole word: preceded by whitespace and followed by
+// whitespace or the end of the line. Returns false if the file cannot be
+// opened.
+//
+// NOTE: scanning stops at the first empty line, because bufReadLine()
+// reports an empty line and EOF identically.
+//
+static bool procCpuinfoContains(const char *prefix, const char *string)
+{
+    int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
+    if (cpuinfo_fd == -1)
+        return false;
+
+    unsigned string_len = ::strlen(string);
+    unsigned prefix_len = ::strlen(prefix);
+    QSimpleBuffer line, buffer;
+    bool present = false;
+    do {
+        line.resize(0);
+        bufReadLine(cpuinfo_fd, line, buffer);
+
+        // candidate lines are long enough and have a "key : value" shape
+        if (line.size <= prefix_len + string_len || !::memchr(line.data, ':', line.size))
+            continue;
+        if (::strncmp(prefix, line.data, prefix_len) != 0)
+            continue;
+
+        // prefix matches, next character must be ':' or space
+        // (<ctype.h> functions need the value of an unsigned char)
+        const unsigned char after_prefix = line.data[prefix_len];
+        if (after_prefix != ':' && !::isspace(after_prefix))
+            continue;
+
+        // Does it contain the string? Look at every occurrence after the
+        // prefix, not just the first one: an earlier hit inside a longer
+        // word (e.g. "dsp" inside "dsp2") must not hide a later whole word.
+        char *text = line.cString();
+        for (char *found = ::strstr(text + prefix_len, string);
+             found && *found && !present;
+             found = ::strstr(found + 1, string)) {
+            if (found == text)
+                continue; // nothing precedes it, so found[-1] is off limits
+            const unsigned char before = found[-1];
+            const unsigned char after = found[string_len];
+            if (::isspace(before) && (after == '\0' || ::isspace(after)))
+                present = true;
+        }
+    } while (!present && line.size);
+
+    ::qt_safe_close(cpuinfo_fd);
+    return present;
+}
+#endif
+
+// Detects the MIPS DSP / DSPr2 extensions and returns them as a mask of
+// CpuFeature* values. When the compiler already targets DSP that is taken as
+// given; otherwise (on Linux) /proc/cpuinfo is consulted.
+//
+// The CpuFeature* enumerators are bit masks, not bit positions (the header
+// ORs them straight into qCompilerCpuFeatures), so they are ORed in directly.
+static inline quint64 detectProcessorFeatures()
+{
+    // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
+    quint64 flags = 0;
+
+#if defined __mips_dsp
+    flags |= CpuFeatureDSP;
+#  if defined __mips_dsp_rev && __mips_dsp_rev >= 2
+    flags |= CpuFeatureDSPR2;
+#  elif defined(Q_OS_LINUX)
+    if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
+        flags |= CpuFeatureDSPR2;
+#  endif
+#elif defined(Q_OS_LINUX)
+    if (procCpuinfoContains("ASEs implemented", "dsp")) {
+        flags |= CpuFeatureDSP;
+        if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
+            flags |= CpuFeatureDSPR2;
+    }
+#endif
+
+    return flags;
+}
+
+#else
+// Unknown processor family: nothing can be detected, so no feature bit is
+// reported. Returns quint64 like the other detectProcessorFeatures() variants
+// in this file, whose result qDetectCpuFeatures() stores in a quint64.
+static inline quint64 detectProcessorFeatures()
+{
+    return 0;
+}
+#endif
+
+// number of entries in the features_indices name table
+static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
+
+// record what CPU features were enabled by default in this Qt build
+static const quint64 minFeature = qCompilerCpuFeatures;
+
+// Detected feature mask as published by qDetectCpuFeatures(); zero until then.
+// One 64-bit atomic where available, otherwise two 32-bit halves
+// ([0] = low word including QSimdInitialized, [1] = high word).
+#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
+Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
+#else
+Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
+#endif
+
+// Runs the CPU feature detection and publishes the result.
+//
+//  1. detectProcessorFeatures() supplies the raw feature mask.
+//  2. Every feature whose name (from features_string) occurs in the
+//     QT_NO_CPU_FEATURE environment variable is cleared from the mask.
+//  3. Unless running under Valgrind, the process is aborted with qFatal()
+//     if a feature this build was compiled to require (minFeature) is
+//     missing.
+//  4. The mask is stored in qt_cpu_features with QSimdInitialized set.
+//
+// Returns the mask without the QSimdInitialized marker.
+quint64 qDetectCpuFeatures()
+{
+    quint64 f = detectProcessorFeatures();
+    QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
+    if (!disable.isEmpty()) {
+        // the names in features_string start with a space, so make sure the
+        // first name in the variable can match too
+        disable.prepend(' ');
+        for (int i = 0; i < features_count; ++i) {
+            if (disable.contains(features_string + features_indices[i]))
+                f &= ~(Q_UINT64_C(1) << i);
+        }
+    }
+
+#ifdef RUNNING_ON_VALGRIND
+    bool runningOnValgrind = RUNNING_ON_VALGRIND;
+#else
+    bool runningOnValgrind = false;
+#endif
+    if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) {
+        quint64 missing = minFeature & ~f;
+        fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
+        for (int i = 0; i < features_count; ++i) {
+            if (missing & (Q_UINT64_C(1) << i))
+                fprintf(stderr, "%s", features_string + features_indices[i]);
+        }
+        fprintf(stderr, "\n");
+        fflush(stderr);
+
+        // The lowest missing bit may lie beyond the name table; never index
+        // features_indices out of bounds while reporting the error.
+        const int firstMissing = int(qCountTrailingZeroBits(missing));
+        const char *firstMissingName = firstMissing < features_count
+                ? features_string + features_indices[firstMissing]
+                : " (unnamed)";
+        qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
+               firstMissingName);
+    }
+
+    qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
+#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
+    qt_cpu_features[1].storeRelaxed(f >> 32);
+#endif
+    return f;
+}
+
+// Prints the detected CPU features to stdout, tagging those this build
+// requires, followed by a warning block listing any required feature that
+// was not detected.
+void qDumpCPUFeatures()
+{
+    const quint64 detected = qCpuFeatures() & ~quint64(QSimdInitialized);
+
+    printf("Processor features: ");
+    for (int bit = 0; bit < features_count; ++bit) {
+        const quint64 mask = Q_UINT64_C(1) << bit;
+        if (!(detected & mask))
+            continue;
+        const char *requiredTag = (minFeature & mask) ? "[required]" : "";
+        printf("%s%s", features_string + features_indices[bit], requiredTag);
+    }
+
+    // features the compiler was told to use but the CPU does not offer
+    const quint64 absent = qCompilerCpuFeatures & ~detected;
+    if (absent) {
+        printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
+        for (int bit = 0; bit < features_count; ++bit) {
+            const quint64 mask = Q_UINT64_C(1) << bit;
+            if (absent & mask)
+                printf("%s", features_string + features_indices[bit]);
+        }
+        printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
+    }
+    puts("");
+}
+
+#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
+
+// Register-width aliases for the RDRAND / RDSEED step intrinsics: the 64-bit
+// forms on x86-64, the 32-bit forms otherwise. The generators below cast
+// their output pointer to qregisteruint* when calling them.
+# ifdef Q_PROCESSOR_X86_64
+# define _rdrandXX_step _rdrand64_step
+# define _rdseedXX_step _rdseed64_step
+# else
+# define _rdrandXX_step _rdrand32_step
+# define _rdseedXX_step _rdseed32_step
+# endif
+
+# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
+// Fills [ptr, end) with RDSEED output, one register-sized value at a time
+// plus at most one trailing 32-bit value, and stops at the first failing
+// instruction. Returns the position just past the last value written.
+static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
+{
+    // No retry loop here, unlike the RDRAND code below: the Intel whitepaper
+    // on RDSEED says not to retry, because the independent bit generator
+    // behind it may need time to replenish once it is out of entropy.
+    // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
+    constexpr auto WordsPerStep = sizeof(qregisteruint) / sizeof(unsigned);
+
+    for ( ; ptr + WordsPerStep <= end; ptr += WordsPerStep) {
+        if (!_rdseedXX_step(reinterpret_cast<qregisteruint *>(ptr)))
+            return ptr;
+    }
+
+    // when registers are wider than unsigned, one 32-bit slot may remain
+    const bool narrowSlotLeft = sizeof(*ptr) != sizeof(qregisteruint) && ptr != end;
+    if (narrowSlotLeft && _rdseed32_step(ptr) != 0)
+        ++ptr;
+
+    return ptr;
+}
+# else
+// RDSEED cannot be compiled here: produce nothing and let the caller fall
+// back to another source.
+static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
+{
+    return ptr;
+}
+# endif
+
+// Fills [ptr, end) with RDRAND output and returns the position just past the
+// last value written (== end on full success).
+//
+// A failing instruction is retried; the budget of 10 is shared by the whole
+// call, not reset per value. Once it is used up the function gives up and
+// returns what it has so far.
+static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
+{
+ int retries = 10;
+ // bulk phase: one register-sized value per successful step
+ while (ptr + sizeof(qregisteruint)/sizeof(*ptr) <= end) {
+ if (_rdrandXX_step(reinterpret_cast<qregisteruint *>(ptr)))
+ ptr += sizeof(qregisteruint)/sizeof(*ptr);
+ else if (--retries == 0)
+ goto out;
+ }
+
+ // tail phase: only entered when registers are wider than unsigned and one
+ // 32-bit slot is left; loops solely to retry that single value
+ while (sizeof(*ptr) != sizeof(qregisteruint) && ptr != end) {
+ bool ok = _rdrand32_step(ptr);
+ if (!ok && --retries)
+ continue;
+ if (ok)
+ ++ptr;
+ break;
+ }
+
+out:
+ return ptr;
+}
+
+static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
+{
+    /*
+     * Sanity check for the hardware random generator.
+     *
+     * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
+     * failing random generation instruction, which always returns
+     * 0xffffffff, even when generation was "successful".
+     *
+     * A small buffer is filled through qt_random_rdrnd(); if every value
+     * produced is the same, the generator is considered broken and the
+     * caller should disable it completely.
+     *
+     * https://bugreports.qt.io/browse/QTBUG-69423
+     */
+    constexpr qsizetype TestBufferSize = 4;
+    unsigned testBuffer[TestBufferSize] = {};
+
+    unsigned *const stop = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize);
+    const qsizetype produced = stop - testBuffer;
+    if (produced < 3) {
+        // Too little data to judge whether the generator works. Assume it
+        // does not, but stay silent about it.
+        return false;
+    }
+
+    // Compare everything that was produced against the first value; the
+    // fourth slot only counts if it was actually filled.
+    bool allEqual = testBuffer[1] == testBuffer[0] && testBuffer[2] == testBuffer[0];
+    if (allEqual && produced >= TestBufferSize)
+        allEqual = testBuffer[3] == testBuffer[0];
+
+    if (!allEqual)
+        return true;    // We're good
+
+    fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
+                    "disabling hardware random number generation\n"
+                    "WARNING: RDRND generated:");
+    for (qsizetype i = 0; i < produced; ++i)
+        fprintf(stderr, " 0x%x", testBuffer[i]);
+    fprintf(stderr, "\n");
+    return false;
+}
+
+// Fills buffer, treated as an array of count unsigned values, with hardware
+// random data: RDSEED first when the CPU has it, then RDRND for whatever is
+// still empty. Returns how many unsigned values were actually written.
+QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
+{
+    unsigned *const first = reinterpret_cast<unsigned *>(buffer);
+    unsigned *const limit = first + count;
+    unsigned *cursor = first;
+
+    if (qCpuHasFeature(RDSEED))
+        cursor = qt_random_rdseed(cursor, limit);
+
+    // let RDRND fill whatever RDSEED left empty
+    cursor = qt_random_rdrnd(cursor, limit);
+
+    return cursor - first;
+}
+#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
+// RDRND cannot be compiled in this translation unit: report the generator as
+// unusable, which makes the x86 detectProcessorFeatures() clear the RDRND and
+// RDSEED feature bits.
+static bool checkRdrndWorks() noexcept { return false; }
+#endif // Q_PROCESSOR_X86 && RDRND
+
+QT_END_NAMESPACE
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
new file mode 100644
index 0000000000..26e98c4542
--- /dev/null
+++ b/src/corelib/global/qsimd_p.h
@@ -0,0 +1,396 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QSIMD_P_H
+#define QSIMD_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include <QtCore/private/qglobal_p.h>
+
+/*
+ * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros.
+ * They mean the compiler supports the necessary flags and the headers
+ * for the x86 and ARM intrinsics:
+ * - GCC: the -mXXX or march=YYY flag is necessary before #include
+ * up to 4.8; GCC >= 4.9 can include unconditionally
+ * - Intel CC: #include can happen unconditionally
+ * - MSVC: #include can happen unconditionally
+ * - RVCT: ???
+ *
+ * We will try to include all headers possible under this configuration.
+ *
+ * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 &
+ * up do define __AVX__ if the -arch:AVX option is passed on the command-line.
+ *
+ * Supported XXX are:
+ * Flag | Arch | GCC | Intel CC | MSVC |
+ * ARM_NEON | ARM | I & C | None | ? |
+ * SSE2 | x86 | I & C | I & C | I & C |
+ * SSE3 | x86 | I & C | I & C | I only |
+ * SSSE3 | x86 | I & C | I & C | I only |
+ * SSE4_1 | x86 | I & C | I & C | I only |
+ * SSE4_2 | x86 | I & C | I & C | I only |
+ * AVX | x86 | I & C | I & C | I & C |
+ * AVX2 | x86 | I & C | I & C | I only |
+ * AVX512xx | x86 | I & C | I & C | I only |
+ * I = intrinsics; C = code generation
+ *
+ * Code can use the following constructs to determine compiler support & status:
+ * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__)
+ * If this test passes, then the compiler is already generating code for that
+ * given sub-architecture. The intrinsics for that sub-architecture are
+ * #included and can be used without restriction or runtime check.
+ *
+ * - #if QT_COMPILER_SUPPORTS(XXX)
+ * If this test passes, then the compiler is able to generate code for that
+ * given sub-architecture in another translation unit, given the right set of
+ * flags. Use of the intrinsics is not guaranteed. This is useful with
+ * runtime detection (see below).
+ *
+ * - #if QT_COMPILER_SUPPORTS_HERE(XXX)
+ * If this test passes, then the compiler is able to generate code for that
+ * given sub-architecture in this translation unit, even if it is not doing
+ * that now (it might be). Individual functions may be tagged with
+ * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that
+ * sub-arch. Only inside such functions is the use of the intrinsics
+ * guaranteed to work. This is useful with runtime detection (see below).
+ *
+ * Runtime detection of a CPU sub-architecture can be done with the
+ * qCpuHasFeature(XXX) function. There are two strategies for generating
+ * optimized code like that:
+ *
+ * 1) place the optimized code in a different translation unit (C or assembly
+ * sources) and pass the correct flags to the compiler to enable support. Those
+ * sources must not include qglobal.h, which means they cannot include this
+ * file either. The dispatcher function would look like this:
+ *
+ * void foo()
+ * {
+ * #if QT_COMPILER_SUPPORTS(XXX)
+ * if (qCpuHasFeature(XXX)) {
+ * foo_optimized_xxx();
+ * return;
+ * }
+ * #endif
+ * foo_plain();
+ * }
+ *
+ * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and
+ * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use
+ * other Qt code. The dispatcher function would look like this:
+ *
+ * void foo()
+ * {
+ * #if QT_COMPILER_SUPPORTS_HERE(XXX)
+ * if (qCpuHasFeature(XXX)) {
+ * foo_optimized_xxx();
+ * return;
+ * }
+ * #endif
+ * foo_plain();
+ * }
+ */
+
+#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC)
+#include <intrin.h>
+#endif
+
+#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0)
+
+/*
+ * QT_COMPILER_SUPPORTS_HERE(x) expands to a parenthesised compiler feature
+ * macro and is meant to be used in #if. The helper macros this block defines
+ * itself are therefore given the value 1: an empty definition would expand to
+ * "()", which is not a valid #if expression.
+ *
+ * QT_FUNCTION_TARGET(x) expands to a per-function target attribute where that
+ * is used, and to nothing elsewhere.
+ */
+#if defined(Q_PROCESSOR_ARM)
+# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x)
+# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600
+ /* GCC requires attributes for a function */
+# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
+# else
+# define QT_FUNCTION_TARGET(x)
+# endif
+# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__)
+# define __ARM_FEATURE_NEON 1 // also support QT_COMPILER_SUPPORTS_HERE(NEON)
+# endif
+#elif defined(Q_PROCESSOR_MIPS)
+# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
+# define QT_FUNCTION_TARGET(x)
+# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32)
+# define __MIPS_DSP__ 1 // also support QT_COMPILER_SUPPORTS_HERE(MIPS_DSP)
+# endif
+# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32)
+# define __MIPS_DSPR2__ 1 // also support QT_COMPILER_SUPPORTS_HERE(MIPS_DSPR2)
+# endif
+#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)
+# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
+# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
+ /* GCC requires attributes for a function */
+# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
+# else
+# define QT_FUNCTION_TARGET(x)
+# endif
+#else
+# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
+# define QT_FUNCTION_TARGET(x)
+#endif
+
+#ifdef Q_PROCESSOR_X86
+/* -- x86 intrinsic support -- */
+
+# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
+// MSVC doesn't define __SSE2__, so do it ourselves
+# define __SSE__ 1
+# define __SSE2__ 1
+# endif
+
+# ifdef __SSE2__
+// #include the intrinsics
+# include <immintrin.h>
+# endif
+
+# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
+// GCC 4.4 and Clang 2.8 added a few more intrinsics there
+# include <x86intrin.h>
+# endif
+
+# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__))
+// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros
+// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx
+# define __SSE3__ 1
+# define __SSSE3__ 1
+// no Intel CPU supports SSE4a, so don't define it
+# define __SSE4_1__ 1
+# define __SSE4_2__ 1
+# ifndef __AVX__
+# define __AVX__ 1
+# endif
+# endif
+
+# if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
+// POPCNT instructions:
+// All processors that support SSE4.2 support POPCNT
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __POPCNT__ 1
+# endif
+
+// AVX intrinsics
+# if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
+// AES, PCLMULQDQ instructions:
+// All processors that support AVX support PCLMULQDQ
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __PCLMUL__ 1
+# endif
+
+# if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
+// F16C & RDRAND instructions:
+// All processors that support AVX2 support F16C & RDRAND:
+// (but neither MSVC nor the Intel compiler define these macros)
+# define __F16C__ 1
+# define __RDRND__ 1
+# endif
+
+# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
+// BMI2 instructions:
+// All processors that support BMI support BMI2 (and AVX2)
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __BMI2__ 1
+# endif
+
+# include "qsimd_x86_p.h"
+
+// Haswell sub-architecture
+//
+// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
+// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
+// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
+// same features.
+//
+// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
+// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
+# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
+# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
+ defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
+# define __haswell__ 1
+# endif
+
+// This constant does not include all CPU features found in a Haswell, only
+// those that we'd have optimized code for.
+// Note: declared as a plain `static const` (not Q_CONSTEXPR), as this file
+// may be compiled in C mode.
+QT_BEGIN_NAMESPACE
+static const quint64 CpuFeatureArchHaswell = 0
+ | CpuFeatureSSE2
+ | CpuFeatureSSE3
+ | CpuFeatureSSSE3
+ | CpuFeatureSSE4_1
+ | CpuFeatureSSE4_2
+ | CpuFeatureFMA
+ | CpuFeaturePOPCNT
+ | CpuFeatureAVX
+ | CpuFeatureF16C
+ | CpuFeatureAVX2
+ | CpuFeatureBMI
+ | CpuFeatureBMI2;
+QT_END_NAMESPACE
+
+#endif /* Q_PROCESSOR_X86 */
+
+// Clang compiler fix (applied on Darwin only), see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
+// This should be tweaked with an "upper version" of clang once we know which release fixes the
+// issue. At that point we can rely on __ARM_FEATURE_CRC32 again.
+#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32)
+# undef __ARM_FEATURE_CRC32 // turns off the CRC32 sections below that test this macro
+#endif
+
+// NEON intrinsics
+// note: GCC (as of 4.9) does not support function targets for ARM
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available.
+#ifndef __ARM_NEON__
+// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection.
+#define __ARM_NEON__ // tested further down when building qCompilerCpuFeatures
+#endif
+#endif
+// ARMv8 CRC32 (the function target string is only defined on AArch64/ARM64)
+#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32)
+#if defined(Q_PROCESSOR_ARM_64)
+// only available on aarch64
+#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc"
+#endif
+# include <arm_acle.h> // included for every configuration that passes the outer check
+#endif
+
+#ifdef __cplusplus
+#include <qatomic.h>
+
+QT_BEGIN_NAMESPACE
+
+#ifndef Q_PROCESSOR_X86
+enum CPUFeatures { // feature bits for non-x86 processors (skipped when Q_PROCESSOR_X86 is defined)
+#if defined(Q_PROCESSOR_ARM)
+ CpuFeatureNEON = 2,
+ CpuFeatureARM_NEON = CpuFeatureNEON, // alias of CpuFeatureNEON
+ CpuFeatureCRC32 = 4,
+#elif defined(Q_PROCESSOR_MIPS)
+ CpuFeatureDSP = 2,
+ CpuFeatureDSPR2 = 4,
+#endif
+
+ // used only to indicate that the CPU detection was initialised
+ QSimdInitialized = 1
+};
+
+static const quint64 qCompilerCpuFeatures = 0 // non-x86 mask of features enabled at compile time, built from compiler macros
+#if defined __ARM_NEON__
+ | CpuFeatureNEON
+#endif
+#if defined __ARM_FEATURE_CRC32
+ | CpuFeatureCRC32
+#endif
+#if defined __mips_dsp
+ | CpuFeatureDSP
+#endif
+#if defined __mips_dspr2
+ | CpuFeatureDSPR2
+#endif
+ ;
+#endif
+
+#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
+extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1]; // whole feature mask in one 64-bit atomic
+#else
+extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2]; // mask split in two: [0] low 32 bits, [1] high 32 bits (see qCpuFeatures())
+#endif
+Q_CORE_EXPORT quint64 qDetectCpuFeatures(); // defined elsewhere; qCpuFeatures() calls it when the stored mask is still zero
+
+#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED)
+Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept; // exported implementation, defined outside this header
+#else
+static inline qsizetype qRandomCpu(void *, qsizetype) noexcept // fallback stub for builds without the exported implementation
+{
+ return 0; // always 0; presumably means "no random data produced" -- TODO confirm against callers
+}
+#endif
+
+// Returns the CPU feature mask stored in qt_cpu_features. If the stored mask
+// is still zero, qDetectCpuFeatures() is called and its result, which is
+// assumed to be non-zero, is returned instead.
+static inline quint64 qCpuFeatures()
+{
+    quint64 mask = qt_cpu_features[0].loadRelaxed();
+#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
+    // without 64-bit atomics the upper half of the mask lives in the second element
+    const quint64 upperHalf = qt_cpu_features[1].loadRelaxed();
+    mask |= upperHalf << 32;
+#endif
+    if (Q_UNLIKELY(mask == 0)) {
+        const quint64 detected = qDetectCpuFeatures();
+        Q_ASSUME(detected != 0);
+        return detected;
+    }
+    return mask;
+}
+
+#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
+ || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) // true if CpuFeature<feature> is set in the compile-time mask or, failing that, in the qCpuFeatures() mask
+
+inline bool qHasHwrng() // reports whether the RDRND CPU feature is available; always false when the #if below fails
+{
+#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
+ return qCpuHasFeature(RDRND); // compile-time mask first, then the qCpuFeatures() mask
+#else
+ return false;
+#endif
+}
+
+#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
+ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) // loop header: advances i up to min(length, number of 4-byte units from ptr to the next 16-byte boundary)
+
+#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \
+ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i) // loop header: advances i up to min(length, number of 4-byte units from ptr to the next 32-byte boundary)
+
+QT_END_NAMESPACE
+
+#endif // __cplusplus
+
+// Loop header that runs the following statement at most `max` times, stopping
+// early once `i` reaches `length` (per its name, meant for the elements left
+// over after a SIMD loop). `i` must be a modifiable lvalue; it is incremented
+// on every iteration. `length` and `max` are re-evaluated on every iteration.
+// Every argument is parenthesized in the expansion so that compound
+// expressions (e.g. `a | b` or `c ? x : y`) are evaluated as a unit.
+#define SIMD_EPILOGUE(i, length, max) \
+    for (int _i = 0; _i < (max) && (i) < (length); ++(i), ++_i)
+
+#endif // QSIMD_P_H
diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp
new file mode 100644
index 0000000000..be17f44c09
--- /dev/null
+++ b/src/corelib/global/qsimd_x86.cpp
@@ -0,0 +1,155 @@
+/****************************************************************************
+**
+** Copyright (C) 2018 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#include "qsimd_p.h"
+
+static const char features_string[] = // NUL-separated feature names, each prefixed with a space; start offsets are listed in features_indices
+ " sse2\0"
+ " sse3\0"
+ " ssse3\0"
+ " fma\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
+ " avx\0"
+ " f16c\0"
+ " rdrnd\0"
+ " bmi\0"
+ " hle\0"
+ " avx2\0"
+ " bmi2\0"
+ " rtm\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
+ " avx512vbmi\0"
+ " avx512vbmi2\0"
+ " gfni\0"
+ " vaes\0"
+ " avx512vnni\0"
+ " avx512bitalg\0"
+ " avx512vpopcntdq\0"
+ " avx5124nniw\0"
+ " avx5124fmaps\0"
+ "\0"; // final empty string, at offset 306
+
+static const quint16 features_indices[] = { // start offset of each name in features_string; presumably indexed by feature bit number
+ 306, 0, 6, 12, 19, 24, 32, 40, // entry 0 (306) is the empty string at the end of features_string
+ 47, 55, 60, 65, 71, 78, 83, 88,
+ 94, 100, 105, 114, 124, 132, 144, 154,
+ 164, 174, 179, 189, 199, 211, 224, 230,
+ 236, 248, 262, 279, 292
+};
+
+enum X86CpuidLeaves { // index of each CPUID output register used by x86_locators
+ Leaf1ECX, // CPUID leaf 1, ECX
+ Leaf1EDX, // CPUID leaf 1, EDX
+ Leaf7_0EBX, // CPUID leaf 7, sub-leaf 0, EBX
+ Leaf7_0ECX, // CPUID leaf 7, sub-leaf 0, ECX
+ Leaf7_0EDX, // CPUID leaf 7, sub-leaf 0, EDX
+ X86CpuidMaxLeaf // number of registers listed above
+};
+
+static const quint8 x86_locators[] = { // one entry per feature, in features_string order: X86CpuidLeaves index * 32 + bit number within that register
+ Leaf1EDX*32 + 26, // sse2
+ Leaf1ECX*32 + 0, // sse3
+ Leaf1ECX*32 + 9, // ssse3
+ Leaf1ECX*32 + 12, // fma
+ Leaf1ECX*32 + 19, // sse4.1
+ Leaf1ECX*32 + 20, // sse4.2
+ Leaf1ECX*32 + 22, // movbe
+ Leaf1ECX*32 + 23, // popcnt
+ Leaf1ECX*32 + 25, // aes
+ Leaf1ECX*32 + 28, // avx
+ Leaf1ECX*32 + 29, // f16c
+ Leaf1ECX*32 + 30, // rdrnd
+ Leaf7_0EBX*32 + 3, // bmi
+ Leaf7_0EBX*32 + 4, // hle
+ Leaf7_0EBX*32 + 5, // avx2
+ Leaf7_0EBX*32 + 8, // bmi2
+ Leaf7_0EBX*32 + 11, // rtm
+ Leaf7_0EBX*32 + 16, // avx512f
+ Leaf7_0EBX*32 + 17, // avx512dq
+ Leaf7_0EBX*32 + 18, // rdseed
+ Leaf7_0EBX*32 + 21, // avx512ifma
+ Leaf7_0EBX*32 + 26, // avx512pf
+ Leaf7_0EBX*32 + 27, // avx512er
+ Leaf7_0EBX*32 + 28, // avx512cd
+ Leaf7_0EBX*32 + 29, // sha
+ Leaf7_0EBX*32 + 30, // avx512bw
+ Leaf7_0EBX*32 + 31, // avx512vl
+ Leaf7_0ECX*32 + 1, // avx512vbmi
+ Leaf7_0ECX*32 + 6, // avx512vbmi2
+ Leaf7_0ECX*32 + 8, // gfni
+ Leaf7_0ECX*32 + 9, // vaes
+ Leaf7_0ECX*32 + 11, // avx512vnni
+ Leaf7_0ECX*32 + 12, // avx512bitalg
+ Leaf7_0ECX*32 + 14, // avx512vpopcntdq
+ Leaf7_0EDX*32 + 2, // avx5124nniw
+ Leaf7_0EDX*32 + 3 // avx5124fmaps
+};
+
+// Mask with every AVX512 feature bit set (see detectProcessorFeatures()).
+// The flags are grouped by the CPUID register that reports them.
+static const quint64 AllAVX512 =
+        // CPUID Leaf 7, Sub-leaf 0, EBX
+        CpuFeatureAVX512F | CpuFeatureAVX512DQ | CpuFeatureAVX512IFMA | CpuFeatureAVX512PF
+        | CpuFeatureAVX512ER | CpuFeatureAVX512CD | CpuFeatureAVX512BW | CpuFeatureAVX512VL
+        // CPUID Leaf 7, Sub-leaf 0, ECX
+        | CpuFeatureAVX512VBMI | CpuFeatureAVX512VBMI2 | CpuFeatureAVX512VNNI
+        | CpuFeatureAVX512BITALG | CpuFeatureAVX512VPOPCNTDQ
+        // CPUID Leaf 7, Sub-leaf 0, EDX
+        | CpuFeatureAVX5124NNIW | CpuFeatureAVX5124FMAPS;
diff --git a/src/corelib/global/qsimd_x86_p.h b/src/corelib/global/qsimd_x86_p.h
new file mode 100644
index 0000000000..82e3008a24
--- /dev/null
+++ b/src/corelib/global/qsimd_x86_p.h
@@ -0,0 +1,261 @@
+/****************************************************************************
+**
+** Copyright (C) 2018 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// used only to indicate that the CPU detection was initialized (bit 0 of the feature mask)
+#define QSimdInitialized (Q_UINT64_C(1) << 0)
+
+// in CPUID Leaf 1, EDX (every CpuFeatureXXX below is a single bit of the feature mask):
+#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1)
+#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
+
+// in CPUID Leaf 1, ECX:
+#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2)
+#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
+#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3)
+#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
+#define CpuFeatureFMA (Q_UINT64_C(1) << 4)
+#define QT_FUNCTION_TARGET_STRING_FMA "fma"
+#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5)
+#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
+#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6)
+#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
+#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7)
+#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
+#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8)
+#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
+#define CpuFeatureAES (Q_UINT64_C(1) << 9)
+#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" // unlike the other target strings, this one lists sse4.2 as well
+#define CpuFeatureAVX (Q_UINT64_C(1) << 10)
+#define QT_FUNCTION_TARGET_STRING_AVX "avx"
+#define CpuFeatureF16C (Q_UINT64_C(1) << 11)
+#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
+#define CpuFeatureRDRND (Q_UINT64_C(1) << 12)
+#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
+
+// in CPUID Leaf 7, Sub-leaf 0, EBX (feature mask bits 13 to 27):
+#define CpuFeatureBMI (Q_UINT64_C(1) << 13)
+#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
+#define CpuFeatureHLE (Q_UINT64_C(1) << 14)
+#define QT_FUNCTION_TARGET_STRING_HLE "hle"
+#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15)
+#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
+#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16)
+#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
+#define CpuFeatureRTM (Q_UINT64_C(1) << 17)
+#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
+#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18)
+#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
+#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19)
+#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
+#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20)
+#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
+#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21)
+#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
+#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22)
+#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
+#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23)
+#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
+#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24)
+#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
+#define CpuFeatureSHA (Q_UINT64_C(1) << 25)
+#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26)
+#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
+#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27)
+#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
+
+// in CPUID Leaf 7, Sub-leaf 0, ECX (feature mask bits 28 to 34):
+#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28)
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
+#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29)
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
+#define CpuFeatureGFNI (Q_UINT64_C(1) << 30)
+#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
+#define CpuFeatureVAES (Q_UINT64_C(1) << 31)
+#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
+#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32) // first feature bit beyond the low 32 bits of the mask
+#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
+#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33)
+#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
+#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34)
+#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
+
+// in CPUID Leaf 7, Sub-leaf 0, EDX (feature mask bits 35 and 36):
+#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35)
+#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
+#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36)
+#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
+
+static const quint64 qCompilerCpuFeatures = 0 // x86 mask of features enabled at compile time: one bit per __FEATURE__ macro that is defined
+#ifdef __SSE2__
+ | CpuFeatureSSE2
+#endif
+#ifdef __SSE3__
+ | CpuFeatureSSE3
+#endif
+#ifdef __SSSE3__
+ | CpuFeatureSSSE3
+#endif
+#ifdef __FMA__
+ | CpuFeatureFMA
+#endif
+#ifdef __SSE4_1__
+ | CpuFeatureSSE4_1
+#endif
+#ifdef __SSE4_2__
+ | CpuFeatureSSE4_2
+#endif
+#ifdef __MOVBE__
+ | CpuFeatureMOVBE
+#endif
+#ifdef __POPCNT__
+ | CpuFeaturePOPCNT
+#endif
+#ifdef __AES__
+ | CpuFeatureAES
+#endif
+#ifdef __AVX__
+ | CpuFeatureAVX
+#endif
+#ifdef __F16C__
+ | CpuFeatureF16C
+#endif
+#ifdef __RDRND__
+ | CpuFeatureRDRND
+#endif
+#ifdef __BMI__
+ | CpuFeatureBMI
+#endif
+#ifdef __HLE__
+ | CpuFeatureHLE
+#endif
+#ifdef __AVX2__
+ | CpuFeatureAVX2
+#endif
+#ifdef __BMI2__
+ | CpuFeatureBMI2
+#endif
+#ifdef __RTM__
+ | CpuFeatureRTM
+#endif
+#ifdef __AVX512F__
+ | CpuFeatureAVX512F
+#endif
+#ifdef __AVX512DQ__
+ | CpuFeatureAVX512DQ
+#endif
+#ifdef __RDSEED__
+ | CpuFeatureRDSEED
+#endif
+#ifdef __AVX512IFMA__
+ | CpuFeatureAVX512IFMA
+#endif
+#ifdef __AVX512PF__
+ | CpuFeatureAVX512PF
+#endif
+#ifdef __AVX512ER__
+ | CpuFeatureAVX512ER
+#endif
+#ifdef __AVX512CD__
+ | CpuFeatureAVX512CD
+#endif
+#ifdef __SHA__
+ | CpuFeatureSHA
+#endif
+#ifdef __AVX512BW__
+ | CpuFeatureAVX512BW
+#endif
+#ifdef __AVX512VL__
+ | CpuFeatureAVX512VL
+#endif
+#ifdef __AVX512VBMI__
+ | CpuFeatureAVX512VBMI
+#endif
+#ifdef __AVX512VBMI2__
+ | CpuFeatureAVX512VBMI2
+#endif
+#ifdef __GFNI__
+ | CpuFeatureGFNI
+#endif
+#ifdef __VAES__
+ | CpuFeatureVAES
+#endif
+#ifdef __AVX512VNNI__
+ | CpuFeatureAVX512VNNI
+#endif
+#ifdef __AVX512BITALG__
+ | CpuFeatureAVX512BITALG
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+ | CpuFeatureAVX512VPOPCNTDQ
+#endif
+#ifdef __AVX5124NNIW__
+ | CpuFeatureAVX5124NNIW
+#endif
+#ifdef __AVX5124FMAPS__
+ | CpuFeatureAVX5124FMAPS
+#endif
+ ; // terminates the qCompilerCpuFeatures initializer
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H