From f2f32b88b873b8b18cc1b9e1f66135e6d09b9286 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Sat, 29 Feb 2020 20:11:39 +0100 Subject: Move qsimd* from corelib/tools to corelib/global It's not used in tools at all and fits a lot better in global. Also fix the qsimd_x86* files to have a proper copyright header. Change-Id: Id3d8e7cfcd7769a1ca9f3d8cf6d357a31a99ba40 Reviewed-by: Thiago Macieira Reviewed-by: Lars Knoll --- src/corelib/.prev_CMakeLists.txt | 2 +- src/corelib/CMakeLists.txt | 2 +- src/corelib/global/global.pri | 2 + src/corelib/global/qsimd.cpp | 718 +++++++++++++++++++++++++++++++++++++++ src/corelib/global/qsimd_p.h | 396 +++++++++++++++++++++ src/corelib/global/qsimd_x86.cpp | 155 +++++++++ src/corelib/global/qsimd_x86_p.h | 261 ++++++++++++++ src/corelib/tools/qsimd.cpp | 718 --------------------------------------- src/corelib/tools/qsimd_p.h | 396 --------------------- src/corelib/tools/qsimd_x86.cpp | 116 ------- src/corelib/tools/qsimd_x86_p.h | 222 ------------ src/corelib/tools/tools.pri | 2 - util/x86simdgen/generate.pl | 88 ++++- 13 files changed, 1617 insertions(+), 1461 deletions(-) create mode 100644 src/corelib/global/qsimd.cpp create mode 100644 src/corelib/global/qsimd_p.h create mode 100644 src/corelib/global/qsimd_x86.cpp create mode 100644 src/corelib/global/qsimd_x86_p.h delete mode 100644 src/corelib/tools/qsimd.cpp delete mode 100644 src/corelib/tools/qsimd_p.h delete mode 100644 src/corelib/tools/qsimd_x86.cpp delete mode 100644 src/corelib/tools/qsimd_x86_p.h diff --git a/src/corelib/.prev_CMakeLists.txt b/src/corelib/.prev_CMakeLists.txt index 5d31f4c33a..73fc757c73 100644 --- a/src/corelib/.prev_CMakeLists.txt +++ b/src/corelib/.prev_CMakeLists.txt @@ -29,6 +29,7 @@ qt_add_module(Core global/qoperatingsystemversion.cpp global/qoperatingsystemversion.h global/qoperatingsystemversion_p.h global/qprocessordetection.h global/qrandom.cpp global/qrandom.h global/qrandom_p.h + global/qsimd.cpp global/qsimd_p.h global/qsysinfo.h 
global/qsystemdetection.h global/qtypeinfo.h @@ -211,7 +212,6 @@ qt_add_module(Core tools/qshareddata.cpp tools/qshareddata.h tools/qsharedpointer.cpp tools/qsharedpointer.h tools/qsharedpointer_impl.h - tools/qsimd.cpp tools/qsimd_p.h tools/qsize.cpp tools/qsize.h tools/qstack.h tools/qtaggedpointer.h diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt index e28df10290..53d8de6668 100644 --- a/src/corelib/CMakeLists.txt +++ b/src/corelib/CMakeLists.txt @@ -45,6 +45,7 @@ qt_add_module(Core global/qoperatingsystemversion.cpp global/qoperatingsystemversion.h global/qoperatingsystemversion_p.h global/qprocessordetection.h global/qrandom.cpp global/qrandom.h global/qrandom_p.h + global/qsimd.cpp global/qsimd_p.h global/qsysinfo.h global/qsystemdetection.h global/qtypeinfo.h @@ -227,7 +228,6 @@ qt_add_module(Core tools/qshareddata.cpp tools/qshareddata.h tools/qsharedpointer.cpp tools/qsharedpointer.h tools/qsharedpointer_impl.h - tools/qsimd.cpp tools/qsimd_p.h tools/qsize.cpp tools/qsize.h tools/qstack.h tools/qtaggedpointer.h diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri index 1da69aba9b..389e866987 100644 --- a/src/corelib/global/global.pri +++ b/src/corelib/global/global.pri @@ -20,6 +20,7 @@ HEADERS += \ global/qtypeinfo.h \ global/qsysinfo.h \ global/qisenum.h \ + global/qsimd_p.h \ global/qtypetraits.h \ global/qflags.h \ global/qrandom.h \ @@ -38,6 +39,7 @@ SOURCES += \ global/qoperatingsystemversion.cpp \ global/qlogging.cpp \ global/qrandom.cpp \ + global/qsimd.cpp \ global/qhooks.cpp # To get listed in IDEs diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp new file mode 100644 index 0000000000..75c380ee8a --- /dev/null +++ b/src/corelib/global/qsimd.cpp @@ -0,0 +1,718 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2019 Intel Corporation. 
+** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qsimd_p.h" +#include "qalgorithms.h" +#include +#include + +#ifdef Q_OS_LINUX +# include "../testlib/3rdparty/valgrind_p.h" +#endif + +#if defined(Q_OS_WIN) +# if !defined(Q_CC_GNU) +# include +# endif +#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32)) +#include "private/qcore_unix_p.h" + +// the kernel header definitions for HWCAP_* +// (the ones we need/may need anyway) + +// copied from (ARM) +#define HWCAP_CRUNCH 1024 +#define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 +#define HWCAP_VFPv3 8192 +#define HWCAP_VFPv3D16 16384 + +// copied from (ARM): +#define HWCAP2_CRC32 (1 << 4) + +// copied from (Aarch64) +#define HWCAP_CRC32 (1 << 7) + +// copied from +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_HWCAP2 26 /* extension of AT_HWCAP */ + +#elif defined(Q_CC_GHS) +#include +#endif + +QT_BEGIN_NAMESPACE + +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note + * we remove the terminating -1 that the script adds. 
+ */ + +// begin generated +#if defined(Q_PROCESSOR_ARM) +/* Data: + neon + crc32 + */ +static const char features_string[] = + " neon\0" + " crc32\0" + "\0"; +static const int features_indices[] = { 0, 6 }; +#elif defined(Q_PROCESSOR_MIPS) +/* Data: + dsp + dspr2 +*/ +static const char features_string[] = + " dsp\0" + " dspr2\0" + "\0"; + +static const int features_indices[] = { + 0, 5 +}; +#elif defined(Q_PROCESSOR_X86) +# include "qsimd_x86.cpp" // generated by util/x86simdgen +#else +static const char features_string[] = ""; +static const int features_indices[] = { }; +#endif +// end generated + +#if defined (Q_OS_NACL) +static inline uint detectProcessorFeatures() +{ + return 0; +} +#elif defined(Q_PROCESSOR_ARM) +static inline quint64 detectProcessorFeatures() +{ + quint64 features = 0; + +#if defined(Q_OS_LINUX) +# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) + features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit. +# endif + int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY); + if (auxv != -1) { + unsigned long vector[64]; + int nread; + while (features == 0) { + nread = qt_safe_read(auxv, (char *)vector, sizeof vector); + if (nread <= 0) { + // EOF or error + break; + } + + int max = nread / (sizeof vector[0]); + for (int i = 0; i < max; i += 2) { + if (vector[i] == AT_HWCAP) { +# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) + // For Aarch64: + if (vector[i+1] & HWCAP_CRC32) + features |= Q_UINT64_C(1) << CpuFeatureCRC32; +# endif + // Aarch32, or ARMv7 or before: + if (vector[i+1] & HWCAP_NEON) + features |= Q_UINT64_C(1) << CpuFeatureNEON; + } +# if defined(Q_PROCESSOR_ARM_32) + // For Aarch32: + if (vector[i] == AT_HWCAP2) { + if (vector[i+1] & HWCAP2_CRC32) + features |= Q_UINT64_C(1) << CpuFeatureCRC32; + } +# endif + } + } + + qt_safe_close(auxv); + return features; + } + // fall back if /proc/self/auxv wasn't found +#endif + +#if defined(__ARM_NEON__) + features |= Q_UINT64_C(1) 
<< CpuFeatureNEON; +#endif +#if defined(__ARM_FEATURE_CRC32) + features |= Q_UINT64_C(1) << CpuFeatureCRC32; +#endif + + return features; +} + +#elif defined(Q_PROCESSOR_X86) + +#ifdef Q_PROCESSOR_X86_32 +# define PICreg "%%ebx" +#else +# define PICreg "%%rbx" +#endif + +static bool checkRdrndWorks() noexcept; + +static int maxBasicCpuidSupported() +{ +#if defined(Q_CC_EMSCRIPTEN) + return 6; // All features supported by Emscripten +#elif defined(Q_CC_GNU) + qregisterint tmp1; + +# if Q_PROCESSOR_X86 < 5 + // check if the CPUID instruction is supported + long cpuid_supported; + asm ("pushf\n" + "pop %0\n" + "mov %0, %1\n" + "xor $0x00200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "pop %0\n" + "xor %1, %0\n" // %eax is now 0 if CPUID is not supported + : "=a" (cpuid_supported), "=r" (tmp1) + ); + if (!cpuid_supported) + return 0; +# endif + + int result; + asm ("xchg " PICreg", %1\n" + "cpuid\n" + "xchg " PICreg", %1\n" + : "=&a" (result), "=&r" (tmp1) + : "0" (0) + : "ecx", "edx"); + return result; +#elif defined(Q_OS_WIN) + // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 + int info[4]; + __cpuid(info, 0); + return info[0]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUID(0, info); + return info[0]; +#else + return 0; +#endif +} + +static void cpuidFeatures01(uint &ecx, uint &edx) +{ +#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) + qregisterint tmp1; + asm ("xchg " PICreg", %2\n" + "cpuid\n" + "xchg " PICreg", %2\n" + : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) + : "a" (1)); +#elif defined(Q_OS_WIN) + int info[4]; + __cpuid(info, 1); + ecx = info[2]; + edx = info[3]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUID(1, info); + ecx = info[2]; + edx = info[3]; +#else + Q_UNUSED(ecx); + Q_UNUSED(edx); +#endif +} + +#ifdef Q_OS_WIN +inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} +#endif + +static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) +{ +#if 
defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) + qregisteruint rbx; // in case it's 64-bit + qregisteruint rcx = 0; + qregisteruint rdx = 0; + asm ("xchg " PICreg", %0\n" + "cpuid\n" + "xchg " PICreg", %0\n" + : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) + : "a" (7)); + ebx = rbx; + ecx = rcx; + edx = rdx; +#elif defined(Q_OS_WIN) + int info[4]; + __cpuidex(info, 7, 0); + ebx = info[1]; + ecx = info[2]; + edx = info[3]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUIDEX(7, 0, info); + ebx = info[1]; + ecx = info[2]; + edx = info[3]; +#else + Q_UNUSED(ebx); + Q_UNUSED(ecx); + Q_UNUSED(edx); +#endif +} + +#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) +// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); +inline quint64 _xgetbv(__int64) { return 0; } +#endif +static void xgetbv(uint in, uint &eax, uint &edx) +{ +#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) + asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction + : "=a" (eax), "=d" (edx) + : "c" (in)); +#elif defined(Q_OS_WIN) + quint64 result = _xgetbv(in); + eax = result; + edx = result >> 32; +#else + Q_UNUSED(in); + Q_UNUSED(eax); + Q_UNUSED(edx); +#endif +} + +static quint64 detectProcessorFeatures() +{ + // Flags from the CR0 / XCR0 state register + enum XCR0Flags { + X87 = 1 << 0, + XMM0_15 = 1 << 1, + YMM0_15Hi128 = 1 << 2, + BNDRegs = 1 << 3, + BNDCSR = 1 << 4, + OpMask = 1 << 5, + ZMM0_15Hi256 = 1 << 6, + ZMM16_31 = 1 << 7, + + SSEState = XMM0_15, + AVXState = XMM0_15 | YMM0_15Hi128, + AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 + }; + static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; + static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; + + quint64 features = 0; + int cpuidLevel = maxBasicCpuidSupported(); +#if Q_PROCESSOR_X86 < 5 + if (cpuidLevel < 1) + return 0; +#else + Q_ASSERT(cpuidLevel >= 1); +#endif + + uint results[X86CpuidMaxLeaf] = {}; + cpuidFeatures01(results[Leaf1ECX], 
results[Leaf1EDX]); + if (cpuidLevel >= 7) + cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + + // populate our feature list + for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + uint word = x86_locators[i] / 32; + uint bit = 1U << (x86_locators[i] % 32); + quint64 feature = Q_UINT64_C(1) << (i + 1); + if (results[word] & bit) + features |= feature; + } + + // now check the AVX state + uint xgetbvA = 0, xgetbvD = 0; + if (results[Leaf1ECX] & (1u << 27)) { + // XGETBV enabled + xgetbv(0, xgetbvA, xgetbvD); + } + + if ((xgetbvA & AVXState) != AVXState) { + // support for YMM registers is disabled, disable all AVX + features &= ~AllAVX; + } else if ((xgetbvA & AVX512State) != AVX512State) { + // support for ZMM registers or mask registers is disabled, disable all AVX512 + features &= ~AllAVX512; + } + + if (features & CpuFeatureRDRND && !checkRdrndWorks()) + features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); + + return features; +} + +#elif defined(Q_PROCESSOR_MIPS_32) + +#if defined(Q_OS_LINUX) +// +// Do not use QByteArray: it could use SIMD instructions itself at +// some point, thus creating a recursive dependency. Instead, use a +// QSimpleBuffer, which has the bare minimum needed to use memory +// dynamically and read lines from /proc/cpuinfo of arbitrary sizes. 
+// +struct QSimpleBuffer { + static const int chunk_size = 256; + char *data; + unsigned alloc; + unsigned size; + + QSimpleBuffer(): data(0), alloc(0), size(0) {} + ~QSimpleBuffer() { ::free(data); } + + void resize(unsigned newsize) { + if (newsize > alloc) { + unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1); + if (newalloc < newsize) newalloc = newsize; + if (newalloc != alloc) { + data = static_cast(::realloc(data, newalloc)); + alloc = newalloc; + } + } + size = newsize; + } + void append(const QSimpleBuffer &other, unsigned appendsize) { + unsigned oldsize = size; + resize(oldsize + appendsize); + ::memcpy(data + oldsize, other.data, appendsize); + } + void popleft(unsigned amount) { + if (amount >= size) return resize(0); + size -= amount; + ::memmove(data, data + amount, size); + } + char* cString() { + if (!alloc) resize(1); + return (data[size] = '\0', data); + } +}; + +// +// Uses a scratch "buffer" (which must be used for all reads done in the +// same file descriptor) to read chunks of data from a file, to read +// one line at a time. Lines include the trailing newline character ('\n'). +// On EOF, line.size is zero. +// +static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) +{ + for (;;) { + char *newline = static_cast(::memchr(buffer.data, '\n', buffer.size)); + if (newline) { + unsigned piece_size = newline - buffer.data + 1; + line.append(buffer, piece_size); + buffer.popleft(piece_size); + line.resize(line.size - 1); + return; + } + if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { + int oldsize = buffer.size; + buffer.resize(buffer.size + QSimpleBuffer::chunk_size); + buffer.size = oldsize; + } + ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); + if (read_bytes > 0) buffer.size += read_bytes; + else return; + } +} + +// +// Checks if any line with a given prefix from /proc/cpuinfo contains +// a certain string, surrounded by spaces. 
+// +static bool procCpuinfoContains(const char *prefix, const char *string) +{ + int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY); + if (cpuinfo_fd == -1) + return false; + + unsigned string_len = ::strlen(string); + unsigned prefix_len = ::strlen(prefix); + QSimpleBuffer line, buffer; + bool present = false; + do { + line.resize(0); + bufReadLine(cpuinfo_fd, line, buffer); + char *colon = static_cast(::memchr(line.data, ':', line.size)); + if (colon && line.size > prefix_len + string_len) { + if (!::strncmp(prefix, line.data, prefix_len)) { + // prefix matches, next character must be ':' or space + if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { + // Does it contain the string? + char *found = ::strstr(line.cString(), string); + if (found && ::isspace(found[-1]) && + (::isspace(found[string_len]) || found[string_len] == '\0')) { + present = true; + break; + } + } + } + } + } while (line.size); + + ::qt_safe_close(cpuinfo_fd); + return present; +} +#endif + +static inline quint64 detectProcessorFeatures() +{ + // NOTE: MIPS 74K cores are the only ones supporting DSPr2. 
+ quint64 flags = 0; + +#if defined __mips_dsp + flags |= Q_UINT64_C(1) << CpuFeatureDSP; +# if defined __mips_dsp_rev && __mips_dsp_rev >= 2 + flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; +# elif defined(Q_OS_LINUX) + if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) + flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; +# endif +#elif defined(Q_OS_LINUX) + if (procCpuinfoContains("ASEs implemented", "dsp")) { + flags |= Q_UINT64_C(1) << CpuFeatureDSP; + if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) + flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; + } +#endif + + return flags; +} + +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} +#endif + +static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); + +// record what CPU features were enabled by default in this Qt build +static const quint64 minFeature = qCompilerCpuFeatures; + +#ifdef Q_ATOMIC_INT64_IS_SUPPORTED +Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) }; +#else +Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) }; +#endif + +quint64 qDetectCpuFeatures() +{ + quint64 f = detectProcessorFeatures(); + QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); + if (!disable.isEmpty()) { + disable.prepend(' '); + for (int i = 0; i < features_count; ++i) { + if (disable.contains(features_string + features_indices[i])) + f &= ~(Q_UINT64_C(1) << i); + } + } + +#ifdef RUNNING_ON_VALGRIND + bool runningOnValgrind = RUNNING_ON_VALGRIND; +#else + bool runningOnValgrind = false; +#endif + if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) { + quint64 missing = minFeature & ~f; + fprintf(stderr, "Incompatible processor. 
This Qt build requires the following features:\n "); + for (int i = 0; i < features_count; ++i) { + if (missing & (Q_UINT64_C(1) << i)) + fprintf(stderr, "%s", features_string + features_indices[i]); + } + fprintf(stderr, "\n"); + fflush(stderr); + qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, + features_string + features_indices[qCountTrailingZeroBits(missing)]); + } + + qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized)); +#ifndef Q_ATOMIC_INT64_IS_SUPPORTED + qt_cpu_features[1].storeRelaxed(f >> 32); +#endif + return f; +} + +void qDumpCPUFeatures() +{ + quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); + printf("Processor features: "); + for (int i = 0; i < features_count; ++i) { + if (features & (Q_UINT64_C(1) << i)) + printf("%s%s", features_string + features_indices[i], + minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); + } + if ((features = (qCompilerCpuFeatures & ~features))) { + printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:"); + for (int i = 0; i < features_count; ++i) { + if (features & (Q_UINT64_C(1) << i)) + printf("%s", features_string + features_indices[i]); + } + printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!"); + } + puts(""); +} + +#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) + +# ifdef Q_PROCESSOR_X86_64 +# define _rdrandXX_step _rdrand64_step +# define _rdseedXX_step _rdseed64_step +# else +# define _rdrandXX_step _rdrand32_step +# define _rdseedXX_step _rdseed32_step +# endif + +# if QT_COMPILER_SUPPORTS_HERE(RDSEED) +static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept +{ + // Unlike for the RDRAND code below, the Intel whitepaper describing the + // use of the RDSEED instruction indicates we should not retry in a loop. + // If the independent bit generator used by RDSEED is out of entropy, it + // may take time to replenish. 
+ // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide + while (ptr + sizeof(qregisteruint)/sizeof(*ptr) <= end) { + if (_rdseedXX_step(reinterpret_cast(ptr)) == 0) + goto out; + ptr += sizeof(qregisteruint)/sizeof(*ptr); + } + + if (sizeof(*ptr) != sizeof(qregisteruint) && ptr != end) { + if (_rdseed32_step(ptr) == 0) + goto out; + ++ptr; + } + +out: + return ptr; +} +# else +static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *) +{ + return ptr; +} +# endif + +static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept +{ + int retries = 10; + while (ptr + sizeof(qregisteruint)/sizeof(*ptr) <= end) { + if (_rdrandXX_step(reinterpret_cast(ptr))) + ptr += sizeof(qregisteruint)/sizeof(*ptr); + else if (--retries == 0) + goto out; + } + + while (sizeof(*ptr) != sizeof(qregisteruint) && ptr != end) { + bool ok = _rdrand32_step(ptr); + if (!ok && --retries) + continue; + if (ok) + ++ptr; + break; + } + +out: + return ptr; +} + +static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept +{ + /* + * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a + * failing random generation instruction, which always returns + * 0xffffffff, even when generation was "successful". + * + * This code checks if hardware random generator generates four consecutive + * equal numbers. If it does, then we probably have a failing one and + * should disable it completely. + * + * https://bugreports.qt.io/browse/QTBUG-69423 + */ + constexpr qsizetype TestBufferSize = 4; + unsigned testBuffer[TestBufferSize] = {}; + + unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize); + if (end < testBuffer + 3) { + // Random generation didn't produce enough data for us to make a + // determination whether it's working or not. Assume it isn't, but + // don't print a warning. 
+ return false; + } + + // Check the results for equality + if (testBuffer[0] == testBuffer[1] + && testBuffer[0] == testBuffer[2] + && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { + fprintf(stderr, "WARNING: CPU random generator seem to be failing, " + "disabling hardware random number generation\n" + "WARNING: RDRND generated:"); + for (unsigned *ptr = testBuffer; ptr < end; ++ptr) + fprintf(stderr, " 0x%x", *ptr); + fprintf(stderr, "\n"); + return false; + } + + // We're good + return true; +} + +QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept +{ + unsigned *ptr = reinterpret_cast(buffer); + unsigned *end = ptr + count; + + if (qCpuHasFeature(RDSEED)) + ptr = qt_random_rdseed(ptr, end); + + // fill the buffer with RDRND if RDSEED didn't + ptr = qt_random_rdrnd(ptr, end); + return ptr - reinterpret_cast(buffer); +} +#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) +static bool checkRdrndWorks() noexcept { return false; } +#endif // Q_PROCESSOR_X86 && RDRND + +QT_END_NAMESPACE diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h new file mode 100644 index 0000000000..26e98c4542 --- /dev/null +++ b/src/corelib/global/qsimd_p.h @@ -0,0 +1,396 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. 
For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QSIMD_P_H +#define QSIMD_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +#include + +/* + * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros. 
+ * They mean the compiler supports the necessary flags and the headers + * for the x86 and ARM intrinsics: + * - GCC: the -mXXX or march=YYY flag is necessary before #include + * up to 4.8; GCC >= 4.9 can include unconditionally + * - Intel CC: #include can happen unconditionally + * - MSVC: #include can happen unconditionally + * - RVCT: ??? + * + * We will try to include all headers possible under this configuration. + * + * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 & + * up do define __AVX__ if the -arch:AVX option is passed on the command-line. + * + * Supported XXX are: + * Flag | Arch | GCC | Intel CC | MSVC | + * ARM_NEON | ARM | I & C | None | ? | + * SSE2 | x86 | I & C | I & C | I & C | + * SSE3 | x86 | I & C | I & C | I only | + * SSSE3 | x86 | I & C | I & C | I only | + * SSE4_1 | x86 | I & C | I & C | I only | + * SSE4_2 | x86 | I & C | I & C | I only | + * AVX | x86 | I & C | I & C | I & C | + * AVX2 | x86 | I & C | I & C | I only | + * AVX512xx | x86 | I & C | I & C | I only | + * I = intrinsics; C = code generation + * + * Code can use the following constructs to determine compiler support & status: + * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__) + * If this test passes, then the compiler is already generating code for that + * given sub-architecture. The intrinsics for that sub-architecture are + * #included and can be used without restriction or runtime check. + * + * - #if QT_COMPILER_SUPPORTS(XXX) + * If this test passes, then the compiler is able to generate code for that + * given sub-architecture in another translation unit, given the right set of + * flags. Use of the intrinsics is not guaranteed. This is useful with + * runtime detection (see below). + * + * - #if QT_COMPILER_SUPPORTS_HERE(XXX) + * If this test passes, then the compiler is able to generate code for that + * given sub-architecture in this translation unit, even if it is not doing + * that now (it might be). 
Individual functions may be tagged with + * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that + * sub-arch. Only inside such functions is the use of the intrisics + * guaranteed to work. This is useful with runtime detection (see below). + * + * Runtime detection of a CPU sub-architecture can be done with the + * qCpuHasFeature(XXX) function. There are two strategies for generating + * optimized code like that: + * + * 1) place the optimized code in a different translation unit (C or assembly + * sources) and pass the correct flags to the compiler to enable support. Those + * sources must not include qglobal.h, which means they cannot include this + * file either. The dispatcher function would look like this: + * + * void foo() + * { + * #if QT_COMPILER_SUPPORTS(XXX) + * if (qCpuHasFeature(XXX)) { + * foo_optimized_xxx(); + * return; + * } + * #endif + * foo_plain(); + * } + * + * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and + * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use + * other Qt code. 
The dispatcher function would look like this: + * + * void foo() + * { + * #if QT_COMPILER_SUPPORTS_HERE(XXX) + * if (qCpuHasFeature(XXX)) { + * foo_optimized_xxx(); + * return; + * } + * #endif + * foo_plain(); + * } + */ + +#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC) +#include +#endif + +#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0) + +#if defined(Q_PROCESSOR_ARM) +# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x) +# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600 + /* GCC requires attributes for a function */ +# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) +# else +# define QT_FUNCTION_TARGET(x) +# endif +# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__) +# define __ARM_FEATURE_NEON // also support QT_COMPILER_SUPPORTS_HERE(NEON) +# endif +#elif defined(Q_PROCESSOR_MIPS) +# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) +# define QT_FUNCTION_TARGET(x) +# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32) +# define __MIPS_DSP__ +# endif +# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32) +# define __MIPS_DSPR2__ +# endif +#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) +# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x)) +# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) + /* GCC requires attributes for a function */ +# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) +# else +# define QT_FUNCTION_TARGET(x) +# endif +#else +# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) +# define QT_FUNCTION_TARGET(x) +#endif + +#ifdef Q_PROCESSOR_X86 +/* -- x86 intrinsic support -- */ + +# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2) +// MSVC doesn't define __SSE2__, so do it ourselves +# define __SSE__ 1 +# define __SSE2__ 1 +# endif + +# ifdef __SSE2__ +// #include the 
intrinsics +# include +# endif + +# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) +// GCC 4.4 and Clang 2.8 added a few more intrinsics there +# include +# endif + +# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__)) +// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros +// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx +# define __SSE3__ 1 +# define __SSSE3__ 1 +// no Intel CPU supports SSE4a, so don't define it +# define __SSE4_1__ 1 +# define __SSE4_2__ 1 +# ifndef __AVX__ +# define __AVX__ 1 +# endif +# endif + +# if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) +// POPCNT instructions: +// All processors that support SSE4.2 support POPCNT +// (but neither MSVC nor the Intel compiler define this macro) +# define __POPCNT__ 1 +# endif + +// AVX intrinsics +# if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) +// AES, PCLMULQDQ instructions: +// All processors that support AVX support PCLMULQDQ +// (but neither MSVC nor the Intel compiler define this macro) +# define __PCLMUL__ 1 +# endif + +# if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) +// F16C & RDRAND instructions: +// All processors that support AVX2 support F16C & RDRAND: +// (but neither MSVC nor the Intel compiler define these macros) +# define __F16C__ 1 +# define __RDRND__ 1 +# endif + +# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL) +// BMI2 instructions: +// All processors that support BMI support BMI2 (and AVX2) +// (but neither MSVC nor the Intel compiler define this macro) +# define __BMI2__ 1 +# endif + +# include "qsimd_x86_p.h" + +// Haswell sub-architecture +// +// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2, +// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a +// sub-target for 
us. The first AMD processor with AVX2 support (Zen) has the +// same features. +// +// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc +// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). +# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell" +# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \ + defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__) +# define __haswell__ 1 +# endif + +// This constant does not include all CPU features found in a Haswell, only +// those that we'd have optimized code for. +// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode. +QT_BEGIN_NAMESPACE +static const quint64 CpuFeatureArchHaswell = 0 + | CpuFeatureSSE2 + | CpuFeatureSSE3 + | CpuFeatureSSSE3 + | CpuFeatureSSE4_1 + | CpuFeatureSSE4_2 + | CpuFeatureFMA + | CpuFeaturePOPCNT + | CpuFeatureAVX + | CpuFeatureF16C + | CpuFeatureAVX2 + | CpuFeatureBMI + | CpuFeatureBMI2; +QT_END_NAMESPACE + +#endif /* Q_PROCESSOR_X86 */ + +// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html +// This should be tweaked with an "upper version" of clang once we know which release fixes the +// issue. At that point we can rely on __ARM_FEATURE_CRC32 again. +#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32) +# undef __ARM_FEATURE_CRC32 +#endif + +// NEON intrinsics +// note: as of GCC 4.9, does not support function targets for ARM +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include +#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available. +#ifndef __ARM_NEON__ +// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection. 
+#define __ARM_NEON__ +#endif +#endif +// AArch64/ARM64 +#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32) +#if defined(Q_PROCESSOR_ARM_64) +// only available on aarch64 +#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc" +#endif +# include +#endif + +#ifdef __cplusplus +#include + +QT_BEGIN_NAMESPACE + +#ifndef Q_PROCESSOR_X86 +enum CPUFeatures { +#if defined(Q_PROCESSOR_ARM) + CpuFeatureNEON = 2, + CpuFeatureARM_NEON = CpuFeatureNEON, + CpuFeatureCRC32 = 4, +#elif defined(Q_PROCESSOR_MIPS) + CpuFeatureDSP = 2, + CpuFeatureDSPR2 = 4, +#endif + + // used only to indicate that the CPU detection was initialised + QSimdInitialized = 1 +}; + +static const quint64 qCompilerCpuFeatures = 0 +#if defined __ARM_NEON__ + | CpuFeatureNEON +#endif +#if defined __ARM_FEATURE_CRC32 + | CpuFeatureCRC32 +#endif +#if defined __mips_dsp + | CpuFeatureDSP +#endif +#if defined __mips_dspr2 + | CpuFeatureDSPR2 +#endif + ; +#endif + +#ifdef Q_ATOMIC_INT64_IS_SUPPORTED +extern Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[1]; +#else +extern Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[2]; +#endif +Q_CORE_EXPORT quint64 qDetectCpuFeatures(); + +#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED) +Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept; +#else +static inline qsizetype qRandomCpu(void *, qsizetype) noexcept +{ + return 0; +} +#endif + +static inline quint64 qCpuFeatures() +{ + quint64 features = qt_cpu_features[0].loadRelaxed(); +#ifndef Q_ATOMIC_INT64_IS_SUPPORTED + features |= quint64(qt_cpu_features[1].loadRelaxed()) << 32; +#endif + if (Q_UNLIKELY(features == 0)) { + features = qDetectCpuFeatures(); + Q_ASSUME(features != 0); + } + return features; +} + +#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \ + || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) + +inline bool qHasHwrng() +{ +#if defined(Q_PROCESSOR_X86) && 
QT_COMPILER_SUPPORTS_HERE(RDRND) + return qCpuHasFeature(RDRND); +#else + return false; +#endif +} + +#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ + for (; i < static_cast(qMin(static_cast(length), ((4 - ((reinterpret_cast(ptr) >> 2) & 0x3)) & 0x3))); ++i) + +#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \ + for (; i < static_cast(qMin(static_cast(length), ((8 - ((reinterpret_cast(ptr) >> 2) & 0x7)) & 0x7))); ++i) + +QT_END_NAMESPACE + +#endif // __cplusplus + +#define SIMD_EPILOGUE(i, length, max) \ + for (int _i = 0; _i < max && i < length; ++i, ++_i) + +#endif // QSIMD_P_H diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp new file mode 100644 index 0000000000..be17f44c09 --- /dev/null +++ b/src/corelib/global/qsimd_x86.cpp @@ -0,0 +1,155 @@ +/**************************************************************************** +** +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. 
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// This is a generated file. DO NOT EDIT. +// Please see util/x86simdgen/generate.pl +#include "qsimd_p.h" + +static const char features_string[] = + " sse2\0" + " sse3\0" + " ssse3\0" + " fma\0" + " sse4.1\0" + " sse4.2\0" + " movbe\0" + " popcnt\0" + " aes\0" + " avx\0" + " f16c\0" + " rdrnd\0" + " bmi\0" + " hle\0" + " avx2\0" + " bmi2\0" + " rtm\0" + " avx512f\0" + " avx512dq\0" + " rdseed\0" + " avx512ifma\0" + " avx512pf\0" + " avx512er\0" + " avx512cd\0" + " sha\0" + " avx512bw\0" + " avx512vl\0" + " avx512vbmi\0" + " avx512vbmi2\0" + " gfni\0" + " vaes\0" + " avx512vnni\0" + " avx512bitalg\0" + " avx512vpopcntdq\0" + " avx5124nniw\0" + " avx5124fmaps\0" + "\0"; + +static const quint16 features_indices[] = { + 306, 0, 6, 12, 19, 24, 32, 40, + 47, 55, 60, 65, 71, 78, 83, 88, + 94, 100, 105, 114, 124, 132, 144, 154, + 164, 174, 179, 189, 199, 211, 224, 230, + 236, 248, 262, 279, 292 +}; + +enum X86CpuidLeaves { + Leaf1ECX, + Leaf1EDX, + Leaf7_0EBX, + Leaf7_0ECX, + Leaf7_0EDX, + X86CpuidMaxLeaf +}; + +static const quint8 x86_locators[] = { + Leaf1EDX*32 + 26, // sse2 + Leaf1ECX*32 + 0, // sse3 + Leaf1ECX*32 + 9, // ssse3 + Leaf1ECX*32 + 12, // fma + Leaf1ECX*32 + 19, // sse4.1 + Leaf1ECX*32 + 20, // sse4.2 + 
Leaf1ECX*32 + 22, // movbe + Leaf1ECX*32 + 23, // popcnt + Leaf1ECX*32 + 25, // aes + Leaf1ECX*32 + 28, // avx + Leaf1ECX*32 + 29, // f16c + Leaf1ECX*32 + 30, // rdrnd + Leaf7_0EBX*32 + 3, // bmi + Leaf7_0EBX*32 + 4, // hle + Leaf7_0EBX*32 + 5, // avx2 + Leaf7_0EBX*32 + 8, // bmi2 + Leaf7_0EBX*32 + 11, // rtm + Leaf7_0EBX*32 + 16, // avx512f + Leaf7_0EBX*32 + 17, // avx512dq + Leaf7_0EBX*32 + 18, // rdseed + Leaf7_0EBX*32 + 21, // avx512ifma + Leaf7_0EBX*32 + 26, // avx512pf + Leaf7_0EBX*32 + 27, // avx512er + Leaf7_0EBX*32 + 28, // avx512cd + Leaf7_0EBX*32 + 29, // sha + Leaf7_0EBX*32 + 30, // avx512bw + Leaf7_0EBX*32 + 31, // avx512vl + Leaf7_0ECX*32 + 1, // avx512vbmi + Leaf7_0ECX*32 + 6, // avx512vbmi2 + Leaf7_0ECX*32 + 8, // gfni + Leaf7_0ECX*32 + 9, // vaes + Leaf7_0ECX*32 + 11, // avx512vnni + Leaf7_0ECX*32 + 12, // avx512bitalg + Leaf7_0ECX*32 + 14, // avx512vpopcntdq + Leaf7_0EDX*32 + 2, // avx5124nniw + Leaf7_0EDX*32 + 3 // avx5124fmaps +}; + +// List of AVX512 features (see detectProcessorFeatures()) +static const quint64 AllAVX512 = 0 + | CpuFeatureAVX512F + | CpuFeatureAVX512DQ + | CpuFeatureAVX512IFMA + | CpuFeatureAVX512PF + | CpuFeatureAVX512ER + | CpuFeatureAVX512CD + | CpuFeatureAVX512BW + | CpuFeatureAVX512VL + | CpuFeatureAVX512VBMI + | CpuFeatureAVX512VBMI2 + | CpuFeatureAVX512VNNI + | CpuFeatureAVX512BITALG + | CpuFeatureAVX512VPOPCNTDQ + | CpuFeatureAVX5124NNIW + | CpuFeatureAVX5124FMAPS; diff --git a/src/corelib/global/qsimd_x86_p.h b/src/corelib/global/qsimd_x86_p.h new file mode 100644 index 0000000000..82e3008a24 --- /dev/null +++ b/src/corelib/global/qsimd_x86_p.h @@ -0,0 +1,261 @@ +/**************************************************************************** +** +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// This is a generated file. DO NOT EDIT. 
+// Please see util/x86simdgen/generate.pl +#ifndef QSIMD_P_H +# error "Please include instead" +#endif +#ifndef QSIMD_X86_P_H +#define QSIMD_X86_P_H + +#include "qsimd_p.h" + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +QT_BEGIN_NAMESPACE + +// used only to indicate that the CPU detection was initialized +#define QSimdInitialized (Q_UINT64_C(1) << 0) + +// in CPUID Leaf 1, EDX: +#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1) +#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" + +// in CPUID Leaf 1, ECX: +#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2) +#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" +#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3) +#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" +#define CpuFeatureFMA (Q_UINT64_C(1) << 4) +#define QT_FUNCTION_TARGET_STRING_FMA "fma" +#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5) +#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" +#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6) +#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" +#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7) +#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" +#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8) +#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" +#define CpuFeatureAES (Q_UINT64_C(1) << 9) +#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" +#define CpuFeatureAVX (Q_UINT64_C(1) << 10) +#define QT_FUNCTION_TARGET_STRING_AVX "avx" +#define CpuFeatureF16C (Q_UINT64_C(1) << 11) +#define QT_FUNCTION_TARGET_STRING_F16C "f16c" +#define CpuFeatureRDRND (Q_UINT64_C(1) << 12) +#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" + +// in CPUID Leaf 7, Sub-leaf 0, EBX: +#define CpuFeatureBMI (Q_UINT64_C(1) << 13) +#define QT_FUNCTION_TARGET_STRING_BMI "bmi" +#define CpuFeatureHLE (Q_UINT64_C(1) << 14) +#define QT_FUNCTION_TARGET_STRING_HLE "hle" +#define CpuFeatureAVX2 
(Q_UINT64_C(1) << 15) +#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2" +#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16) +#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" +#define CpuFeatureRTM (Q_UINT64_C(1) << 17) +#define QT_FUNCTION_TARGET_STRING_RTM "rtm" +#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18) +#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" +#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19) +#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" +#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20) +#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" +#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21) +#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" +#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22) +#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" +#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23) +#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" +#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24) +#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" +#define CpuFeatureSHA (Q_UINT64_C(1) << 25) +#define QT_FUNCTION_TARGET_STRING_SHA "sha" +#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26) +#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" +#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27) +#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" + +// in CPUID Leaf 7, Sub-leaf 0, ECX: +#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28) +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" +#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29) +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2" +#define CpuFeatureGFNI (Q_UINT64_C(1) << 30) +#define QT_FUNCTION_TARGET_STRING_GFNI "gfni" +#define CpuFeatureVAES (Q_UINT64_C(1) << 31) +#define QT_FUNCTION_TARGET_STRING_VAES "vaes" +#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32) +#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni" +#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33) +#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg" +#define CpuFeatureAVX512VPOPCNTDQ 
(Q_UINT64_C(1) << 34) +#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq" + +// in CPUID Leaf 7, Sub-leaf 0, EDX: +#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35) +#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw" +#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36) +#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps" + +static const quint64 qCompilerCpuFeatures = 0 +#ifdef __SSE2__ + | CpuFeatureSSE2 +#endif +#ifdef __SSE3__ + | CpuFeatureSSE3 +#endif +#ifdef __SSSE3__ + | CpuFeatureSSSE3 +#endif +#ifdef __FMA__ + | CpuFeatureFMA +#endif +#ifdef __SSE4_1__ + | CpuFeatureSSE4_1 +#endif +#ifdef __SSE4_2__ + | CpuFeatureSSE4_2 +#endif +#ifdef __MOVBE__ + | CpuFeatureMOVBE +#endif +#ifdef __POPCNT__ + | CpuFeaturePOPCNT +#endif +#ifdef __AES__ + | CpuFeatureAES +#endif +#ifdef __AVX__ + | CpuFeatureAVX +#endif +#ifdef __F16C__ + | CpuFeatureF16C +#endif +#ifdef __RDRND__ + | CpuFeatureRDRND +#endif +#ifdef __BMI__ + | CpuFeatureBMI +#endif +#ifdef __HLE__ + | CpuFeatureHLE +#endif +#ifdef __AVX2__ + | CpuFeatureAVX2 +#endif +#ifdef __BMI2__ + | CpuFeatureBMI2 +#endif +#ifdef __RTM__ + | CpuFeatureRTM +#endif +#ifdef __AVX512F__ + | CpuFeatureAVX512F +#endif +#ifdef __AVX512DQ__ + | CpuFeatureAVX512DQ +#endif +#ifdef __RDSEED__ + | CpuFeatureRDSEED +#endif +#ifdef __AVX512IFMA__ + | CpuFeatureAVX512IFMA +#endif +#ifdef __AVX512PF__ + | CpuFeatureAVX512PF +#endif +#ifdef __AVX512ER__ + | CpuFeatureAVX512ER +#endif +#ifdef __AVX512CD__ + | CpuFeatureAVX512CD +#endif +#ifdef __SHA__ + | CpuFeatureSHA +#endif +#ifdef __AVX512BW__ + | CpuFeatureAVX512BW +#endif +#ifdef __AVX512VL__ + | CpuFeatureAVX512VL +#endif +#ifdef __AVX512VBMI__ + | CpuFeatureAVX512VBMI +#endif +#ifdef __AVX512VBMI2__ + | CpuFeatureAVX512VBMI2 +#endif +#ifdef __GFNI__ + | CpuFeatureGFNI +#endif +#ifdef __VAES__ + | CpuFeatureVAES +#endif +#ifdef __AVX512VNNI__ + | CpuFeatureAVX512VNNI +#endif +#ifdef __AVX512BITALG__ + | CpuFeatureAVX512BITALG +#endif +#ifdef 
__AVX512VPOPCNTDQ__ + | CpuFeatureAVX512VPOPCNTDQ +#endif +#ifdef __AVX5124NNIW__ + | CpuFeatureAVX5124NNIW +#endif +#ifdef __AVX5124FMAPS__ + | CpuFeatureAVX5124FMAPS +#endif + ; + +QT_END_NAMESPACE + +#endif // QSIMD_X86_P_H diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp deleted file mode 100644 index 75c380ee8a..0000000000 --- a/src/corelib/tools/qsimd.cpp +++ /dev/null @@ -1,718 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2019 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. 
The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "qsimd_p.h" -#include "qalgorithms.h" -#include -#include - -#ifdef Q_OS_LINUX -# include "../testlib/3rdparty/valgrind_p.h" -#endif - -#if defined(Q_OS_WIN) -# if !defined(Q_CC_GNU) -# include -# endif -#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32)) -#include "private/qcore_unix_p.h" - -// the kernel header definitions for HWCAP_* -// (the ones we need/may need anyway) - -// copied from (ARM) -#define HWCAP_CRUNCH 1024 -#define HWCAP_THUMBEE 2048 -#define HWCAP_NEON 4096 -#define HWCAP_VFPv3 8192 -#define HWCAP_VFPv3D16 16384 - -// copied from (ARM): -#define HWCAP2_CRC32 (1 << 4) - -// copied from (Aarch64) -#define HWCAP_CRC32 (1 << 7) - -// copied from -#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ -#define AT_HWCAP2 26 /* extension of AT_HWCAP */ - -#elif defined(Q_CC_GHS) -#include -#endif - -QT_BEGIN_NAMESPACE - -/* - * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note - * we remove the terminating -1 that the script adds. 
- */ - -// begin generated -#if defined(Q_PROCESSOR_ARM) -/* Data: - neon - crc32 - */ -static const char features_string[] = - " neon\0" - " crc32\0" - "\0"; -static const int features_indices[] = { 0, 6 }; -#elif defined(Q_PROCESSOR_MIPS) -/* Data: - dsp - dspr2 -*/ -static const char features_string[] = - " dsp\0" - " dspr2\0" - "\0"; - -static const int features_indices[] = { - 0, 5 -}; -#elif defined(Q_PROCESSOR_X86) -# include "qsimd_x86.cpp" // generated by util/x86simdgen -#else -static const char features_string[] = ""; -static const int features_indices[] = { }; -#endif -// end generated - -#if defined (Q_OS_NACL) -static inline uint detectProcessorFeatures() -{ - return 0; -} -#elif defined(Q_PROCESSOR_ARM) -static inline quint64 detectProcessorFeatures() -{ - quint64 features = 0; - -#if defined(Q_OS_LINUX) -# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) - features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit. -# endif - int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY); - if (auxv != -1) { - unsigned long vector[64]; - int nread; - while (features == 0) { - nread = qt_safe_read(auxv, (char *)vector, sizeof vector); - if (nread <= 0) { - // EOF or error - break; - } - - int max = nread / (sizeof vector[0]); - for (int i = 0; i < max; i += 2) { - if (vector[i] == AT_HWCAP) { -# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) - // For Aarch64: - if (vector[i+1] & HWCAP_CRC32) - features |= Q_UINT64_C(1) << CpuFeatureCRC32; -# endif - // Aarch32, or ARMv7 or before: - if (vector[i+1] & HWCAP_NEON) - features |= Q_UINT64_C(1) << CpuFeatureNEON; - } -# if defined(Q_PROCESSOR_ARM_32) - // For Aarch32: - if (vector[i] == AT_HWCAP2) { - if (vector[i+1] & HWCAP2_CRC32) - features |= Q_UINT64_C(1) << CpuFeatureCRC32; - } -# endif - } - } - - qt_safe_close(auxv); - return features; - } - // fall back if /proc/self/auxv wasn't found -#endif - -#if defined(__ARM_NEON__) - features |= Q_UINT64_C(1) 
<< CpuFeatureNEON; -#endif -#if defined(__ARM_FEATURE_CRC32) - features |= Q_UINT64_C(1) << CpuFeatureCRC32; -#endif - - return features; -} - -#elif defined(Q_PROCESSOR_X86) - -#ifdef Q_PROCESSOR_X86_32 -# define PICreg "%%ebx" -#else -# define PICreg "%%rbx" -#endif - -static bool checkRdrndWorks() noexcept; - -static int maxBasicCpuidSupported() -{ -#if defined(Q_CC_EMSCRIPTEN) - return 6; // All features supported by Emscripten -#elif defined(Q_CC_GNU) - qregisterint tmp1; - -# if Q_PROCESSOR_X86 < 5 - // check if the CPUID instruction is supported - long cpuid_supported; - asm ("pushf\n" - "pop %0\n" - "mov %0, %1\n" - "xor $0x00200000, %0\n" - "push %0\n" - "popf\n" - "pushf\n" - "pop %0\n" - "xor %1, %0\n" // %eax is now 0 if CPUID is not supported - : "=a" (cpuid_supported), "=r" (tmp1) - ); - if (!cpuid_supported) - return 0; -# endif - - int result; - asm ("xchg " PICreg", %1\n" - "cpuid\n" - "xchg " PICreg", %1\n" - : "=&a" (result), "=&r" (tmp1) - : "0" (0) - : "ecx", "edx"); - return result; -#elif defined(Q_OS_WIN) - // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 - int info[4]; - __cpuid(info, 0); - return info[0]; -#elif defined(Q_CC_GHS) - unsigned int info[4]; - __CPUID(0, info); - return info[0]; -#else - return 0; -#endif -} - -static void cpuidFeatures01(uint &ecx, uint &edx) -{ -#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) - qregisterint tmp1; - asm ("xchg " PICreg", %2\n" - "cpuid\n" - "xchg " PICreg", %2\n" - : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) - : "a" (1)); -#elif defined(Q_OS_WIN) - int info[4]; - __cpuid(info, 1); - ecx = info[2]; - edx = info[3]; -#elif defined(Q_CC_GHS) - unsigned int info[4]; - __CPUID(1, info); - ecx = info[2]; - edx = info[3]; -#else - Q_UNUSED(ecx); - Q_UNUSED(edx); -#endif -} - -#ifdef Q_OS_WIN -inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} -#endif - -static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) -{ -#if 
defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) - qregisteruint rbx; // in case it's 64-bit - qregisteruint rcx = 0; - qregisteruint rdx = 0; - asm ("xchg " PICreg", %0\n" - "cpuid\n" - "xchg " PICreg", %0\n" - : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) - : "a" (7)); - ebx = rbx; - ecx = rcx; - edx = rdx; -#elif defined(Q_OS_WIN) - int info[4]; - __cpuidex(info, 7, 0); - ebx = info[1]; - ecx = info[2]; - edx = info[3]; -#elif defined(Q_CC_GHS) - unsigned int info[4]; - __CPUIDEX(7, 0, info); - ebx = info[1]; - ecx = info[2]; - edx = info[3]; -#else - Q_UNUSED(ebx); - Q_UNUSED(ecx); - Q_UNUSED(edx); -#endif -} - -#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) -// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); -inline quint64 _xgetbv(__int64) { return 0; } -#endif -static void xgetbv(uint in, uint &eax, uint &edx) -{ -#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) - asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction - : "=a" (eax), "=d" (edx) - : "c" (in)); -#elif defined(Q_OS_WIN) - quint64 result = _xgetbv(in); - eax = result; - edx = result >> 32; -#else - Q_UNUSED(in); - Q_UNUSED(eax); - Q_UNUSED(edx); -#endif -} - -static quint64 detectProcessorFeatures() -{ - // Flags from the CR0 / XCR0 state register - enum XCR0Flags { - X87 = 1 << 0, - XMM0_15 = 1 << 1, - YMM0_15Hi128 = 1 << 2, - BNDRegs = 1 << 3, - BNDCSR = 1 << 4, - OpMask = 1 << 5, - ZMM0_15Hi256 = 1 << 6, - ZMM16_31 = 1 << 7, - - SSEState = XMM0_15, - AVXState = XMM0_15 | YMM0_15Hi128, - AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 - }; - static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; - static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; - - quint64 features = 0; - int cpuidLevel = maxBasicCpuidSupported(); -#if Q_PROCESSOR_X86 < 5 - if (cpuidLevel < 1) - return 0; -#else - Q_ASSERT(cpuidLevel >= 1); -#endif - - uint results[X86CpuidMaxLeaf] = {}; - cpuidFeatures01(results[Leaf1ECX], 
results[Leaf1EDX]); - if (cpuidLevel >= 7) - cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); - - // populate our feature list - for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { - uint word = x86_locators[i] / 32; - uint bit = 1U << (x86_locators[i] % 32); - quint64 feature = Q_UINT64_C(1) << (i + 1); - if (results[word] & bit) - features |= feature; - } - - // now check the AVX state - uint xgetbvA = 0, xgetbvD = 0; - if (results[Leaf1ECX] & (1u << 27)) { - // XGETBV enabled - xgetbv(0, xgetbvA, xgetbvD); - } - - if ((xgetbvA & AVXState) != AVXState) { - // support for YMM registers is disabled, disable all AVX - features &= ~AllAVX; - } else if ((xgetbvA & AVX512State) != AVX512State) { - // support for ZMM registers or mask registers is disabled, disable all AVX512 - features &= ~AllAVX512; - } - - if (features & CpuFeatureRDRND && !checkRdrndWorks()) - features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); - - return features; -} - -#elif defined(Q_PROCESSOR_MIPS_32) - -#if defined(Q_OS_LINUX) -// -// Do not use QByteArray: it could use SIMD instructions itself at -// some point, thus creating a recursive dependency. Instead, use a -// QSimpleBuffer, which has the bare minimum needed to use memory -// dynamically and read lines from /proc/cpuinfo of arbitrary sizes. 
-// -struct QSimpleBuffer { - static const int chunk_size = 256; - char *data; - unsigned alloc; - unsigned size; - - QSimpleBuffer(): data(0), alloc(0), size(0) {} - ~QSimpleBuffer() { ::free(data); } - - void resize(unsigned newsize) { - if (newsize > alloc) { - unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1); - if (newalloc < newsize) newalloc = newsize; - if (newalloc != alloc) { - data = static_cast(::realloc(data, newalloc)); - alloc = newalloc; - } - } - size = newsize; - } - void append(const QSimpleBuffer &other, unsigned appendsize) { - unsigned oldsize = size; - resize(oldsize + appendsize); - ::memcpy(data + oldsize, other.data, appendsize); - } - void popleft(unsigned amount) { - if (amount >= size) return resize(0); - size -= amount; - ::memmove(data, data + amount, size); - } - char* cString() { - if (!alloc) resize(1); - return (data[size] = '\0', data); - } -}; - -// -// Uses a scratch "buffer" (which must be used for all reads done in the -// same file descriptor) to read chunks of data from a file, to read -// one line at a time. Lines include the trailing newline character ('\n'). -// On EOF, line.size is zero. -// -static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) -{ - for (;;) { - char *newline = static_cast(::memchr(buffer.data, '\n', buffer.size)); - if (newline) { - unsigned piece_size = newline - buffer.data + 1; - line.append(buffer, piece_size); - buffer.popleft(piece_size); - line.resize(line.size - 1); - return; - } - if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { - int oldsize = buffer.size; - buffer.resize(buffer.size + QSimpleBuffer::chunk_size); - buffer.size = oldsize; - } - ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); - if (read_bytes > 0) buffer.size += read_bytes; - else return; - } -} - -// -// Checks if any line with a given prefix from /proc/cpuinfo contains -// a certain string, surrounded by spaces. 
-// -static bool procCpuinfoContains(const char *prefix, const char *string) -{ - int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY); - if (cpuinfo_fd == -1) - return false; - - unsigned string_len = ::strlen(string); - unsigned prefix_len = ::strlen(prefix); - QSimpleBuffer line, buffer; - bool present = false; - do { - line.resize(0); - bufReadLine(cpuinfo_fd, line, buffer); - char *colon = static_cast(::memchr(line.data, ':', line.size)); - if (colon && line.size > prefix_len + string_len) { - if (!::strncmp(prefix, line.data, prefix_len)) { - // prefix matches, next character must be ':' or space - if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { - // Does it contain the string? - char *found = ::strstr(line.cString(), string); - if (found && ::isspace(found[-1]) && - (::isspace(found[string_len]) || found[string_len] == '\0')) { - present = true; - break; - } - } - } - } - } while (line.size); - - ::qt_safe_close(cpuinfo_fd); - return present; -} -#endif - -static inline quint64 detectProcessorFeatures() -{ - // NOTE: MIPS 74K cores are the only ones supporting DSPr2. 
- quint64 flags = 0; - -#if defined __mips_dsp - flags |= Q_UINT64_C(1) << CpuFeatureDSP; -# if defined __mips_dsp_rev && __mips_dsp_rev >= 2 - flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; -# elif defined(Q_OS_LINUX) - if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) - flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; -# endif -#elif defined(Q_OS_LINUX) - if (procCpuinfoContains("ASEs implemented", "dsp")) { - flags |= Q_UINT64_C(1) << CpuFeatureDSP; - if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) - flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; - } -#endif - - return flags; -} - -#else -static inline uint detectProcessorFeatures() -{ - return 0; -} -#endif - -static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); - -// record what CPU features were enabled by default in this Qt build -static const quint64 minFeature = qCompilerCpuFeatures; - -#ifdef Q_ATOMIC_INT64_IS_SUPPORTED -Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) }; -#else -Q_CORE_EXPORT QBasicAtomicInteger qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) }; -#endif - -quint64 qDetectCpuFeatures() -{ - quint64 f = detectProcessorFeatures(); - QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); - if (!disable.isEmpty()) { - disable.prepend(' '); - for (int i = 0; i < features_count; ++i) { - if (disable.contains(features_string + features_indices[i])) - f &= ~(Q_UINT64_C(1) << i); - } - } - -#ifdef RUNNING_ON_VALGRIND - bool runningOnValgrind = RUNNING_ON_VALGRIND; -#else - bool runningOnValgrind = false; -#endif - if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) { - quint64 missing = minFeature & ~f; - fprintf(stderr, "Incompatible processor. 
This Qt build requires the following features:\n "); - for (int i = 0; i < features_count; ++i) { - if (missing & (Q_UINT64_C(1) << i)) - fprintf(stderr, "%s", features_string + features_indices[i]); - } - fprintf(stderr, "\n"); - fflush(stderr); - qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, - features_string + features_indices[qCountTrailingZeroBits(missing)]); - } - - qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized)); -#ifndef Q_ATOMIC_INT64_IS_SUPPORTED - qt_cpu_features[1].storeRelaxed(f >> 32); -#endif - return f; -} - -void qDumpCPUFeatures() -{ - quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); - printf("Processor features: "); - for (int i = 0; i < features_count; ++i) { - if (features & (Q_UINT64_C(1) << i)) - printf("%s%s", features_string + features_indices[i], - minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); - } - if ((features = (qCompilerCpuFeatures & ~features))) { - printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:"); - for (int i = 0; i < features_count; ++i) { - if (features & (Q_UINT64_C(1) << i)) - printf("%s", features_string + features_indices[i]); - } - printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!"); - } - puts(""); -} - -#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) - -# ifdef Q_PROCESSOR_X86_64 -# define _rdrandXX_step _rdrand64_step -# define _rdseedXX_step _rdseed64_step -# else -# define _rdrandXX_step _rdrand32_step -# define _rdseedXX_step _rdseed32_step -# endif - -# if QT_COMPILER_SUPPORTS_HERE(RDSEED) -static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept -{ - // Unlike for the RDRAND code below, the Intel whitepaper describing the - // use of the RDSEED instruction indicates we should not retry in a loop. - // If the independent bit generator used by RDSEED is out of entropy, it - // may take time to replenish. 
- // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide - while (ptr + sizeof(qregisteruint)/sizeof(*ptr) <= end) { - if (_rdseedXX_step(reinterpret_cast<qregisteruint *>(ptr)) == 0) - goto out; - ptr += sizeof(qregisteruint)/sizeof(*ptr); - } - - if (sizeof(*ptr) != sizeof(qregisteruint) && ptr != end) { - if (_rdseed32_step(ptr) == 0) - goto out; - ++ptr; - } - -out: - return ptr; -} -# else -static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *) -{ - return ptr; -} -# endif - -static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept -{ - int retries = 10; - while (ptr + sizeof(qregisteruint)/sizeof(*ptr) <= end) { - if (_rdrandXX_step(reinterpret_cast<qregisteruint *>(ptr))) - ptr += sizeof(qregisteruint)/sizeof(*ptr); - else if (--retries == 0) - goto out; - } - - while (sizeof(*ptr) != sizeof(qregisteruint) && ptr != end) { - bool ok = _rdrand32_step(ptr); - if (!ok && --retries) - continue; - if (ok) - ++ptr; - break; - } - -out: - return ptr; -} - -static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept -{ - /* - * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a - * failing random generation instruction, which always returns - * 0xffffffff, even when generation was "successful". - * - * This code checks if hardware random generator generates four consecutive - * equal numbers. If it does, then we probably have a failing one and - * should disable it completely. - * - * https://bugreports.qt.io/browse/QTBUG-69423 - */ - constexpr qsizetype TestBufferSize = 4; - unsigned testBuffer[TestBufferSize] = {}; - - unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize); - if (end < testBuffer + 3) { - // Random generation didn't produce enough data for us to make a - // determination whether it's working or not. Assume it isn't, but - // don't print a warning.
- return false; - } - - // Check the results for equality - if (testBuffer[0] == testBuffer[1] - && testBuffer[0] == testBuffer[2] - && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { - fprintf(stderr, "WARNING: CPU random generator seem to be failing, " - "disabling hardware random number generation\n" - "WARNING: RDRND generated:"); - for (unsigned *ptr = testBuffer; ptr < end; ++ptr) - fprintf(stderr, " 0x%x", *ptr); - fprintf(stderr, "\n"); - return false; - } - - // We're good - return true; -} - -QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept -{ - unsigned *ptr = reinterpret_cast<unsigned *>(buffer); - unsigned *end = ptr + count; - - if (qCpuHasFeature(RDSEED)) - ptr = qt_random_rdseed(ptr, end); - - // fill the buffer with RDRND if RDSEED didn't - ptr = qt_random_rdrnd(ptr, end); - return ptr - reinterpret_cast<unsigned *>(buffer); -} -#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) -static bool checkRdrndWorks() noexcept { return false; } -#endif // Q_PROCESSOR_X86 && RDRND - -QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h deleted file mode 100644 index 26e98c4542..0000000000 --- a/src/corelib/tools/qsimd_p.h +++ /dev/null @@ -1,396 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions.
For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#ifndef QSIMD_P_H -#define QSIMD_P_H - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. -// - -#include - -/* - * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros. 
- * They mean the compiler supports the necessary flags and the headers - * for the x86 and ARM intrinsics: - * - GCC: the -mXXX or march=YYY flag is necessary before #include - * up to 4.8; GCC >= 4.9 can include unconditionally - * - Intel CC: #include can happen unconditionally - * - MSVC: #include can happen unconditionally - * - RVCT: ??? - * - * We will try to include all headers possible under this configuration. - * - * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 & - * up do define __AVX__ if the -arch:AVX option is passed on the command-line. - * - * Supported XXX are: - * Flag | Arch | GCC | Intel CC | MSVC | - * ARM_NEON | ARM | I & C | None | ? | - * SSE2 | x86 | I & C | I & C | I & C | - * SSE3 | x86 | I & C | I & C | I only | - * SSSE3 | x86 | I & C | I & C | I only | - * SSE4_1 | x86 | I & C | I & C | I only | - * SSE4_2 | x86 | I & C | I & C | I only | - * AVX | x86 | I & C | I & C | I & C | - * AVX2 | x86 | I & C | I & C | I only | - * AVX512xx | x86 | I & C | I & C | I only | - * I = intrinsics; C = code generation - * - * Code can use the following constructs to determine compiler support & status: - * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__) - * If this test passes, then the compiler is already generating code for that - * given sub-architecture. The intrinsics for that sub-architecture are - * #included and can be used without restriction or runtime check. - * - * - #if QT_COMPILER_SUPPORTS(XXX) - * If this test passes, then the compiler is able to generate code for that - * given sub-architecture in another translation unit, given the right set of - * flags. Use of the intrinsics is not guaranteed. This is useful with - * runtime detection (see below). - * - * - #if QT_COMPILER_SUPPORTS_HERE(XXX) - * If this test passes, then the compiler is able to generate code for that - * given sub-architecture in this translation unit, even if it is not doing - * that now (it might be). 
Individual functions may be tagged with - * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that - * sub-arch. Only inside such functions is the use of the intrisics - * guaranteed to work. This is useful with runtime detection (see below). - * - * Runtime detection of a CPU sub-architecture can be done with the - * qCpuHasFeature(XXX) function. There are two strategies for generating - * optimized code like that: - * - * 1) place the optimized code in a different translation unit (C or assembly - * sources) and pass the correct flags to the compiler to enable support. Those - * sources must not include qglobal.h, which means they cannot include this - * file either. The dispatcher function would look like this: - * - * void foo() - * { - * #if QT_COMPILER_SUPPORTS(XXX) - * if (qCpuHasFeature(XXX)) { - * foo_optimized_xxx(); - * return; - * } - * #endif - * foo_plain(); - * } - * - * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and - * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use - * other Qt code. 
The dispatcher function would look like this: - * - * void foo() - * { - * #if QT_COMPILER_SUPPORTS_HERE(XXX) - * if (qCpuHasFeature(XXX)) { - * foo_optimized_xxx(); - * return; - * } - * #endif - * foo_plain(); - * } - */ - -#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC) -#include -#endif - -#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0) - -#if defined(Q_PROCESSOR_ARM) -# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x) -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600 - /* GCC requires attributes for a function */ -# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) -# else -# define QT_FUNCTION_TARGET(x) -# endif -# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__) -# define __ARM_FEATURE_NEON // also support QT_COMPILER_SUPPORTS_HERE(NEON) -# endif -#elif defined(Q_PROCESSOR_MIPS) -# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) -# define QT_FUNCTION_TARGET(x) -# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32) -# define __MIPS_DSP__ -# endif -# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32) -# define __MIPS_DSPR2__ -# endif -#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) -# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x)) -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) - /* GCC requires attributes for a function */ -# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) -# else -# define QT_FUNCTION_TARGET(x) -# endif -#else -# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) -# define QT_FUNCTION_TARGET(x) -#endif - -#ifdef Q_PROCESSOR_X86 -/* -- x86 intrinsic support -- */ - -# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2) -// MSVC doesn't define __SSE2__, so do it ourselves -# define __SSE__ 1 -# define __SSE2__ 1 -# endif - -# ifdef __SSE2__ -// #include the 
intrinsics -# include <immintrin.h> -# endif - -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) -// GCC 4.4 and Clang 2.8 added a few more intrinsics there -# include <x86intrin.h> -# endif - -# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__)) -// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros -// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx -# define __SSE3__ 1 -# define __SSSE3__ 1 -// no Intel CPU supports SSE4a, so don't define it -# define __SSE4_1__ 1 -# define __SSE4_2__ 1 -# ifndef __AVX__ -# define __AVX__ 1 -# endif -# endif - -# if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// POPCNT instructions: -// All processors that support SSE4.2 support POPCNT -// (but neither MSVC nor the Intel compiler define this macro) -# define __POPCNT__ 1 -# endif - -// AVX intrinsics -# if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// AES, PCLMULQDQ instructions: -// All processors that support AVX support PCLMULQDQ -// (but neither MSVC nor the Intel compiler define this macro) -# define __PCLMUL__ 1 -# endif - -# if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// F16C & RDRAND instructions: -// All processors that support AVX2 support F16C & RDRAND: -// (but neither MSVC nor the Intel compiler define these macros) -# define __F16C__ 1 -# define __RDRND__ 1 -# endif - -# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL) -// BMI2 instructions: -// All processors that support BMI support BMI2 (and AVX2) -// (but neither MSVC nor the Intel compiler define this macro) -# define __BMI2__ 1 -# endif - -# include "qsimd_x86_p.h" - -// Haswell sub-architecture -// -// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2, -// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a -// sub-target for
us. The first AMD processor with AVX2 support (Zen) has the -// same features. -// -// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc -// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). -# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell" -# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \ - defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__) -# define __haswell__ 1 -# endif - -// This constant does not include all CPU features found in a Haswell, only -// those that we'd have optimized code for. -// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode. -QT_BEGIN_NAMESPACE -static const quint64 CpuFeatureArchHaswell = 0 - | CpuFeatureSSE2 - | CpuFeatureSSE3 - | CpuFeatureSSSE3 - | CpuFeatureSSE4_1 - | CpuFeatureSSE4_2 - | CpuFeatureFMA - | CpuFeaturePOPCNT - | CpuFeatureAVX - | CpuFeatureF16C - | CpuFeatureAVX2 - | CpuFeatureBMI - | CpuFeatureBMI2; -QT_END_NAMESPACE - -#endif /* Q_PROCESSOR_X86 */ - -// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html -// This should be tweaked with an "upper version" of clang once we know which release fixes the -// issue. At that point we can rely on __ARM_FEATURE_CRC32 again. -#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32) -# undef __ARM_FEATURE_CRC32 -#endif - -// NEON intrinsics -// note: as of GCC 4.9, does not support function targets for ARM -#if defined(__ARM_NEON) || defined(__ARM_NEON__) -#include -#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available. -#ifndef __ARM_NEON__ -// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection. 
-#define __ARM_NEON__ -#endif -#endif -// AArch64/ARM64 -#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32) -#if defined(Q_PROCESSOR_ARM_64) -// only available on aarch64 -#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc" -#endif -# include <arm_acle.h> -#endif - -#ifdef __cplusplus -#include <qatomic.h> - -QT_BEGIN_NAMESPACE - -#ifndef Q_PROCESSOR_X86 -enum CPUFeatures { -#if defined(Q_PROCESSOR_ARM) - CpuFeatureNEON = 2, - CpuFeatureARM_NEON = CpuFeatureNEON, - CpuFeatureCRC32 = 4, -#elif defined(Q_PROCESSOR_MIPS) - CpuFeatureDSP = 2, - CpuFeatureDSPR2 = 4, -#endif - - // used only to indicate that the CPU detection was initialised - QSimdInitialized = 1 -}; - -static const quint64 qCompilerCpuFeatures = 0 -#if defined __ARM_NEON__ - | CpuFeatureNEON -#endif -#if defined __ARM_FEATURE_CRC32 - | CpuFeatureCRC32 -#endif -#if defined __mips_dsp - | CpuFeatureDSP -#endif -#if defined __mips_dspr2 - | CpuFeatureDSPR2 -#endif - ; -#endif - -#ifdef Q_ATOMIC_INT64_IS_SUPPORTED -extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1]; -#else -extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2]; -#endif -Q_CORE_EXPORT quint64 qDetectCpuFeatures(); - -#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED) -Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept; -#else -static inline qsizetype qRandomCpu(void *, qsizetype) noexcept -{ - return 0; -} -#endif - -static inline quint64 qCpuFeatures() -{ - quint64 features = qt_cpu_features[0].loadRelaxed(); -#ifndef Q_ATOMIC_INT64_IS_SUPPORTED - features |= quint64(qt_cpu_features[1].loadRelaxed()) << 32; -#endif - if (Q_UNLIKELY(features == 0)) { - features = qDetectCpuFeatures(); - Q_ASSUME(features != 0); - } - return features; -} - -#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \ - || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) - -inline bool qHasHwrng() -{ -#if defined(Q_PROCESSOR_X86) &&
QT_COMPILER_SUPPORTS_HERE(RDRND) - return qCpuHasFeature(RDRND); -#else - return false; -#endif -} - -#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ - for (; i < static_cast(qMin(static_cast(length), ((4 - ((reinterpret_cast(ptr) >> 2) & 0x3)) & 0x3))); ++i) - -#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \ - for (; i < static_cast(qMin(static_cast(length), ((8 - ((reinterpret_cast(ptr) >> 2) & 0x7)) & 0x7))); ++i) - -QT_END_NAMESPACE - -#endif // __cplusplus - -#define SIMD_EPILOGUE(i, length, max) \ - for (int _i = 0; _i < max && i < length; ++i, ++_i) - -#endif // QSIMD_P_H diff --git a/src/corelib/tools/qsimd_x86.cpp b/src/corelib/tools/qsimd_x86.cpp deleted file mode 100644 index 509af464b2..0000000000 --- a/src/corelib/tools/qsimd_x86.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl -#include "qsimd_p.h" - -static const char features_string[] = - " sse2\0" - " sse3\0" - " ssse3\0" - " fma\0" - " sse4.1\0" - " sse4.2\0" - " movbe\0" - " popcnt\0" - " aes\0" - " avx\0" - " f16c\0" - " rdrnd\0" - " bmi\0" - " hle\0" - " avx2\0" - " bmi2\0" - " rtm\0" - " avx512f\0" - " avx512dq\0" - " rdseed\0" - " avx512ifma\0" - " avx512pf\0" - " avx512er\0" - " avx512cd\0" - " sha\0" - " avx512bw\0" - " avx512vl\0" - " avx512vbmi\0" - " avx512vbmi2\0" - " gfni\0" - " vaes\0" - " avx512vnni\0" - " avx512bitalg\0" - " avx512vpopcntdq\0" - " avx5124nniw\0" - " avx5124fmaps\0" - "\0"; - -static const quint16 features_indices[] = { - 306, 0, 6, 12, 19, 24, 32, 40, - 47, 55, 60, 65, 71, 78, 83, 88, - 94, 100, 105, 114, 124, 132, 144, 154, - 164, 174, 179, 189, 199, 211, 224, 230, - 236, 248, 262, 279, 292 -}; - -enum X86CpuidLeaves { - Leaf1ECX, - Leaf1EDX, - Leaf7_0EBX, - Leaf7_0ECX, - Leaf7_0EDX, - X86CpuidMaxLeaf -}; - -static const quint8 x86_locators[] = { - Leaf1EDX*32 + 26, // sse2 - Leaf1ECX*32 + 0, // sse3 - Leaf1ECX*32 + 9, // ssse3 - Leaf1ECX*32 + 12, // fma - Leaf1ECX*32 + 19, // sse4.1 
- Leaf1ECX*32 + 20, // sse4.2 - Leaf1ECX*32 + 22, // movbe - Leaf1ECX*32 + 23, // popcnt - Leaf1ECX*32 + 25, // aes - Leaf1ECX*32 + 28, // avx - Leaf1ECX*32 + 29, // f16c - Leaf1ECX*32 + 30, // rdrnd - Leaf7_0EBX*32 + 3, // bmi - Leaf7_0EBX*32 + 4, // hle - Leaf7_0EBX*32 + 5, // avx2 - Leaf7_0EBX*32 + 8, // bmi2 - Leaf7_0EBX*32 + 11, // rtm - Leaf7_0EBX*32 + 16, // avx512f - Leaf7_0EBX*32 + 17, // avx512dq - Leaf7_0EBX*32 + 18, // rdseed - Leaf7_0EBX*32 + 21, // avx512ifma - Leaf7_0EBX*32 + 26, // avx512pf - Leaf7_0EBX*32 + 27, // avx512er - Leaf7_0EBX*32 + 28, // avx512cd - Leaf7_0EBX*32 + 29, // sha - Leaf7_0EBX*32 + 30, // avx512bw - Leaf7_0EBX*32 + 31, // avx512vl - Leaf7_0ECX*32 + 1, // avx512vbmi - Leaf7_0ECX*32 + 6, // avx512vbmi2 - Leaf7_0ECX*32 + 8, // gfni - Leaf7_0ECX*32 + 9, // vaes - Leaf7_0ECX*32 + 11, // avx512vnni - Leaf7_0ECX*32 + 12, // avx512bitalg - Leaf7_0ECX*32 + 14, // avx512vpopcntdq - Leaf7_0EDX*32 + 2, // avx5124nniw - Leaf7_0EDX*32 + 3 // avx5124fmaps -}; - -// List of AVX512 features (see detectProcessorFeatures()) -static const quint64 AllAVX512 = 0 - | CpuFeatureAVX512F - | CpuFeatureAVX512DQ - | CpuFeatureAVX512IFMA - | CpuFeatureAVX512PF - | CpuFeatureAVX512ER - | CpuFeatureAVX512CD - | CpuFeatureAVX512BW - | CpuFeatureAVX512VL - | CpuFeatureAVX512VBMI - | CpuFeatureAVX512VBMI2 - | CpuFeatureAVX512VNNI - | CpuFeatureAVX512BITALG - | CpuFeatureAVX512VPOPCNTDQ - | CpuFeatureAVX5124NNIW - | CpuFeatureAVX5124FMAPS; diff --git a/src/corelib/tools/qsimd_x86_p.h b/src/corelib/tools/qsimd_x86_p.h deleted file mode 100644 index 2434e2b797..0000000000 --- a/src/corelib/tools/qsimd_x86_p.h +++ /dev/null @@ -1,222 +0,0 @@ -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl -#ifndef QSIMD_P_H -# error "Please include instead" -#endif -#ifndef QSIMD_X86_P_H -#define QSIMD_X86_P_H - -#include "qsimd_p.h" - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. 
It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. -// - -QT_BEGIN_NAMESPACE - -// used only to indicate that the CPU detection was initialized -#define QSimdInitialized (Q_UINT64_C(1) << 0) - -// in CPUID Leaf 1, EDX: -#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1) -#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" - -// in CPUID Leaf 1, ECX: -#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2) -#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" -#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3) -#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" -#define CpuFeatureFMA (Q_UINT64_C(1) << 4) -#define QT_FUNCTION_TARGET_STRING_FMA "fma" -#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5) -#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" -#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6) -#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" -#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7) -#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" -#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8) -#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" -#define CpuFeatureAES (Q_UINT64_C(1) << 9) -#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" -#define CpuFeatureAVX (Q_UINT64_C(1) << 10) -#define QT_FUNCTION_TARGET_STRING_AVX "avx" -#define CpuFeatureF16C (Q_UINT64_C(1) << 11) -#define QT_FUNCTION_TARGET_STRING_F16C "f16c" -#define CpuFeatureRDRND (Q_UINT64_C(1) << 12) -#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" - -// in CPUID Leaf 7, Sub-leaf 0, EBX: -#define CpuFeatureBMI (Q_UINT64_C(1) << 13) -#define QT_FUNCTION_TARGET_STRING_BMI "bmi" -#define CpuFeatureHLE (Q_UINT64_C(1) << 14) -#define QT_FUNCTION_TARGET_STRING_HLE "hle" -#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15) -#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2" -#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16) -#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" -#define CpuFeatureRTM (Q_UINT64_C(1) << 17) -#define QT_FUNCTION_TARGET_STRING_RTM "rtm" -#define 
CpuFeatureAVX512F (Q_UINT64_C(1) << 18) -#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" -#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19) -#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" -#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20) -#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" -#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21) -#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" -#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22) -#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" -#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23) -#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" -#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24) -#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" -#define CpuFeatureSHA (Q_UINT64_C(1) << 25) -#define QT_FUNCTION_TARGET_STRING_SHA "sha" -#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26) -#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" -#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27) -#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" - -// in CPUID Leaf 7, Sub-leaf 0, ECX: -#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" -#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2" -#define CpuFeatureGFNI (Q_UINT64_C(1) << 30) -#define QT_FUNCTION_TARGET_STRING_GFNI "gfni" -#define CpuFeatureVAES (Q_UINT64_C(1) << 31) -#define QT_FUNCTION_TARGET_STRING_VAES "vaes" -#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32) -#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni" -#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33) -#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg" -#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34) -#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq" - -// in CPUID Leaf 7, Sub-leaf 0, EDX: -#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35) -#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw" -#define 
CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36) -#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps" - -static const quint64 qCompilerCpuFeatures = 0 -#ifdef __SSE2__ - | CpuFeatureSSE2 -#endif -#ifdef __SSE3__ - | CpuFeatureSSE3 -#endif -#ifdef __SSSE3__ - | CpuFeatureSSSE3 -#endif -#ifdef __FMA__ - | CpuFeatureFMA -#endif -#ifdef __SSE4_1__ - | CpuFeatureSSE4_1 -#endif -#ifdef __SSE4_2__ - | CpuFeatureSSE4_2 -#endif -#ifdef __MOVBE__ - | CpuFeatureMOVBE -#endif -#ifdef __POPCNT__ - | CpuFeaturePOPCNT -#endif -#ifdef __AES__ - | CpuFeatureAES -#endif -#ifdef __AVX__ - | CpuFeatureAVX -#endif -#ifdef __F16C__ - | CpuFeatureF16C -#endif -#ifdef __RDRND__ - | CpuFeatureRDRND -#endif -#ifdef __BMI__ - | CpuFeatureBMI -#endif -#ifdef __HLE__ - | CpuFeatureHLE -#endif -#ifdef __AVX2__ - | CpuFeatureAVX2 -#endif -#ifdef __BMI2__ - | CpuFeatureBMI2 -#endif -#ifdef __RTM__ - | CpuFeatureRTM -#endif -#ifdef __AVX512F__ - | CpuFeatureAVX512F -#endif -#ifdef __AVX512DQ__ - | CpuFeatureAVX512DQ -#endif -#ifdef __RDSEED__ - | CpuFeatureRDSEED -#endif -#ifdef __AVX512IFMA__ - | CpuFeatureAVX512IFMA -#endif -#ifdef __AVX512PF__ - | CpuFeatureAVX512PF -#endif -#ifdef __AVX512ER__ - | CpuFeatureAVX512ER -#endif -#ifdef __AVX512CD__ - | CpuFeatureAVX512CD -#endif -#ifdef __SHA__ - | CpuFeatureSHA -#endif -#ifdef __AVX512BW__ - | CpuFeatureAVX512BW -#endif -#ifdef __AVX512VL__ - | CpuFeatureAVX512VL -#endif -#ifdef __AVX512VBMI__ - | CpuFeatureAVX512VBMI -#endif -#ifdef __AVX512VBMI2__ - | CpuFeatureAVX512VBMI2 -#endif -#ifdef __GFNI__ - | CpuFeatureGFNI -#endif -#ifdef __VAES__ - | CpuFeatureVAES -#endif -#ifdef __AVX512VNNI__ - | CpuFeatureAVX512VNNI -#endif -#ifdef __AVX512BITALG__ - | CpuFeatureAVX512BITALG -#endif -#ifdef __AVX512VPOPCNTDQ__ - | CpuFeatureAVX512VPOPCNTDQ -#endif -#ifdef __AVX5124NNIW__ - | CpuFeatureAVX5124NNIW -#endif -#ifdef __AVX5124FMAPS__ - | CpuFeatureAVX5124FMAPS -#endif - ; - -QT_END_NAMESPACE - -#endif // QSIMD_X86_P_H diff --git 
a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri index d7c66e85be..607a6eaf06 100644 --- a/src/corelib/tools/tools.pri +++ b/src/corelib/tools/tools.pri @@ -40,7 +40,6 @@ HEADERS += \ tools/qsharedpointer.h \ tools/qsharedpointer_impl.h \ tools/qset.h \ - tools/qsimd_p.h \ tools/qsize.h \ tools/qstack.h \ tools/qtools_p.h \ @@ -67,7 +66,6 @@ SOURCES += \ tools/qringbuffer.cpp \ tools/qshareddata.cpp \ tools/qsharedpointer.cpp \ - tools/qsimd.cpp \ tools/qsize.cpp \ tools/qversionnumber.cpp diff --git a/util/x86simdgen/generate.pl b/util/x86simdgen/generate.pl index 5df2f4d526..b3e7e99298 100755 --- a/util/x86simdgen/generate.pl +++ b/util/x86simdgen/generate.pl @@ -65,7 +65,46 @@ if (my $h = shift @ARGV) { } # Print the qsimd_x86_p.h output -print q{// This is a generated file. DO NOT EDIT. +print q{/**************************************************************************** +** +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// This is a generated file. DO NOT EDIT. // Please see util/x86simdgen/generate.pl"; #ifndef QSIMD_P_H # error "Please include instead" @@ -142,10 +181,49 @@ if (my $cpp = shift @ARGV) { }; }; -print "// This is a generated file. DO NOT EDIT."; -print "// Please see util/x86simdgen/generate.pl"; -print '#include "qsimd_p.h"'; -print ""; +print q{/**************************************************************************** +** +** Copyright (C) 2018 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. 
For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// This is a generated file. DO NOT EDIT. +// Please see util/x86simdgen/generate.pl"; +#include "qsimd_p.h" +}; # Now generate the string table and bit-location array my $offset = 0; -- cgit v1.2.3