diff options
Diffstat (limited to 'src/corelib/global/qsimd.cpp')
-rw-r--r-- | src/corelib/global/qsimd.cpp | 220 |
1 files changed, 106 insertions, 114 deletions
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp index 025c50b6e8..8bc5381591 100644 --- a/src/corelib/global/qsimd.cpp +++ b/src/corelib/global/qsimd.cpp @@ -1,60 +1,34 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2019 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2021 The Qt Company Ltd. +// Copyright (C) 2022 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only // we need ICC to define the prototype for _rdseed64_step #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES +#undef _FORTIFY_SOURCE // otherwise, the always_inline functions from stdio.h fail to inline #include "qsimd_p.h" #include "qalgorithms.h" -#include <QByteArray> + #include <stdio.h> +#include <string.h> + +#if defined(QT_NO_DEBUG) && !defined(NDEBUG) +# define NDEBUG +#endif +#include <assert.h> #ifdef Q_OS_LINUX # include "../testlib/3rdparty/valgrind_p.h" #endif +#define QT_FUNCTION_TARGET_BASELINE + #if defined(Q_OS_WIN) # if !defined(Q_CC_GNU) # include <intrin.h> # endif -# if defined(Q_PROCESSOR_ARM64) +# if defined(Q_PROCESSOR_ARM_64) +# include <qt_windows.h> # include <processthreadsapi.h> # endif #elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32) @@ -88,6 +62,14 @@ QT_BEGIN_NAMESPACE +template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE +uint arraysize(T (&)[N]) +{ + // Same as std::size, but with QT_FUNCTION_TARGET_BASELINE, + // otherwise some versions of GCC fail to compile. 
+ return N; +} + #if defined(Q_PROCESSOR_ARM) /* Data: neon @@ -117,16 +99,11 @@ static const int features_indices[] = { # include "qsimd_x86.cpp" // generated by util/x86simdgen #else static const char features_string[] = ""; -static const int features_indices[] = { }; +static const int features_indices[] = { 0 }; #endif // end generated -#if defined (Q_OS_NACL) -static inline uint detectProcessorFeatures() -{ - return 0; -} -#elif defined(Q_PROCESSOR_ARM) +#if defined(Q_PROCESSOR_ARM) static inline quint64 detectProcessorFeatures() { quint64 features = 0; @@ -166,7 +143,7 @@ static inline quint64 detectProcessorFeatures() features |= CpuFeatureAES; #endif return features; -#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM64) +#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64) features |= CpuFeatureNEON; if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0) features |= CpuFeatureCRC32; @@ -194,9 +171,23 @@ static inline quint64 detectProcessorFeatures() #else # define PICreg "%%rbx" #endif +#ifdef __SSE2_MATH__ +# define X86_BASELINE "no-sse3" +#else +# define X86_BASELINE "no-sse" +#endif + +#if defined(Q_CC_GNU) +// lower the target for functions in this file +# undef QT_FUNCTION_TARGET_BASELINE +# define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE))) +# define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \ + X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND +#endif static bool checkRdrndWorks() noexcept; +QT_FUNCTION_TARGET_BASELINE static int maxBasicCpuidSupported() { #if defined(Q_CC_EMSCRIPTEN) @@ -244,6 +235,7 @@ static int maxBasicCpuidSupported() #endif } +QT_FUNCTION_TARGET_BASELINE static void cpuidFeatures01(uint &ecx, uint &edx) { #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) @@ -273,6 +265,7 @@ static void cpuidFeatures01(uint &ecx, uint &edx) inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} #endif +QT_FUNCTION_TARGET_BASELINE static void cpuidFeatures07_00(uint &ebx, 
uint &ecx, uint &edx) { #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) @@ -306,6 +299,7 @@ static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) #endif } +QT_FUNCTION_TARGET_BASELINE #if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) // fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); inline quint64 _xgetbv(__int64) { return 0; } @@ -327,22 +321,7 @@ static void xgetbv(uint in, uint &eax, uint &edx) #endif } -// Flags from the XCR0 state register -enum XCR0Flags { - X87 = 1 << 0, - XMM0_15 = 1 << 1, - YMM0_15Hi128 = 1 << 2, - BNDRegs = 1 << 3, - BNDCSR = 1 << 4, - OpMask = 1 << 5, - ZMM0_15Hi256 = 1 << 6, - ZMM16_31 = 1 << 7, - - SSEState = XMM0_15, - AVXState = XMM0_15 | YMM0_15Hi128, - AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 -}; - +QT_FUNCTION_TARGET_BASELINE static quint64 adjustedXcr0(quint64 xcr0) { /* @@ -362,59 +341,54 @@ static quint64 adjustedXcr0(quint64 xcr0) constexpr quintptr cpu_capabilities64 = commpage + 0x10; quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); if (capab & kHasAVX512F) - xcr0 |= AVX512State; + xcr0 |= XSave_Avx512State; #endif return xcr0; } +QT_FUNCTION_TARGET_BASELINE static quint64 detectProcessorFeatures() { - static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; - static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; - quint64 features = 0; int cpuidLevel = maxBasicCpuidSupported(); #if Q_PROCESSOR_X86 < 5 if (cpuidLevel < 1) return 0; #else - Q_ASSERT(cpuidLevel >= 1); + assert(cpuidLevel >= 1); #endif uint results[X86CpuidMaxLeaf] = {}; - cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); + cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]); if (cpuidLevel >= 7) - cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]); // populate our feature list - for (uint i = 0; i < 
sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + for (uint i = 0; i < arraysize(x86_locators); ++i) { uint word = x86_locators[i] / 32; uint bit = 1U << (x86_locators[i] % 32); - quint64 feature = Q_UINT64_C(1) << (i + 1); + quint64 feature = Q_UINT64_C(1) << i; if (results[word] & bit) features |= feature; } // now check the AVX state quint64 xcr0 = 0; - if (results[Leaf1ECX] & (1u << 27)) { + if (results[Leaf01ECX] & (1u << 27)) { // XGETBV enabled uint xgetbvA = 0, xgetbvD = 0; xgetbv(0, xgetbvA, xgetbvD); xcr0 = xgetbvA; - if (sizeof(XCR0Flags) > sizeof(xgetbvA)) + if (sizeof(XSaveBits) > sizeof(xgetbvA)) xcr0 |= quint64(xgetbvD) << 32; xcr0 = adjustedXcr0(xcr0); } - if ((xcr0 & AVXState) != AVXState) { - // support for YMM registers is disabled, disable all AVX - features &= ~AllAVX; - } else if ((xcr0 & AVX512State) != AVX512State) { - // support for ZMM registers or mask registers is disabled, disable all AVX512 - features &= ~AllAVX512; + for (auto req : xsave_requirements) { + if ((xcr0 & req.xsave_state) != req.xsave_state) + features &= ~req.cpu_features; } if (features & CpuFeatureRDRND && !checkRdrndWorks()) @@ -577,32 +551,38 @@ static inline uint detectProcessorFeatures() } #endif -static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); - // record what CPU features were enabled by default in this Qt build static const quint64 minFeature = qCompilerCpuFeatures; -#ifdef Q_ATOMIC_INT64_IS_SUPPORTED -Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) }; -#else -Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) }; -#endif +static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1); +Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 }; -quint64 qDetectCpuFeatures() +QT_FUNCTION_TARGET_BASELINE +uint64_t 
QT_MANGLE_NAMESPACE(qDetectCpuFeatures)() { auto minFeatureTest = minFeature; -#if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64) - // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used - // automatically by compilers, we can just add runtime check. - minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32); +#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk) + // Controlflow Enforcement Technology (CET) is an OS-assisted + // hardware-feature, meaning the CPUID bit may be disabled if the OS + // doesn't support it, but that's ok. + minFeatureTest &= ~CpuFeatureSHSTK; #endif - quint64 f = detectProcessorFeatures(); - QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); - if (!disable.isEmpty()) { - disable.prepend(' '); - for (int i = 0; i < features_count; ++i) { - if (disable.contains(features_string + features_indices[i])) - f &= ~(Q_UINT64_C(1) << i); + QCpuFeatureType f = detectProcessorFeatures(); + + // Intentionally NOT qgetenv (this code runs too early) + if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) { +#if _POSIX_C_SOURCE >= 200112L + char *saveptr = nullptr; + auto strtok = [&saveptr](char *str, const char *delim) { + return ::strtok_r(str, delim, &saveptr); + }; +#endif + while (char *token = strtok(disable, " ")) { + disable = nullptr; + for (uint i = 0; i < arraysize(features_indices); ++i) { + if (strcmp(token, features_string + features_indices[i]) == 0) + f &= ~(Q_UINT64_C(1) << i); + } } } @@ -612,37 +592,36 @@ quint64 qDetectCpuFeatures() bool runningOnValgrind = false; #endif if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) { - quint64 missing = minFeatureTest & ~f; + quint64 missing = minFeatureTest & ~quint64(f); fprintf(stderr, "Incompatible processor. 
This Qt build requires the following features:\n "); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < arraysize(features_indices); ++i) { if (missing & (Q_UINT64_C(1) << i)) fprintf(stderr, "%s", features_string + features_indices[i]); } fprintf(stderr, "\n"); fflush(stderr); - qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, - features_string + features_indices[qCountTrailingZeroBits(missing)]); + qAbort(); } - qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized)); -#ifndef Q_ATOMIC_INT64_IS_SUPPORTED - qt_cpu_features[1].storeRelaxed(f >> 32); -#endif + assert((f & SimdInitialized) == 0); + f |= SimdInitialized; + std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), f, std::memory_order_relaxed); return f; } +QT_FUNCTION_TARGET_BASELINE void qDumpCPUFeatures() { - quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); + quint64 features = detectProcessorFeatures() & ~SimdInitialized; printf("Processor features: "); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < arraysize(features_indices); ++i) { if (features & (Q_UINT64_C(1) << i)) printf("%s%s", features_string + features_indices[i], minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); } if ((features = (qCompilerCpuFeatures & ~features))) { printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:"); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < arraysize(features_indices); ++i) { if (features & (Q_UINT64_C(1) << i)) printf("%s", features_string + features_indices[i]); } @@ -723,7 +702,8 @@ out: return ptr; } -static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept +QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION +static bool checkRdrndWorks() noexcept { /* * Some AMD CPUs (e.g. 
AMD A4-6250J and AMD Ryzen 3000-series) have a @@ -776,8 +756,20 @@ QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) no ptr = qt_random_rdrnd(ptr, end); return ptr - reinterpret_cast<unsigned *>(buffer); } -#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) +#elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM) static bool checkRdrndWorks() noexcept { return false; } #endif // Q_PROCESSOR_X86 && RDRND +#if QT_SUPPORTS_INIT_PRIORITY +namespace { +struct QSimdInitializer +{ + inline QSimdInitializer() { QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); } +}; +} + +// This is intentionally a dynamic initialization of the variable +Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer; +#endif + QT_END_NAMESPACE |