summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qsimd.cpp')
-rw-r--r--src/corelib/global/qsimd.cpp220
1 files changed, 106 insertions, 114 deletions
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp
index 025c50b6e8..8bc5381591 100644
--- a/src/corelib/global/qsimd.cpp
+++ b/src/corelib/global/qsimd.cpp
@@ -1,60 +1,34 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2019 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2021 The Qt Company Ltd.
+// Copyright (C) 2022 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
// we need ICC to define the prototype for _rdseed64_step
#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
+#undef _FORTIFY_SOURCE // otherwise, the always_inline functions from stdio.h fail to inline
#include "qsimd_p.h"
#include "qalgorithms.h"
-#include <QByteArray>
+
#include <stdio.h>
+#include <string.h>
+
+#if defined(QT_NO_DEBUG) && !defined(NDEBUG)
+# define NDEBUG
+#endif
+#include <assert.h>
#ifdef Q_OS_LINUX
# include "../testlib/3rdparty/valgrind_p.h"
#endif
+#define QT_FUNCTION_TARGET_BASELINE
+
#if defined(Q_OS_WIN)
# if !defined(Q_CC_GNU)
# include <intrin.h>
# endif
-# if defined(Q_PROCESSOR_ARM64)
+# if defined(Q_PROCESSOR_ARM_64)
+# include <qt_windows.h>
# include <processthreadsapi.h>
# endif
#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
@@ -88,6 +62,14 @@
QT_BEGIN_NAMESPACE
+template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE
+uint arraysize(T (&)[N])
+{
+ // Same as std::size, but with QT_FUNCTION_TARGET_BASELINE,
+ // otherwise some versions of GCC fail to compile.
+ return N;
+}
+
#if defined(Q_PROCESSOR_ARM)
/* Data:
neon
@@ -117,16 +99,11 @@ static const int features_indices[] = {
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
static const char features_string[] = "";
-static const int features_indices[] = { };
+static const int features_indices[] = { 0 };
#endif
// end generated
-#if defined (Q_OS_NACL)
-static inline uint detectProcessorFeatures()
-{
- return 0;
-}
-#elif defined(Q_PROCESSOR_ARM)
+#if defined(Q_PROCESSOR_ARM)
static inline quint64 detectProcessorFeatures()
{
quint64 features = 0;
@@ -166,7 +143,7 @@ static inline quint64 detectProcessorFeatures()
features |= CpuFeatureAES;
#endif
return features;
-#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM64)
+#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
features |= CpuFeatureNEON;
if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
features |= CpuFeatureCRC32;
@@ -194,9 +171,23 @@ static inline quint64 detectProcessorFeatures()
#else
# define PICreg "%%rbx"
#endif
+#ifdef __SSE2_MATH__
+# define X86_BASELINE "no-sse3"
+#else
+# define X86_BASELINE "no-sse"
+#endif
+
+#if defined(Q_CC_GNU)
+// lower the target for functions in this file
+# undef QT_FUNCTION_TARGET_BASELINE
+# define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE)))
+# define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \
+ X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND
+#endif
static bool checkRdrndWorks() noexcept;
+QT_FUNCTION_TARGET_BASELINE
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
@@ -244,6 +235,7 @@ static int maxBasicCpuidSupported()
#endif
}
+QT_FUNCTION_TARGET_BASELINE
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
@@ -273,6 +265,7 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
+QT_FUNCTION_TARGET_BASELINE
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
@@ -306,6 +299,7 @@ static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
#endif
}
+QT_FUNCTION_TARGET_BASELINE
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
inline quint64 _xgetbv(__int64) { return 0; }
@@ -327,22 +321,7 @@ static void xgetbv(uint in, uint &eax, uint &edx)
#endif
}
-// Flags from the XCR0 state register
-enum XCR0Flags {
- X87 = 1 << 0,
- XMM0_15 = 1 << 1,
- YMM0_15Hi128 = 1 << 2,
- BNDRegs = 1 << 3,
- BNDCSR = 1 << 4,
- OpMask = 1 << 5,
- ZMM0_15Hi256 = 1 << 6,
- ZMM16_31 = 1 << 7,
-
- SSEState = XMM0_15,
- AVXState = XMM0_15 | YMM0_15Hi128,
- AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
-};
-
+QT_FUNCTION_TARGET_BASELINE
static quint64 adjustedXcr0(quint64 xcr0)
{
/*
@@ -362,59 +341,54 @@ static quint64 adjustedXcr0(quint64 xcr0)
constexpr quintptr cpu_capabilities64 = commpage + 0x10;
quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
if (capab & kHasAVX512F)
- xcr0 |= AVX512State;
+ xcr0 |= XSave_Avx512State;
#endif
return xcr0;
}
+QT_FUNCTION_TARGET_BASELINE
static quint64 detectProcessorFeatures()
{
- static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
- static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
-
quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();
#if Q_PROCESSOR_X86 < 5
if (cpuidLevel < 1)
return 0;
#else
- Q_ASSERT(cpuidLevel >= 1);
+ assert(cpuidLevel >= 1);
#endif
uint results[X86CpuidMaxLeaf] = {};
- cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]);
if (cpuidLevel >= 7)
- cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+ cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]);
// populate our feature list
- for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ for (uint i = 0; i < arraysize(x86_locators); ++i) {
uint word = x86_locators[i] / 32;
uint bit = 1U << (x86_locators[i] % 32);
- quint64 feature = Q_UINT64_C(1) << (i + 1);
+ quint64 feature = Q_UINT64_C(1) << i;
if (results[word] & bit)
features |= feature;
}
// now check the AVX state
quint64 xcr0 = 0;
- if (results[Leaf1ECX] & (1u << 27)) {
+ if (results[Leaf01ECX] & (1u << 27)) {
// XGETBV enabled
uint xgetbvA = 0, xgetbvD = 0;
xgetbv(0, xgetbvA, xgetbvD);
xcr0 = xgetbvA;
- if (sizeof(XCR0Flags) > sizeof(xgetbvA))
+ if (sizeof(XSaveBits) > sizeof(xgetbvA))
xcr0 |= quint64(xgetbvD) << 32;
xcr0 = adjustedXcr0(xcr0);
}
- if ((xcr0 & AVXState) != AVXState) {
- // support for YMM registers is disabled, disable all AVX
- features &= ~AllAVX;
- } else if ((xcr0 & AVX512State) != AVX512State) {
- // support for ZMM registers or mask registers is disabled, disable all AVX512
- features &= ~AllAVX512;
+ for (auto req : xsave_requirements) {
+ if ((xcr0 & req.xsave_state) != req.xsave_state)
+ features &= ~req.cpu_features;
}
if (features & CpuFeatureRDRND && !checkRdrndWorks())
@@ -577,32 +551,38 @@ static inline uint detectProcessorFeatures()
}
#endif
-static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
-
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;
-#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
-Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
-#else
-Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
-#endif
+static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
+Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 };
-quint64 qDetectCpuFeatures()
+QT_FUNCTION_TARGET_BASELINE
+uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)()
{
auto minFeatureTest = minFeature;
-#if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64)
- // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used
- // automatically by compilers, we can just add runtime check.
- minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32);
+#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk)
+ // Control-flow Enforcement Technology (CET) is an OS-assisted
+ // hardware-feature, meaning the CPUID bit may be disabled if the OS
+ // doesn't support it, but that's ok.
+ minFeatureTest &= ~CpuFeatureSHSTK;
#endif
- quint64 f = detectProcessorFeatures();
- QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
- if (!disable.isEmpty()) {
- disable.prepend(' ');
- for (int i = 0; i < features_count; ++i) {
- if (disable.contains(features_string + features_indices[i]))
- f &= ~(Q_UINT64_C(1) << i);
+ QCpuFeatureType f = detectProcessorFeatures();
+
+ // Intentionally NOT qgetenv (this code runs too early)
+ if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) {
+#if _POSIX_C_SOURCE >= 200112L
+ char *saveptr = nullptr;
+ auto strtok = [&saveptr](char *str, const char *delim) {
+ return ::strtok_r(str, delim, &saveptr);
+ };
+#endif
+ while (char *token = strtok(disable, " ")) {
+ disable = nullptr;
+ for (uint i = 0; i < arraysize(features_indices); ++i) {
+ if (strcmp(token, features_string + features_indices[i]) == 0)
+ f &= ~(Q_UINT64_C(1) << i);
+ }
}
}
@@ -612,37 +592,36 @@ quint64 qDetectCpuFeatures()
bool runningOnValgrind = false;
#endif
if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
- quint64 missing = minFeatureTest & ~f;
+ quint64 missing = minFeatureTest & ~quint64(f);
fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < arraysize(features_indices); ++i) {
if (missing & (Q_UINT64_C(1) << i))
fprintf(stderr, "%s", features_string + features_indices[i]);
}
fprintf(stderr, "\n");
fflush(stderr);
- qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
- features_string + features_indices[qCountTrailingZeroBits(missing)]);
+ qAbort();
}
- qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
-#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
- qt_cpu_features[1].storeRelaxed(f >> 32);
-#endif
+ assert((f & SimdInitialized) == 0);
+ f |= SimdInitialized;
+ std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), f, std::memory_order_relaxed);
return f;
}
+QT_FUNCTION_TARGET_BASELINE
void qDumpCPUFeatures()
{
- quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
+ quint64 features = detectProcessorFeatures() & ~SimdInitialized;
printf("Processor features: ");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < arraysize(features_indices); ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s%s", features_string + features_indices[i],
minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
}
if ((features = (qCompilerCpuFeatures & ~features))) {
printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < arraysize(features_indices); ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s", features_string + features_indices[i]);
}
@@ -723,7 +702,8 @@ out:
return ptr;
}
-static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
+QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION
+static bool checkRdrndWorks() noexcept
{
/*
* Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
@@ -776,8 +756,20 @@ QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) no
ptr = qt_random_rdrnd(ptr, end);
return ptr - reinterpret_cast<unsigned *>(buffer);
}
-#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
+#elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM)
static bool checkRdrndWorks() noexcept { return false; }
#endif // Q_PROCESSOR_X86 && RDRND
+#if QT_SUPPORTS_INIT_PRIORITY
+namespace {
+struct QSimdInitializer
+{
+ inline QSimdInitializer() { QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); }
+};
+}
+
+// This is intentionally a dynamic initialization of the variable
+Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
+#endif
+
QT_END_NAMESPACE