summary refs log tree commit diff stats
path: root/src/corelib/global
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2022-01-28 12:13:54 -0800
committer Thiago Macieira <thiago.macieira@intel.com> 2022-02-17 17:02:18 -0800
commit b7c37e5978f8bad0e223eac50f4b950c54248b6e (patch)
tree 10a06728c274ffaf95f2bc016c46db2a4e0350a4 /src/corelib/global
parent b2a9646be9c3441a908a8060ad1e5b7cdab0dafe (diff)
qsimd: Initialize the current state as soon as QtCore loads
Instead of lazily initializing. We usually don't do this in Qt, but in this case there are two very good reasons for it: 1) the call to qCpuFeatures() was not trivial, with the need to preserve a bit of state in the caller function across the call. GCC appeared to generate better code than Clang in this regard, but it still implied more cost than we'd like for doing runtime detection in performance-sensitive places in Qt. 2) the early initialization allows us to use the detected state in GNU indirect functions on platforms that support it. In order to do this, I had to rewrite the QT_NO_CPU_FEATURE environment variable parsing without QByteArray and instead rely on string.h's strtok(). This can't be done for static Qt builds on platforms that don't support the GNU init_priority variable attribute or the MSVC equivalent[1], because otherwise we can't guarantee that this bit of code runs before everything else in Qt. For those platforms, we keep the existing lazy initialization. For shared builds on those platforms, we can use the dynamic initialization. All known linkers will sort the static initialization code in the order in which the .cpp files are linked into the library. The x86 QSimdInitialized variable will be removed in the next commit. [1] https://docs.microsoft.com/en-us/cpp/preprocessor/init-seg?view=msvc-170 Change-Id: I6fcda969a9e9427198bffffd16ce885a27f6b9e2 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib/global')
-rw-r--r-- src/corelib/global/qglobal_p.h 19
-rw-r--r-- src/corelib/global/qsimd.cpp 61
-rw-r--r-- src/corelib/global/qsimd_p.h 9
3 files changed, 65 insertions, 24 deletions
diff --git a/src/corelib/global/qglobal_p.h b/src/corelib/global/qglobal_p.h
index 49927ef30f..df4410ebde 100644
--- a/src/corelib/global/qglobal_p.h
+++ b/src/corelib/global/qglobal_p.h
@@ -68,6 +68,25 @@
QT_BEGIN_NAMESPACE
+#if defined(Q_CC_MSVC)
+# define QT_SUPPORTS_INIT_PRIORITY 1
+// warning C4075: initializers put in unrecognized initialization area
+# define Q_DECL_INIT_PRIORITY(nn) \
+ __pragma(warning(disable: 4075)) \
+ __pragma(init_seg(".CRT$QT" QT_STRINGIFY(nn))) Q_DECL_UNUSED
+#elif defined(Q_OS_WIN) || defined(Q_OF_ELF)
+# define QT_SUPPORTS_INIT_PRIORITY 1
+// priorities 0 to 1000 are reserved to the runtime;
+// we use above 2000 in case someone REALLY needs to go before us
+# define Q_DECL_INIT_PRIORITY(nn) __attribute__((init_priority(2000 + nn), used))
+#elif defined(QT_SHARED)
+// it doesn't support this exactly, but we can work around it
+# define QT_SUPPORTS_INIT_PRIORITY -1
+# define Q_DECL_INIT_PRIORITY(nn) Q_DECL_UNUSED
+#else
+# define QT_SUPPORTS_INIT_PRIORITY 0
+#endif
+
// These behave as if they consult the environment, so need to share its locking:
Q_CORE_EXPORT void qTzSet();
Q_CORE_EXPORT time_t qMkTime(struct tm *when);
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp
index 8ff98ec7cd..fbfabee27c 100644
--- a/src/corelib/global/qsimd.cpp
+++ b/src/corelib/global/qsimd.cpp
@@ -43,8 +43,13 @@
#include "qsimd_p.h"
#include "qalgorithms.h"
-#include <QByteArray>
#include <stdio.h>
+#include <string.h>
+
+#if defined(QT_NO_DEBUG) && !defined(NDEBUG)
+# define NDEBUG
+#endif
+#include <assert.h>
#ifdef Q_OS_LINUX
# include "../testlib/3rdparty/valgrind_p.h"
@@ -118,7 +123,7 @@ static const int features_indices[] = {
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
static const char features_string[] = "";
-static const int features_indices[] = { };
+static const int features_indices[] = { 0 };
#endif
// end generated
@@ -380,7 +385,7 @@ static quint64 detectProcessorFeatures()
if (cpuidLevel < 1)
return 0;
#else
- Q_ASSERT(cpuidLevel >= 1);
+ assert(cpuidLevel >= 1);
#endif
uint results[X86CpuidMaxLeaf] = {};
@@ -578,11 +583,10 @@ static inline uint detectProcessorFeatures()
}
#endif
-static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
-
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;
+static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
QBasicAtomicInteger<QCpuFeatureType> qt_cpu_features[1] = { 0 };
quint64 qDetectCpuFeatures()
@@ -594,12 +598,21 @@ quint64 qDetectCpuFeatures()
minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32);
#endif
quint64 f = detectProcessorFeatures();
- QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
- if (!disable.isEmpty()) {
- disable.prepend(' ');
- for (int i = 0; i < features_count; ++i) {
- if (disable.contains(features_string + features_indices[i]))
- f &= ~(Q_UINT64_C(1) << i);
+
+ // Intentionally NOT qgetenv (this code runs too early)
+ if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) {
+#if _POSIX_C_SOURCE >= 200112L
+ char *saveptr = nullptr;
+ auto strtok = [&saveptr](char *str, const char *delim) {
+ return ::strtok_r(str, delim, &saveptr);
+ };
+#endif
+ while (char *token = strtok(disable, " ")) {
+ disable = nullptr;
+ for (uint i = 0; i < std::size(features_indices); ++i) {
+ if (strcmp(token, features_string + features_indices[i]) == 0)
+ f &= ~(Q_UINT64_C(1) << i);
+ }
}
}
@@ -611,32 +624,32 @@ quint64 qDetectCpuFeatures()
if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
quint64 missing = minFeatureTest & ~f;
fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < std::size(features_indices); ++i) {
if (missing & (Q_UINT64_C(1) << i))
fprintf(stderr, "%s", features_string + features_indices[i]);
}
fprintf(stderr, "\n");
fflush(stderr);
- qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
- features_string + features_indices[qCountTrailingZeroBits(missing)]);
+ qAbort();
}
- qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
+ assert((f & SimdInitialized) == 0);
+ qt_cpu_features[0].storeRelease(f | SimdInitialized);
return f;
}
void qDumpCPUFeatures()
{
- quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
+ quint64 features = detectProcessorFeatures() & ~SimdInitialized;
printf("Processor features: ");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < std::size(features_indices); ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s%s", features_string + features_indices[i],
minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
}
if ((features = (qCompilerCpuFeatures & ~features))) {
printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
- for (int i = 0; i < features_count; ++i) {
+ for (uint i = 0; i < std::size(features_indices); ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s", features_string + features_indices[i]);
}
@@ -774,4 +787,16 @@ QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) no
static bool checkRdrndWorks() noexcept { return false; }
#endif // Q_PROCESSOR_X86 && RDRND
+#if QT_SUPPORTS_INIT_PRIORITY
+namespace {
+struct QSimdInitializer
+{
+ inline QSimdInitializer() { qDetectCpuFeatures(); }
+};
+}
+
+// This is intentionally a dynamic initialization of the variable
+Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
+#endif
+
QT_END_NAMESPACE
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index 21eaeeef93..775aa2c6cb 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -337,9 +337,6 @@ enum CPUFeatures {
CpuFeatureDSP = 2,
CpuFeatureDSPR2 = 4,
#endif
-
- // used only to indicate that the CPU detection was initialised
- QSimdInitialized = 1
};
static const quint64 qCompilerCpuFeatures = 0
@@ -381,9 +378,9 @@ static inline qsizetype qRandomCpu(void *, qsizetype) noexcept
static inline quint64 qCpuFeatures()
{
quint64 features = qt_cpu_features[0].loadRelaxed();
- if (Q_UNLIKELY(features == 0)) {
- features = qDetectCpuFeatures();
- Q_ASSUME(features != 0);
+ if constexpr (!QT_SUPPORTS_INIT_PRIORITY) {
+ if (Q_UNLIKELY(features == 0))
+ features = qDetectCpuFeatures();
}
return features;
}