diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2022-01-28 12:13:54 -0800 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2022-02-17 17:02:18 -0800 |
commit | b7c37e5978f8bad0e223eac50f4b950c54248b6e (patch) | |
tree | 10a06728c274ffaf95f2bc016c46db2a4e0350a4 /src/corelib/global | |
parent | b2a9646be9c3441a908a8060ad1e5b7cdab0dafe (diff) |
qsimd: Initialize the current state as soon as QtCore loads
Instead of lazily initializing. We usually don't do this in Qt, but in
this case there are two very good reasons for it:
1) the call to qCpuFeatures() was not trivial, with the need to preserve
a bit of state in the caller function across the call. GCC appeared
to generate better code than Clang in this regard, but it still
implied more cost than we'd like to do runtime detection in
performance-sensitive places in Qt.
2) the early initialization allows us to use the detected state in GNU
indirect functions on platforms that support it.
In order to do this, I had to rewrite the QT_NO_CPU_FEATURE environment
variable parsing without QByteArray and instead rely on string.h's
strtok().
This can't be done for static Qt builds on platforms that don't support
the GNU init_priority variable attribute or the MSVC equivalent[1],
because otherwise we can't guarantee that this bit of code runs before
everything else in Qt. For those platforms, we keep the existing lazy
initialization.
For shared builds on those platforms, we can use the dynamic
initialization. All known linkers will sort the static initialization
code in the order in which the .cpp files are linked into the library.
The x86 QSimdInitialized variable will be removed in the next commit.
[1] https://docs.microsoft.com/en-us/cpp/preprocessor/init-seg?view=msvc-170
Change-Id: I6fcda969a9e9427198bffffd16ce885a27f6b9e2
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib/global')
-rw-r--r-- | src/corelib/global/qglobal_p.h | 19 | ||||
-rw-r--r-- | src/corelib/global/qsimd.cpp | 61 | ||||
-rw-r--r-- | src/corelib/global/qsimd_p.h | 9 |
3 files changed, 65 insertions, 24 deletions
diff --git a/src/corelib/global/qglobal_p.h b/src/corelib/global/qglobal_p.h index 49927ef30f..df4410ebde 100644 --- a/src/corelib/global/qglobal_p.h +++ b/src/corelib/global/qglobal_p.h @@ -68,6 +68,25 @@ QT_BEGIN_NAMESPACE +#if defined(Q_CC_MSVC) +# define QT_SUPPORTS_INIT_PRIORITY 1 +// warning C4075: initializers put in unrecognized initialization area +# define Q_DECL_INIT_PRIORITY(nn) \ + __pragma(warning(disable: 4075)) \ + __pragma(init_seg(".CRT$QT" QT_STRINGIFY(nn))) Q_DECL_UNUSED +#elif defined(Q_OS_WIN) || defined(Q_OF_ELF) +# define QT_SUPPORTS_INIT_PRIORITY 1 +// priorities 0 to 1000 are reserved to the runtime; +// we use above 2000 in case someone REALLY needs to go before us +# define Q_DECL_INIT_PRIORITY(nn) __attribute__((init_priority(2000 + nn), used)) +#elif defined(QT_SHARED) +// it doesn't support this exactly, but we can work around it +# define QT_SUPPORTS_INIT_PRIORITY -1 +# define Q_DECL_INIT_PRIORITY(nn) Q_DECL_UNUSED +#else +# define QT_SUPPORTS_INIT_PRIORITY 0 +#endif + // These behave as if they consult the environment, so need to share its locking: Q_CORE_EXPORT void qTzSet(); Q_CORE_EXPORT time_t qMkTime(struct tm *when); diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp index 8ff98ec7cd..fbfabee27c 100644 --- a/src/corelib/global/qsimd.cpp +++ b/src/corelib/global/qsimd.cpp @@ -43,8 +43,13 @@ #include "qsimd_p.h" #include "qalgorithms.h" -#include <QByteArray> #include <stdio.h> +#include <string.h> + +#if defined(QT_NO_DEBUG) && !defined(NDEBUG) +# define NDEBUG +#endif +#include <assert.h> #ifdef Q_OS_LINUX # include "../testlib/3rdparty/valgrind_p.h" @@ -118,7 +123,7 @@ static const int features_indices[] = { # include "qsimd_x86.cpp" // generated by util/x86simdgen #else static const char features_string[] = ""; -static const int features_indices[] = { }; +static const int features_indices[] = { 0 }; #endif // end generated @@ -380,7 +385,7 @@ static quint64 detectProcessorFeatures() if (cpuidLevel < 1) 
return 0; #else - Q_ASSERT(cpuidLevel >= 1); + assert(cpuidLevel >= 1); #endif uint results[X86CpuidMaxLeaf] = {}; @@ -578,11 +583,10 @@ static inline uint detectProcessorFeatures() } #endif -static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); - // record what CPU features were enabled by default in this Qt build static const quint64 minFeature = qCompilerCpuFeatures; +static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1); QBasicAtomicInteger<QCpuFeatureType> qt_cpu_features[1] = { 0 }; quint64 qDetectCpuFeatures() @@ -594,12 +598,21 @@ quint64 qDetectCpuFeatures() minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32); #endif quint64 f = detectProcessorFeatures(); - QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); - if (!disable.isEmpty()) { - disable.prepend(' '); - for (int i = 0; i < features_count; ++i) { - if (disable.contains(features_string + features_indices[i])) - f &= ~(Q_UINT64_C(1) << i); + + // Intentionally NOT qgetenv (this code runs too early) + if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) { +#if _POSIX_C_SOURCE >= 200112L + char *saveptr = nullptr; + auto strtok = [&saveptr](char *str, const char *delim) { + return ::strtok_r(str, delim, &saveptr); + }; +#endif + while (char *token = strtok(disable, " ")) { + disable = nullptr; + for (uint i = 0; i < std::size(features_indices); ++i) { + if (strcmp(token, features_string + features_indices[i]) == 0) + f &= ~(Q_UINT64_C(1) << i); + } } } @@ -611,32 +624,32 @@ quint64 qDetectCpuFeatures() if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) { quint64 missing = minFeatureTest & ~f; fprintf(stderr, "Incompatible processor. 
This Qt build requires the following features:\n "); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < std::size(features_indices); ++i) { if (missing & (Q_UINT64_C(1) << i)) fprintf(stderr, "%s", features_string + features_indices[i]); } fprintf(stderr, "\n"); fflush(stderr); - qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, - features_string + features_indices[qCountTrailingZeroBits(missing)]); + qAbort(); } - qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized)); + assert((f & SimdInitialized) == 0); + qt_cpu_features[0].storeRelease(f | SimdInitialized); return f; } void qDumpCPUFeatures() { - quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); + quint64 features = detectProcessorFeatures() & ~SimdInitialized; printf("Processor features: "); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < std::size(features_indices); ++i) { if (features & (Q_UINT64_C(1) << i)) printf("%s%s", features_string + features_indices[i], minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); } if ((features = (qCompilerCpuFeatures & ~features))) { printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! 
Missing required features:"); - for (int i = 0; i < features_count; ++i) { + for (uint i = 0; i < std::size(features_indices); ++i) { if (features & (Q_UINT64_C(1) << i)) printf("%s", features_string + features_indices[i]); } @@ -774,4 +787,16 @@ QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) no static bool checkRdrndWorks() noexcept { return false; } #endif // Q_PROCESSOR_X86 && RDRND +#if QT_SUPPORTS_INIT_PRIORITY +namespace { +struct QSimdInitializer +{ + inline QSimdInitializer() { qDetectCpuFeatures(); } +}; +} + +// This is intentionally a dynamic initialization of the variable +Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer; +#endif + QT_END_NAMESPACE diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 21eaeeef93..775aa2c6cb 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -337,9 +337,6 @@ enum CPUFeatures { CpuFeatureDSP = 2, CpuFeatureDSPR2 = 4, #endif - - // used only to indicate that the CPU detection was initialised - QSimdInitialized = 1 }; static const quint64 qCompilerCpuFeatures = 0 @@ -381,9 +378,9 @@ static inline qsizetype qRandomCpu(void *, qsizetype) noexcept static inline quint64 qCpuFeatures() { quint64 features = qt_cpu_features[0].loadRelaxed(); - if (Q_UNLIKELY(features == 0)) { - features = qDetectCpuFeatures(); - Q_ASSUME(features != 0); + if constexpr (!QT_SUPPORTS_INIT_PRIORITY) { + if (Q_UNLIKELY(features == 0)) + features = qDetectCpuFeatures(); } return features; } |