diff options
Diffstat (limited to 'src/corelib/global/qsimd.cpp')
-rw-r--r-- | src/corelib/global/qsimd.cpp | 775 |
1 files changed, 775 insertions, 0 deletions
diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp new file mode 100644 index 0000000000..8bc5381591 --- /dev/null +++ b/src/corelib/global/qsimd.cpp @@ -0,0 +1,775 @@ +// Copyright (C) 2021 The Qt Company Ltd. +// Copyright (C) 2022 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +// we need ICC to define the prototype for _rdseed64_step +#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES +#undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline + +#include "qsimd_p.h" +#include "qalgorithms.h" + +#include <stdio.h> +#include <string.h> + +#if defined(QT_NO_DEBUG) && !defined(NDEBUG) +# define NDEBUG +#endif +#include <assert.h> + +#ifdef Q_OS_LINUX +# include "../testlib/3rdparty/valgrind_p.h" +#endif + +#define QT_FUNCTION_TARGET_BASELINE + +#if defined(Q_OS_WIN) +# if !defined(Q_CC_GNU) +# include <intrin.h> +# endif +# if defined(Q_PROCESSOR_ARM_64) +# include <qt_windows.h> +# include <processthreadsapi.h> +# endif +#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32) +# include "private/qcore_unix_p.h" +#elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM) +# include <sys/auxv.h> + +// the kernel header definitions for HWCAP_* +// (the ones we need/may need anyway) + +// copied from <asm/hwcap.h> (ARM) +#define HWCAP_NEON 4096 + +// copied from <asm/hwcap.h> (ARM): +#define HWCAP2_AES (1 << 0) +#define HWCAP2_CRC32 (1 << 4) + +// copied from <asm/hwcap.h> (Aarch64) +#define HWCAP_AES (1 << 3) +#define HWCAP_CRC32 (1 << 7) + +// copied from <linux/auxvec.h> +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_HWCAP2 26 /* extension of AT_HWCAP */ + +#elif defined(Q_CC_GHS) +# include <INTEGRITY_types.h> +#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM) +# include <sys/sysctl.h> +#endif + +QT_BEGIN_NAMESPACE + +template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE +uint arraysize(T (&)[N]) +{ + // Same as std::size, but with QT_FUNCTION_TARGET_BASELIE, + // otherwise some versions of GCC fail to compile. + return N; +} + +#if defined(Q_PROCESSOR_ARM) +/* Data: + neon + crc32 + aes + */ +static const char features_string[] = + "\0" + " neon\0" + " crc32\0" + " aes\0"; +static const int features_indices[] = { 0, 1, 7, 14 }; +#elif defined(Q_PROCESSOR_MIPS) +/* Data: + dsp + dspr2 +*/ +static const char features_string[] = + "\0" + " dsp\0" + " dspr2\0"; + +static const int features_indices[] = { + 0, 1, 6 +}; +#elif defined(Q_PROCESSOR_X86) +# include "qsimd_x86.cpp" // generated by util/x86simdgen +#else +static const char features_string[] = ""; +static const int features_indices[] = { 0 }; +#endif +// end generated + +#if defined(Q_PROCESSOR_ARM) +static inline quint64 detectProcessorFeatures() +{ + quint64 features = 0; + +#if QT_CONFIG(getauxval) + unsigned long auxvHwCap = getauxval(AT_HWCAP); + if (auxvHwCap != 0) { +# if defined(Q_PROCESSOR_ARM_64) + // For Aarch64: + features |= CpuFeatureNEON; // NEON is always available + if (auxvHwCap & HWCAP_CRC32) + features |= CpuFeatureCRC32; + if (auxvHwCap & HWCAP_AES) + features |= CpuFeatureAES; +# else + // For ARM32: + if (auxvHwCap & HWCAP_NEON) + features |= CpuFeatureNEON; + auxvHwCap = getauxval(AT_HWCAP2); + if (auxvHwCap & HWCAP2_CRC32) + features |= CpuFeatureCRC32; + if (auxvHwCap & HWCAP2_AES) + features |= CpuFeatureAES; +# endif + return features; + } + // fall back to compile-time flags if getauxval failed +#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM) + unsigned feature; + size_t len = sizeof(feature); + if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0) + features |= feature ? CpuFeatureNEON : 0; + if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0) + features |= feature ? CpuFeatureCRC32 : 0; + // There is currently no optional value for crypto/AES. +#if defined(__ARM_FEATURE_CRYPTO) + features |= CpuFeatureAES; +#endif + return features; +#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64) + features |= CpuFeatureNEON; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0) + features |= CpuFeatureCRC32; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) + features |= CpuFeatureAES; + return features; +#endif +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + features |= CpuFeatureNEON; +#endif +#if defined(__ARM_FEATURE_CRC32) + features |= CpuFeatureCRC32; +#endif +#if defined(__ARM_FEATURE_CRYPTO) + features |= CpuFeatureAES; +#endif + + return features; +} + +#elif defined(Q_PROCESSOR_X86) + +#ifdef Q_PROCESSOR_X86_32 +# define PICreg "%%ebx" +#else +# define PICreg "%%rbx" +#endif +#ifdef __SSE2_MATH__ +# define X86_BASELINE "no-sse3" +#else +# define X86_BASELINE "no-sse" +#endif + +#if defined(Q_CC_GNU) +// lower the target for functions in this file +# undef QT_FUNCTION_TARGET_BASELINE +# define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE))) +# define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \ + X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND +#endif + +static bool checkRdrndWorks() noexcept; + +QT_FUNCTION_TARGET_BASELINE +static int maxBasicCpuidSupported() +{ +#if defined(Q_CC_EMSCRIPTEN) + return 6; // All features supported by Emscripten +#elif defined(Q_CC_GNU) + qregisterint tmp1; + +# if Q_PROCESSOR_X86 < 5 + // check if the CPUID instruction is supported + long cpuid_supported; + asm ("pushf\n" + "pop %0\n" + "mov %0, %1\n" + "xor $0x00200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "pop %0\n" + "xor %1, %0\n" // %eax is now 0 if CPUID is not supported + : "=a" (cpuid_supported), "=r" (tmp1) + ); + if (!cpuid_supported) + return 0; +# endif + + int result; + asm ("xchg " PICreg", %1\n" + "cpuid\n" + "xchg " PICreg", %1\n" + : "=&a" (result), "=&r" (tmp1) + : "0" (0) + : "ecx", "edx"); + return result; +#elif defined(Q_OS_WIN) + // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 + int info[4]; + __cpuid(info, 0); + return info[0]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUID(0, info); + return info[0]; +#else + return 0; +#endif +} + +QT_FUNCTION_TARGET_BASELINE +static void cpuidFeatures01(uint &ecx, uint &edx) +{ +#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) + qregisterint tmp1; + asm ("xchg " PICreg", %2\n" + "cpuid\n" + "xchg " PICreg", %2\n" + : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) + : "a" (1)); +#elif defined(Q_OS_WIN) + int info[4]; + __cpuid(info, 1); + ecx = info[2]; + edx = info[3]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUID(1, info); + ecx = info[2]; + edx = info[3]; +#else + Q_UNUSED(ecx); + Q_UNUSED(edx); +#endif +} + +#ifdef Q_OS_WIN +inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} +#endif + +QT_FUNCTION_TARGET_BASELINE +static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) +{ +#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) + qregisteruint rbx; // in case it's 64-bit + qregisteruint rcx = 0; + qregisteruint rdx = 0; + asm ("xchg " PICreg", %0\n" + "cpuid\n" + "xchg " PICreg", %0\n" + : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) + : "a" (7)); + ebx = rbx; + ecx = rcx; + edx = rdx; +#elif defined(Q_OS_WIN) + int info[4]; + __cpuidex(info, 7, 0); + ebx = info[1]; + ecx = info[2]; + edx = info[3]; +#elif defined(Q_CC_GHS) + unsigned int info[4]; + __CPUIDEX(7, 0, info); + ebx = info[1]; + ecx = info[2]; + edx = info[3]; +#else + Q_UNUSED(ebx); + Q_UNUSED(ecx); + Q_UNUSED(edx); +#endif +} + +QT_FUNCTION_TARGET_BASELINE +#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) +// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); +inline quint64 _xgetbv(__int64) { return 0; } +#endif +static void xgetbv(uint in, uint &eax, uint &edx) +{ +#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) + asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction + : "=a" (eax), "=d" (edx) + : "c" (in)); +#elif defined(Q_OS_WIN) + quint64 result = _xgetbv(in); + eax = result; + edx = result >> 32; +#else + Q_UNUSED(in); + Q_UNUSED(eax); + Q_UNUSED(edx); +#endif +} + +QT_FUNCTION_TARGET_BASELINE +static quint64 adjustedXcr0(quint64 xcr0) +{ + /* + * Some OSes hide their capability of context-switching the AVX512 state in + * the XCR0 register. They do that so the first time we execute an + * instruction that may access the AVX512 state (requiring the EVEX prefix) + * they allocate the necessary context switch space. + * + * This behavior is deprecated with the XFD (Extended Feature Disable) + * register, but we can't change existing OSes. + */ +#ifdef Q_OS_DARWIN + // from <machine/cpu_capabilities.h> in xnu + // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h> + constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000); + constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000; + constexpr quintptr cpu_capabilities64 = commpage + 0x10; + quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); + if (capab & kHasAVX512F) + xcr0 |= XSave_Avx512State; +#endif + + return xcr0; +} + +QT_FUNCTION_TARGET_BASELINE +static quint64 detectProcessorFeatures() +{ + quint64 features = 0; + int cpuidLevel = maxBasicCpuidSupported(); +#if Q_PROCESSOR_X86 < 5 + if (cpuidLevel < 1) + return 0; +#else + assert(cpuidLevel >= 1); +#endif + + uint results[X86CpuidMaxLeaf] = {}; + cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]); + if (cpuidLevel >= 7) + cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]); + + // populate our feature list + for (uint i = 0; i < arraysize(x86_locators); ++i) { + uint word = x86_locators[i] / 32; + uint bit = 1U << (x86_locators[i] % 32); + quint64 feature = Q_UINT64_C(1) << i; + if (results[word] & bit) + features |= feature; + } + + // now check the AVX state + quint64 xcr0 = 0; + if (results[Leaf01ECX] & (1u << 27)) { + // XGETBV enabled + uint xgetbvA = 0, xgetbvD = 0; + xgetbv(0, xgetbvA, xgetbvD); + + xcr0 = xgetbvA; + if (sizeof(XSaveBits) > sizeof(xgetbvA)) + xcr0 |= quint64(xgetbvD) << 32; + xcr0 = adjustedXcr0(xcr0); + } + + for (auto req : xsave_requirements) { + if ((xcr0 & req.xsave_state) != req.xsave_state) + features &= ~req.cpu_features; + } + + if (features & CpuFeatureRDRND && !checkRdrndWorks()) + features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); + + return features; +} + +#elif defined(Q_PROCESSOR_MIPS_32) + +#if defined(Q_OS_LINUX) +// +// Do not use QByteArray: it could use SIMD instructions itself at +// some point, thus creating a recursive dependency. Instead, use a +// QSimpleBuffer, which has the bare minimum needed to use memory +// dynamically and read lines from /proc/cpuinfo of arbitrary sizes. +// +struct QSimpleBuffer +{ + static const int chunk_size = 256; + char *data; + unsigned alloc; + unsigned size; + + QSimpleBuffer() : data(nullptr), alloc(0), size(0) { } + ~QSimpleBuffer() { ::free(data); } + + void resize(unsigned newsize) + { + if (newsize > alloc) { + unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1); + if (newalloc < newsize) + newalloc = newsize; + if (newalloc != alloc) { + data = static_cast<char *>(::realloc(data, newalloc)); + alloc = newalloc; + } + } + size = newsize; + } + void append(const QSimpleBuffer &other, unsigned appendsize) + { + unsigned oldsize = size; + resize(oldsize + appendsize); + ::memcpy(data + oldsize, other.data, appendsize); + } + void popleft(unsigned amount) + { + if (amount >= size) + return resize(0); + size -= amount; + ::memmove(data, data + amount, size); + } + char *cString() + { + if (!alloc) + resize(1); + return (data[size] = '\0', data); + } +}; + +// +// Uses a scratch "buffer" (which must be used for all reads done in the +// same file descriptor) to read chunks of data from a file, to read +// one line at a time. Lines include the trailing newline character ('\n'). +// On EOF, line.size is zero. +// +static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) +{ + for (;;) { + char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size)); + if (newline) { + unsigned piece_size = newline - buffer.data + 1; + line.append(buffer, piece_size); + buffer.popleft(piece_size); + line.resize(line.size - 1); + return; + } + if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { + int oldsize = buffer.size; + buffer.resize(buffer.size + QSimpleBuffer::chunk_size); + buffer.size = oldsize; + } + ssize_t read_bytes = + ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); + if (read_bytes > 0) + buffer.size += read_bytes; + else + return; + } +} + +// +// Checks if any line with a given prefix from /proc/cpuinfo contains +// a certain string, surrounded by spaces. +// +static bool procCpuinfoContains(const char *prefix, const char *string) +{ + int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY); + if (cpuinfo_fd == -1) + return false; + + unsigned string_len = ::strlen(string); + unsigned prefix_len = ::strlen(prefix); + QSimpleBuffer line, buffer; + bool present = false; + do { + line.resize(0); + bufReadLine(cpuinfo_fd, line, buffer); + char *colon = static_cast<char *>(::memchr(line.data, ':', line.size)); + if (colon && line.size > prefix_len + string_len) { + if (!::strncmp(prefix, line.data, prefix_len)) { + // prefix matches, next character must be ':' or space + if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { + // Does it contain the string? + char *found = ::strstr(line.cString(), string); + if (found && ::isspace(found[-1]) && + (::isspace(found[string_len]) || found[string_len] == '\0')) { + present = true; + break; + } + } + } + } + } while (line.size); + + ::qt_safe_close(cpuinfo_fd); + return present; +} +#endif + +static inline quint64 detectProcessorFeatures() +{ + // NOTE: MIPS 74K cores are the only ones supporting DSPr2. + quint64 flags = 0; + +#if defined __mips_dsp + flags |= CpuFeatureDSP; +# if defined __mips_dsp_rev && __mips_dsp_rev >= 2 + flags |= CpuFeatureDSPR2; +# elif defined(Q_OS_LINUX) + if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) + flags |= CpuFeatureDSPR2; +# endif +#elif defined(Q_OS_LINUX) + if (procCpuinfoContains("ASEs implemented", "dsp")) { + flags |= CpuFeatureDSP; + if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) + flags |= CpuFeatureDSPR2; + } +#endif + + return flags; +} + +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} +#endif + +// record what CPU features were enabled by default in this Qt build +static const quint64 minFeature = qCompilerCpuFeatures; + +static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1); +Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 }; + +QT_FUNCTION_TARGET_BASELINE +uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)() +{ + auto minFeatureTest = minFeature; +#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk) + // Controlflow Enforcement Technology (CET) is an OS-assisted + // hardware-feature, meaning the CPUID bit may be disabled if the OS + // doesn't support it, but that's ok. + minFeatureTest &= ~CpuFeatureSHSTK; +#endif + QCpuFeatureType f = detectProcessorFeatures(); + + // Intentionally NOT qgetenv (this code runs too early) + if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) { +#if _POSIX_C_SOURCE >= 200112L + char *saveptr = nullptr; + auto strtok = [&saveptr](char *str, const char *delim) { + return ::strtok_r(str, delim, &saveptr); + }; +#endif + while (char *token = strtok(disable, " ")) { + disable = nullptr; + for (uint i = 0; i < arraysize(features_indices); ++i) { + if (strcmp(token, features_string + features_indices[i]) == 0) + f &= ~(Q_UINT64_C(1) << i); + } + } + } + +#ifdef RUNNING_ON_VALGRIND + bool runningOnValgrind = RUNNING_ON_VALGRIND; +#else + bool runningOnValgrind = false; +#endif + if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) { + quint64 missing = minFeatureTest & ~quint64(f); + fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n "); + for (uint i = 0; i < arraysize(features_indices); ++i) { + if (missing & (Q_UINT64_C(1) << i)) + fprintf(stderr, "%s", features_string + features_indices[i]); + } + fprintf(stderr, "\n"); + fflush(stderr); + qAbort(); + } + + assert((f & SimdInitialized) == 0); + f |= SimdInitialized; + std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), f, std::memory_order_relaxed); + return f; +} + +QT_FUNCTION_TARGET_BASELINE +void qDumpCPUFeatures() +{ + quint64 features = detectProcessorFeatures() & ~SimdInitialized; + printf("Processor features: "); + for (uint i = 0; i < arraysize(features_indices); ++i) { + if (features & (Q_UINT64_C(1) << i)) + printf("%s%s", features_string + features_indices[i], + minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); + } + if ((features = (qCompilerCpuFeatures & ~features))) { + printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:"); + for (uint i = 0; i < arraysize(features_indices); ++i) { + if (features & (Q_UINT64_C(1) << i)) + printf("%s", features_string + features_indices[i]); + } + printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!"); + } + puts(""); +} + +#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) + +# ifdef Q_PROCESSOR_X86_64 +# define _rdrandXX_step _rdrand64_step +# define _rdseedXX_step _rdseed64_step +# else +# define _rdrandXX_step _rdrand32_step +# define _rdseedXX_step _rdseed32_step +# endif + +// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for +// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long +// long on Windows, but unsigned long on Linux. +namespace { +template <typename F> struct ExtractParameter; +template <typename T> struct ExtractParameter<int (T *)> { using Type = T; }; +using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type; +} + +# if QT_COMPILER_SUPPORTS_HERE(RDSEED) +static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept +{ + // Unlike for the RDRAND code below, the Intel whitepaper describing the + // use of the RDSEED instruction indicates we should not retry in a loop. + // If the independent bit generator used by RDSEED is out of entropy, it + // may take time to replenish. + // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide + while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) { + if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0) + goto out; + ptr += sizeof(randuint) / sizeof(*ptr); + } + + if (sizeof(*ptr) != sizeof(randuint) && ptr != end) { + if (_rdseed32_step(ptr) == 0) + goto out; + ++ptr; + } + +out: + return ptr; +} +# else +static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *) +{ + return ptr; +} +# endif + +static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept +{ + int retries = 10; + while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { + if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr))) + ptr += sizeof(randuint)/sizeof(*ptr); + else if (--retries == 0) + goto out; + } + + while (sizeof(*ptr) != sizeof(randuint) && ptr != end) { + bool ok = _rdrand32_step(ptr); + if (!ok && --retries) + continue; + if (ok) + ++ptr; + break; + } + +out: + return ptr; +} + +QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION +static bool checkRdrndWorks() noexcept +{ + /* + * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a + * failing random generation instruction, which always returns + * 0xffffffff, even when generation was "successful". + * + * This code checks if hardware random generator generates four consecutive + * equal numbers. If it does, then we probably have a failing one and + * should disable it completely. + * + * https://bugreports.qt.io/browse/QTBUG-69423 + */ + constexpr qsizetype TestBufferSize = 4; + unsigned testBuffer[TestBufferSize] = {}; + + unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize); + if (end < testBuffer + 3) { + // Random generation didn't produce enough data for us to make a + // determination whether it's working or not. Assume it isn't, but + // don't print a warning. + return false; + } + + // Check the results for equality + if (testBuffer[0] == testBuffer[1] + && testBuffer[0] == testBuffer[2] + && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { + fprintf(stderr, "WARNING: CPU random generator seem to be failing, " + "disabling hardware random number generation\n" + "WARNING: RDRND generated:"); + for (unsigned *ptr = testBuffer; ptr < end; ++ptr) + fprintf(stderr, " 0x%x", *ptr); + fprintf(stderr, "\n"); + return false; + } + + // We're good + return true; +} + +QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept +{ + unsigned *ptr = reinterpret_cast<unsigned *>(buffer); + unsigned *end = ptr + count; + + if (qCpuHasFeature(RDSEED)) + ptr = qt_random_rdseed(ptr, end); + + // fill the buffer with RDRND if RDSEED didn't + ptr = qt_random_rdrnd(ptr, end); + return ptr - reinterpret_cast<unsigned *>(buffer); +} +#elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM) +static bool checkRdrndWorks() noexcept { return false; } +#endif // Q_PROCESSOR_X86 && RDRND + +#if QT_SUPPORTS_INIT_PRIORITY +namespace { +struct QSimdInitializer +{ + inline QSimdInitializer() { QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); } +}; +} + +// This is intentionally a dynamic initialization of the variable +Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer; +#endif + +QT_END_NAMESPACE |