diff options
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 396 |
1 files changed, 0 insertions, 396 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h deleted file mode 100644 index 26e98c4542..0000000000 --- a/src/corelib/tools/qsimd_p.h +++ /dev/null @@ -1,396 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#ifndef QSIMD_P_H -#define QSIMD_P_H - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. -// - -#include <QtCore/private/qglobal_p.h> - -/* - * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros. - * They mean the compiler supports the necessary flags and the headers - * for the x86 and ARM intrinsics: - * - GCC: the -mXXX or march=YYY flag is necessary before #include - * up to 4.8; GCC >= 4.9 can include unconditionally - * - Intel CC: #include can happen unconditionally - * - MSVC: #include can happen unconditionally - * - RVCT: ??? - * - * We will try to include all headers possible under this configuration. - * - * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 & - * up do define __AVX__ if the -arch:AVX option is passed on the command-line. - * - * Supported XXX are: - * Flag | Arch | GCC | Intel CC | MSVC | - * ARM_NEON | ARM | I & C | None | ? 
| - * SSE2 | x86 | I & C | I & C | I & C | - * SSE3 | x86 | I & C | I & C | I only | - * SSSE3 | x86 | I & C | I & C | I only | - * SSE4_1 | x86 | I & C | I & C | I only | - * SSE4_2 | x86 | I & C | I & C | I only | - * AVX | x86 | I & C | I & C | I & C | - * AVX2 | x86 | I & C | I & C | I only | - * AVX512xx | x86 | I & C | I & C | I only | - * I = intrinsics; C = code generation - * - * Code can use the following constructs to determine compiler support & status: - * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__) - * If this test passes, then the compiler is already generating code for that - * given sub-architecture. The intrinsics for that sub-architecture are - * #included and can be used without restriction or runtime check. - * - * - #if QT_COMPILER_SUPPORTS(XXX) - * If this test passes, then the compiler is able to generate code for that - * given sub-architecture in another translation unit, given the right set of - * flags. Use of the intrinsics is not guaranteed. This is useful with - * runtime detection (see below). - * - * - #if QT_COMPILER_SUPPORTS_HERE(XXX) - * If this test passes, then the compiler is able to generate code for that - * given sub-architecture in this translation unit, even if it is not doing - * that now (it might be). Individual functions may be tagged with - * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that - * sub-arch. Only inside such functions is the use of the intrinsics - * guaranteed to work. This is useful with runtime detection (see below). - * - * Runtime detection of a CPU sub-architecture can be done with the - * qCpuHasFeature(XXX) function. There are two strategies for generating - * optimized code like that: - * - * 1) place the optimized code in a different translation unit (C or assembly - * sources) and pass the correct flags to the compiler to enable support. Those - * sources must not include qglobal.h, which means they cannot include this - * file either. 
The dispatcher function would look like this: - * - * void foo() - * { - * #if QT_COMPILER_SUPPORTS(XXX) - * if (qCpuHasFeature(XXX)) { - * foo_optimized_xxx(); - * return; - * } - * #endif - * foo_plain(); - * } - * - * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and - * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use - * other Qt code. The dispatcher function would look like this: - * - * void foo() - * { - * #if QT_COMPILER_SUPPORTS_HERE(XXX) - * if (qCpuHasFeature(XXX)) { - * foo_optimized_xxx(); - * return; - * } - * #endif - * foo_plain(); - * } - */ - -#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC) -#include <intrin.h> -#endif - -#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0) - -#if defined(Q_PROCESSOR_ARM) -# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x) -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600 - /* GCC requires attributes for a function */ -# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) -# else -# define QT_FUNCTION_TARGET(x) -# endif -# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__) -# define __ARM_FEATURE_NEON // also support QT_COMPILER_SUPPORTS_HERE(NEON) -# endif -#elif defined(Q_PROCESSOR_MIPS) -# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) -# define QT_FUNCTION_TARGET(x) -# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32) -# define __MIPS_DSP__ -# endif -# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32) -# define __MIPS_DSPR2__ -# endif -#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) -# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x)) -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) - /* GCC requires attributes for a function */ -# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) -# else -# define 
QT_FUNCTION_TARGET(x) -# endif -#else -# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) -# define QT_FUNCTION_TARGET(x) -#endif - -#ifdef Q_PROCESSOR_X86 -/* -- x86 intrinsic support -- */ - -# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2) -// MSVC doesn't define __SSE2__, so do it ourselves -# define __SSE__ 1 -# define __SSE2__ 1 -# endif - -# ifdef __SSE2__ -// #include the intrinsics -# include <immintrin.h> -# endif - -# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) -// GCC 4.4 and Clang 2.8 added a few more intrinsics there -# include <x86intrin.h> -# endif - -# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__)) -// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros -// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx -# define __SSE3__ 1 -# define __SSSE3__ 1 -// no Intel CPU supports SSE4a, so don't define it -# define __SSE4_1__ 1 -# define __SSE4_2__ 1 -# ifndef __AVX__ -# define __AVX__ 1 -# endif -# endif - -# if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// POPCNT instructions: -// All processors that support SSE4.2 support POPCNT -// (but neither MSVC nor the Intel compiler define this macro) -# define __POPCNT__ 1 -# endif - -// AVX intrinsics -# if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// AES, PCLMULQDQ instructions: -// All processors that support AVX support PCLMULQDQ -// (but neither MSVC nor the Intel compiler define this macro) -# define __PCLMUL__ 1 -# endif - -# if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) -// F16C & RDRAND instructions: -// All processors that support AVX2 support F16C & RDRAND: -// (but neither MSVC nor the Intel compiler define these macros) -# define __F16C__ 1 -# define __RDRND__ 1 -# endif - -# if defined(__BMI__) && !defined(__BMI2__) && 
defined(Q_CC_INTEL) -// BMI2 instructions: -// All processors that support BMI support BMI2 (and AVX2) -// (but neither MSVC nor the Intel compiler define this macro) -# define __BMI2__ 1 -# endif - -# include "qsimd_x86_p.h" - -// Haswell sub-architecture -// -// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2, -// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a -// sub-target for us. The first AMD processor with AVX2 support (Zen) has the -// same features. -// -// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc -// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell). -# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell" -# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \ - defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__) -# define __haswell__ 1 -# endif - -// This constant does not include all CPU features found in a Haswell, only -// those that we'd have optimized code for. -// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode. -QT_BEGIN_NAMESPACE -static const quint64 CpuFeatureArchHaswell = 0 - | CpuFeatureSSE2 - | CpuFeatureSSE3 - | CpuFeatureSSSE3 - | CpuFeatureSSE4_1 - | CpuFeatureSSE4_2 - | CpuFeatureFMA - | CpuFeaturePOPCNT - | CpuFeatureAVX - | CpuFeatureF16C - | CpuFeatureAVX2 - | CpuFeatureBMI - | CpuFeatureBMI2; -QT_END_NAMESPACE - -#endif /* Q_PROCESSOR_X86 */ - -// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html -// This should be tweaked with an "upper version" of clang once we know which release fixes the -// issue. At that point we can rely on __ARM_FEATURE_CRC32 again. 
-#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32) -# undef __ARM_FEATURE_CRC32 -#endif - -// NEON intrinsics -// note: as of GCC 4.9, does not support function targets for ARM -#if defined(__ARM_NEON) || defined(__ARM_NEON__) -#include <arm_neon.h> -#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available. -#ifndef __ARM_NEON__ -// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection. -#define __ARM_NEON__ -#endif -#endif -// AArch64/ARM64 -#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32) -#if defined(Q_PROCESSOR_ARM_64) -// only available on aarch64 -#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc" -#endif -# include <arm_acle.h> -#endif - -#ifdef __cplusplus -#include <qatomic.h> - -QT_BEGIN_NAMESPACE - -#ifndef Q_PROCESSOR_X86 -enum CPUFeatures { -#if defined(Q_PROCESSOR_ARM) - CpuFeatureNEON = 2, - CpuFeatureARM_NEON = CpuFeatureNEON, - CpuFeatureCRC32 = 4, -#elif defined(Q_PROCESSOR_MIPS) - CpuFeatureDSP = 2, - CpuFeatureDSPR2 = 4, -#endif - - // used only to indicate that the CPU detection was initialised - QSimdInitialized = 1 -}; - -static const quint64 qCompilerCpuFeatures = 0 -#if defined __ARM_NEON__ - | CpuFeatureNEON -#endif -#if defined __ARM_FEATURE_CRC32 - | CpuFeatureCRC32 -#endif -#if defined __mips_dsp - | CpuFeatureDSP -#endif -#if defined __mips_dspr2 - | CpuFeatureDSPR2 -#endif - ; -#endif - -#ifdef Q_ATOMIC_INT64_IS_SUPPORTED -extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1]; -#else -extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2]; -#endif -Q_CORE_EXPORT quint64 qDetectCpuFeatures(); - -#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED) -Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept; -#else -static inline qsizetype qRandomCpu(void *, qsizetype) noexcept -{ - return 0; -} -#endif - 
-static inline quint64 qCpuFeatures() -{ - quint64 features = qt_cpu_features[0].loadRelaxed(); -#ifndef Q_ATOMIC_INT64_IS_SUPPORTED - features |= quint64(qt_cpu_features[1].loadRelaxed()) << 32; -#endif - if (Q_UNLIKELY(features == 0)) { - features = qDetectCpuFeatures(); - Q_ASSUME(features != 0); - } - return features; -} - -#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \ - || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) - -inline bool qHasHwrng() -{ -#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) - return qCpuHasFeature(RDRND); -#else - return false; -#endif -} - -#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ - for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) - -#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \ - for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i) - -QT_END_NAMESPACE - -#endif // __cplusplus - -#define SIMD_EPILOGUE(i, length, max) \ - for (int _i = 0; _i < max && i < length; ++i, ++_i) - -#endif // QSIMD_P_H |