summary refs log tree commit diff stats
path: root/src/corelib/tools/qsimd_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- src/corelib/tools/qsimd_p.h 396
1 files changed, 0 insertions, 396 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
deleted file mode 100644
index 26e98c4542..0000000000
--- a/src/corelib/tools/qsimd_p.h
+++ /dev/null
@@ -1,396 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#ifndef QSIMD_P_H
-#define QSIMD_P_H
-
-//
-// W A R N I N G
-// -------------
-//
-// This file is not part of the Qt API. It exists purely as an
-// implementation detail. This header file may change from version to
-// version without notice, or even be removed.
-//
-// We mean it.
-//
-
-#include <QtCore/private/qglobal_p.h>
-
-/*
- * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros.
- * They mean the compiler supports the necessary flags and the headers
- * for the x86 and ARM intrinsics:
- * - GCC: the -mXXX or -march=YYY flag is necessary before #include
- * up to 4.8; GCC >= 4.9 can include unconditionally
- * - Intel CC: #include can happen unconditionally
- * - MSVC: #include can happen unconditionally
- * - RVCT: ???
- *
- * We will try to include all headers possible under this configuration.
- *
- * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 &
- * up do define __AVX__ if the -arch:AVX option is passed on the command-line.
- *
- * Supported XXX are:
- * Flag | Arch | GCC | Intel CC | MSVC |
- * ARM_NEON | ARM | I & C | None | ? |
- * SSE2 | x86 | I & C | I & C | I & C |
- * SSE3 | x86 | I & C | I & C | I only |
- * SSSE3 | x86 | I & C | I & C | I only |
- * SSE4_1 | x86 | I & C | I & C | I only |
- * SSE4_2 | x86 | I & C | I & C | I only |
- * AVX | x86 | I & C | I & C | I & C |
- * AVX2 | x86 | I & C | I & C | I only |
- * AVX512xx | x86 | I & C | I & C | I only |
- * I = intrinsics; C = code generation
- *
- * Code can use the following constructs to determine compiler support & status:
- * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__)
- * If this test passes, then the compiler is already generating code for that
- * given sub-architecture. The intrinsics for that sub-architecture are
- * #included and can be used without restriction or runtime check.
- *
- * - #if QT_COMPILER_SUPPORTS(XXX)
- * If this test passes, then the compiler is able to generate code for that
- * given sub-architecture in another translation unit, given the right set of
- * flags. Use of the intrinsics is not guaranteed. This is useful with
- * runtime detection (see below).
- *
- * - #if QT_COMPILER_SUPPORTS_HERE(XXX)
- * If this test passes, then the compiler is able to generate code for that
- * given sub-architecture in this translation unit, even if it is not doing
- * that now (it might be). Individual functions may be tagged with
- * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that
- * sub-arch. Only inside such functions is the use of the intrinsics
- * guaranteed to work. This is useful with runtime detection (see below).
- *
- * Runtime detection of a CPU sub-architecture can be done with the
- * qCpuHasFeature(XXX) function. There are two strategies for generating
- * optimized code like that:
- *
- * 1) place the optimized code in a different translation unit (C or assembly
- * sources) and pass the correct flags to the compiler to enable support. Those
- * sources must not include qglobal.h, which means they cannot include this
- * file either. The dispatcher function would look like this:
- *
- * void foo()
- * {
- * #if QT_COMPILER_SUPPORTS(XXX)
- * if (qCpuHasFeature(XXX)) {
- * foo_optimized_xxx();
- * return;
- * }
- * #endif
- * foo_plain();
- * }
- *
- * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and
- * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use
- * other Qt code. The dispatcher function would look like this:
- *
- * void foo()
- * {
- * #if QT_COMPILER_SUPPORTS_HERE(XXX)
- * if (qCpuHasFeature(XXX)) {
- * foo_optimized_xxx();
- * return;
- * }
- * #endif
- * foo_plain();
- * }
- */
-
-#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC)
-#include <intrin.h>
-#endif
-
-#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0)
-
-#if defined(Q_PROCESSOR_ARM)
-# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x)
-# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600
- /* GCC requires attributes for a function */
-# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
-# else
-# define QT_FUNCTION_TARGET(x)
-# endif
-# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__)
-# define __ARM_FEATURE_NEON // also support QT_COMPILER_SUPPORTS_HERE(NEON)
-# endif
-#elif defined(Q_PROCESSOR_MIPS)
-# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
-# define QT_FUNCTION_TARGET(x)
-# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32)
-# define __MIPS_DSP__
-# endif
-# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32)
-# define __MIPS_DSPR2__
-# endif
-#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)
-# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
-# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
- /* GCC requires attributes for a function */
-# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
-# else
-# define QT_FUNCTION_TARGET(x)
-# endif
-#else
-# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
-# define QT_FUNCTION_TARGET(x)
-#endif
-
-#ifdef Q_PROCESSOR_X86
-/* -- x86 intrinsic support -- */
-
-# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
-// MSVC doesn't define __SSE2__, so do it ourselves
-# define __SSE__ 1
-# define __SSE2__ 1
-# endif
-
-# ifdef __SSE2__
-// #include the intrinsics
-# include <immintrin.h>
-# endif
-
-# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
-// GCC 4.4 and Clang 2.8 added a few more intrinsics there
-# include <x86intrin.h>
-# endif
-
-# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__))
-// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros
-// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx
-# define __SSE3__ 1
-# define __SSSE3__ 1
-// no Intel CPU supports SSE4a, so don't define it
-# define __SSE4_1__ 1
-# define __SSE4_2__ 1
-# ifndef __AVX__
-# define __AVX__ 1
-# endif
-# endif
-
-# if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
-// POPCNT instructions:
-// All processors that support SSE4.2 support POPCNT
-// (but neither MSVC nor the Intel compiler define this macro)
-# define __POPCNT__ 1
-# endif
-
-// AVX intrinsics
-# if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
-// AES, PCLMULQDQ instructions:
-// All processors that support AVX support PCLMULQDQ
-// (but neither MSVC nor the Intel compiler define this macro)
-# define __PCLMUL__ 1
-# endif
-
-# if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
-// F16C & RDRAND instructions:
-// All processors that support AVX2 support F16C & RDRAND:
-// (but neither MSVC nor the Intel compiler define these macros)
-# define __F16C__ 1
-# define __RDRND__ 1
-# endif
-
-# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
-// BMI2 instructions:
-// All processors that support BMI support BMI2 (and AVX2)
-// (but neither MSVC nor the Intel compiler define this macro)
-# define __BMI2__ 1
-# endif
-
-# include "qsimd_x86_p.h"
-
-// Haswell sub-architecture
-//
-// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
-// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
-// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
-// same features.
-//
-// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
-// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
-# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
-# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
- defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
-# define __haswell__ 1
-# endif
-
-// This constant does not include all CPU features found in a Haswell, only
-// those that we'd have optimized code for.
-// Note: declared "static const" rather than Q_CONSTEXPR, as this file may be compiled in C mode.
-QT_BEGIN_NAMESPACE
-static const quint64 CpuFeatureArchHaswell = 0
- | CpuFeatureSSE2
- | CpuFeatureSSE3
- | CpuFeatureSSSE3
- | CpuFeatureSSE4_1
- | CpuFeatureSSE4_2
- | CpuFeatureFMA
- | CpuFeaturePOPCNT
- | CpuFeatureAVX
- | CpuFeatureF16C
- | CpuFeatureAVX2
- | CpuFeatureBMI
- | CpuFeatureBMI2;
-QT_END_NAMESPACE
-
-#endif /* Q_PROCESSOR_X86 */
-
-// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
-// This should be tweaked with an "upper version" of clang once we know which release fixes the
-// issue. At that point we can rely on __ARM_FEATURE_CRC32 again.
-#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32)
-# undef __ARM_FEATURE_CRC32
-#endif
-
-// NEON intrinsics
-// note: as of GCC 4.9, does not support function targets for ARM
-#if defined(__ARM_NEON) || defined(__ARM_NEON__)
-#include <arm_neon.h>
-#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available.
-#ifndef __ARM_NEON__
-// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection.
-#define __ARM_NEON__
-#endif
-#endif
-// AArch64/ARM64
-#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32)
-#if defined(Q_PROCESSOR_ARM_64)
-// only available on aarch64
-#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc"
-#endif
-# include <arm_acle.h>
-#endif
-
-#ifdef __cplusplus
-#include <qatomic.h>
-
-QT_BEGIN_NAMESPACE
-
-#ifndef Q_PROCESSOR_X86
-enum CPUFeatures { // feature bits for non-x86 builds (this enum is enclosed in #ifndef Q_PROCESSOR_X86)
-#if defined(Q_PROCESSOR_ARM)
- CpuFeatureNEON = 2,
- CpuFeatureARM_NEON = CpuFeatureNEON, // second name for the same bit as CpuFeatureNEON
- CpuFeatureCRC32 = 4,
-#elif defined(Q_PROCESSOR_MIPS)
- CpuFeatureDSP = 2, // same numeric values as the ARM branch; only one branch is compiled
- CpuFeatureDSPR2 = 4,
-#endif
-
- // used only to indicate that the CPU detection was initialised
- QSimdInitialized = 1
-};
-
-static const quint64 qCompilerCpuFeatures = 0 // compile-time feature mask; qCpuHasFeature() tests it before the runtime mask
-#if defined __ARM_NEON__
- | CpuFeatureNEON
-#endif
-#if defined __ARM_FEATURE_CRC32
- | CpuFeatureCRC32
-#endif
-#if defined __mips_dsp
- | CpuFeatureDSP
-#endif
-#if defined __mips_dspr2
- | CpuFeatureDSPR2
-#endif
- ; // ends the qCompilerCpuFeatures initializer; the value stays 0 when none of the macros above is defined
-#endif
-
-#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
-extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1];
-#else
-extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2];
-#endif
-Q_CORE_EXPORT quint64 qDetectCpuFeatures();
-
-#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED)
-Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept; // declaration only; no definition appears in this header
-#else
-static inline qsizetype qRandomCpu(void *, qsizetype) noexcept // fallback stub: both arguments are unnamed and ignored
-{
- return 0; // always 0; presumably means "nothing was generated" - confirm against callers
-}
-#endif
-
-static inline quint64 qCpuFeatures() // returns the feature mask held in qt_cpu_features, running detection when it reads as 0
-{
- quint64 features = qt_cpu_features[0].loadRelaxed(); // relaxed atomic load of element 0
-#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
- features |= quint64(qt_cpu_features[1].loadRelaxed()) << 32; // without 64-bit atomics, element 1 supplies bits 32..63
-#endif
- if (Q_UNLIKELY(features == 0)) { // a zero mask triggers detection
- features = qDetectCpuFeatures();
- Q_ASSUME(features != 0); // the value returned by qDetectCpuFeatures() is assumed to be non-zero
- }
- return features;
-}
-
-#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
- || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
-
-inline bool qHasHwrng() // reports whether RDRND is available, as decided by qCpuHasFeature(RDRND)
-{
-#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
- return qCpuHasFeature(RDRND); // compile-time mask first, then the runtime mask from qCpuFeatures()
-#else
- return false; // not x86, or RDRND code cannot be generated in this translation unit
-#endif
-}
-
-#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
- for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) // loop header only: advances i up to min(length, count of 4-byte units before ptr reaches a 16-byte boundary); presumably ptr addresses 4-byte elements - confirm at call sites
-
-#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \
- for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i) // same scheme for a 32-byte boundary: at most 7 units of 4 bytes
-
-QT_END_NAMESPACE
-
-#endif // __cplusplus
-
-#define SIMD_EPILOGUE(i, length, max) \
- for (int _i = 0; _i < max && i < length; ++i, ++_i) // loop header only: at most max iterations, stopping once i reaches length; NOTE(review): macro arguments are not parenthesized
-
-#endif // QSIMD_P_H