summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r--src/corelib/tools/qsimd_p.h208
1 files changed, 56 insertions, 152 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 18684caefb..9f1321df94 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -232,32 +232,48 @@
# define __RDRND__ 1
# endif
-#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
-#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
-#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
-#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
-#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
-#define QT_FUNCTION_TARGET_STRING_AVX "avx"
-#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
-#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
-#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
-#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
-#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
-#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
-#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
-#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
-#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
-#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
-
-#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_PCLMUL "pclmul,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
-#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
-#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
-#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
-#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
-#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
-#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
+// BMI2 instructions:
+// All processors that support BMI support BMI2 (and AVX2)
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __BMI2__ 1
+# endif
+
+# include "qsimd_x86_p.h"
+
+// Haswell sub-architecture
+//
+// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
+// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
+// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
+// same features.
+//
+// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
+// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
+# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
+# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
+ defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
+# define __haswell__ 1
+# endif
+
+// This constant does not include all CPU features found in a Haswell, only
+// those that we'd have optimized code for.
+// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
+QT_BEGIN_NAMESPACE
+static const quint64 CpuFeatureArchHaswell = 0
+ | CpuFeatureSSE2
+ | CpuFeatureSSE3
+ | CpuFeatureSSSE3
+ | CpuFeatureSSE4_1
+ | CpuFeatureSSE4_2
+ | CpuFeatureFMA
+ | CpuFeaturePOPCNT
+ | CpuFeatureAVX
+ | CpuFeatureF16C
+ | CpuFeatureAVX2
+ | CpuFeatureBMI
+ | CpuFeatureBMI2;
+QT_END_NAMESPACE
#endif /* Q_PROCESSOR_X86 */
@@ -292,148 +308,36 @@
QT_BEGIN_NAMESPACE
+#ifndef Q_PROCESSOR_X86
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
- CpuFeatureNEON = 0,
+ CpuFeatureNEON = 2,
CpuFeatureARM_NEON = CpuFeatureNEON,
- CpuFeatureCRC32 = 1,
+ CpuFeatureCRC32 = 4,
#elif defined(Q_PROCESSOR_MIPS)
- CpuFeatureDSP = 0,
- CpuFeatureDSPR2 = 1,
-#elif defined(Q_PROCESSOR_X86)
- // The order of the flags is jumbled so it matches most closely the bits in CPUID
- // Out of order:
- CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ
- // in level 1, ECX
- CpuFeatureSSE3 = (0 + 0),
- CpuFeatureSSSE3 = (0 + 9),
- CpuFeatureSSE4_1 = (0 + 19),
- CpuFeatureSSE4_2 = (0 + 20),
- CpuFeatureMOVBE = (0 + 22),
- CpuFeaturePOPCNT = (0 + 23),
- CpuFeatureAES = (0 + 25),
- CpuFeatureAVX = (0 + 28),
- CpuFeatureF16C = (0 + 29),
- CpuFeatureRDRND = (0 + 30),
- // 31 is always zero and we've used it for the QSimdInitialized
-
- // in level 7, leaf 0, EBX
- CpuFeatureBMI = (32 + 3),
- CpuFeatureHLE = (32 + 4),
- CpuFeatureAVX2 = (32 + 5),
- CpuFeatureBMI2 = (32 + 8),
- CpuFeatureRTM = (32 + 11),
- CpuFeatureAVX512F = (32 + 16),
- CpuFeatureAVX512DQ = (32 + 17),
- CpuFeatureRDSEED = (32 + 18),
- CpuFeatureAVX512IFMA = (32 + 21),
- CpuFeatureAVX512PF = (32 + 26),
- CpuFeatureAVX512ER = (32 + 27),
- CpuFeatureAVX512CD = (32 + 28),
- CpuFeatureSHA = (32 + 29),
- CpuFeatureAVX512BW = (32 + 30),
- CpuFeatureAVX512VL = (32 + 31),
-
- // in level 7, leaf 0, ECX (out of order, for now)
- CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
+ CpuFeatureDSP = 2,
+ CpuFeatureDSPR2 = 4,
#endif
// used only to indicate that the CPU detection was initialised
- QSimdInitialized = 0x80000000
+ QSimdInitialized = 1
};
static const quint64 qCompilerCpuFeatures = 0
-#if defined __SHA__
- | (Q_UINT64_C(1) << CpuFeatureSHA)
-#endif
-#if defined __AES__
- | (Q_UINT64_C(1) << CpuFeatureAES)
-#endif
-#if defined __RTM__
- | (Q_UINT64_C(1) << CpuFeatureRTM)
-#endif
-#ifdef __RDRND__
- | (Q_UINT64_C(1) << CpuFeatureRDRND)
-#endif
-#ifdef __RDSEED__
- | (Q_UINT64_C(1) << CpuFeatureRDSEED)
-#endif
-#if defined __BMI__
- | (Q_UINT64_C(1) << CpuFeatureBMI)
-#endif
-#if defined __BMI2__
- | (Q_UINT64_C(1) << CpuFeatureBMI2)
-#endif
-#if defined __F16C__
- | (Q_UINT64_C(1) << CpuFeatureF16C)
-#endif
-#if defined __POPCNT__
- | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
-#endif
-#if defined __MOVBE__ // GCC and Clang don't seem to define this
- | (Q_UINT64_C(1) << CpuFeatureMOVBE)
-#endif
-#if defined __AVX512F__
- | (Q_UINT64_C(1) << CpuFeatureAVX512F)
-#endif
-#if defined __AVX512CD__
- | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
-#endif
-#if defined __AVX512ER__
- | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
-#endif
-#if defined __AVX512PF__
- | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
-#endif
-#if defined __AVX512BW__
- | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
-#endif
-#if defined __AVX512DQ__
- | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
-#endif
-#if defined __AVX512VL__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
-#endif
-#if defined __AVX512IFMA__
- | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
-#endif
-#if defined __AVX512VBMI__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
-#endif
-#if defined __AVX2__
- | (Q_UINT64_C(1) << CpuFeatureAVX2)
-#endif
-#if defined __AVX__
- | (Q_UINT64_C(1) << CpuFeatureAVX)
-#endif
-#if defined __SSE4_2__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
-#endif
-#if defined __SSE4_1__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
-#endif
-#if defined __SSSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSSE3)
-#endif
-#if defined __SSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSE3)
-#endif
-#if defined __SSE2__
- | (Q_UINT64_C(1) << CpuFeatureSSE2)
-#endif
#if defined __ARM_NEON__
- | (Q_UINT64_C(1) << CpuFeatureNEON)
+ | CpuFeatureNEON
#endif
#if defined __ARM_FEATURE_CRC32
- | (Q_UINT64_C(1) << CpuFeatureCRC32)
+ | CpuFeatureCRC32
#endif
#if defined __mips_dsp
- | (Q_UINT64_C(1) << CpuFeatureDSP)
+ | CpuFeatureDSP
#endif
#if defined __mips_dspr2
- | (Q_UINT64_C(1) << CpuFeatureDSPR2)
+ | CpuFeatureDSPR2
#endif
;
+#endif
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1];
@@ -459,8 +363,8 @@ static inline quint64 qCpuFeatures()
return features;
}
-#define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \
- || (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature)))
+#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
+ || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)