summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2015-07-17 14:25:37 -0700
committerThiago Macieira <thiago.macieira@intel.com>2015-09-22 06:22:13 +0000
commit0829baf902dbf5982732aa54454178f55b50bdc6 (patch)
tree5fed3bed708fe9d48774bacf9faf6f3508f21a9b
parent6a8251a89b6a61258498f4af1ba7b3d5b7f7096c (diff)
Expand reporting of the Intel instruction set extensions
Detection for most of them is free because we're loading the entire registers anyway. The only exception is AVX512VBMI, which is in a new register we hadn't yet read from. I've also added the new GCC names so they can be used with QT_FUNCTION_TARGET. The only two exceptions are "movbe" and "popcnt", which are extremely restricted in use and we are not likely to have code dedicated to using them. Change-Id: Ib306f8f647014b399b87ffff13f1d8fd29e58be0 Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/tools/qsimd.cpp75
-rw-r--r--src/corelib/tools/qsimd_p.h93
2 files changed, 155 insertions, 13 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 825028037f..d0c65a04b1 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -205,21 +205,24 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
+ qregisteruint rcx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx)
- : "a" (7), "c" (0)
+ : "=&r" (rbx), "+&c" (rcx)
+ : "a" (7)
: "%edx");
ebx = rbx;
+ ecx = rcx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
+ ecx = info[2];
#endif
}
@@ -257,7 +260,12 @@ static quint64 detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
- static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2);
+ static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
quint64 features = 0;
@@ -295,8 +303,9 @@ static quint64 detectProcessorFeatures()
}
uint cpuid0700EBX = 0;
+ uint cpuid0700ECX = 0;
if (cpuidLevel >= 7) {
- cpuidFeatures07_00(cpuid0700EBX);
+ cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
// the high 32-bits of features is cpuid0700EBX
features |= quint64(cpuid0700EBX) << 32;
@@ -305,6 +314,15 @@ static quint64 detectProcessorFeatures()
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
+ } else if ((xgetbvA & AVX512State) != AVX512State) {
+ // support for ZMM registers or mask registers is disabled, disable all AVX512
+ features &= ~AllAVX512;
+ } else {
+ // this feature is out of order
+ if (cpuid0700ECX & (1u << 1))
+ features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
+ else
+ features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@@ -484,7 +502,7 @@ static const int features_indices[] = {
/* Data:
sse3
sse2
-
+ avx512vbmi
@@ -526,10 +544,31 @@ static const int features_indices[] = {
rtm
+
+
+
+
+ avx512f
+ avx512dq
+ rdseed
+
+
+ avx512ifma
+
+
+
+
+ avx512pf
+ avx512er
+ avx512cd
+ sha
+ avx512bw
+ avx512vl
*/
static const char features_string[] =
" sse3\0"
" sse2\0"
+ " avx512vbmi\0"
" ssse3\0"
" fma\0"
" cmpxchg16b\0"
@@ -546,15 +585,27 @@ static const char features_string[] =
" avx2\0"
" bmi2\0"
" rtm\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
"\0";
static const quint8 features_indices[] = {
- 0, 6, 5, 5, 5, 5, 5, 5,
- 5, 12, 5, 5, 19, 24, 5, 5,
- 5, 5, 5, 36, 44, 5, 52, 59,
- 5, 67, 5, 5, 72, 77, 83, 5,
- 5, 5, 5, 91, 96, 101, 5, 5,
- 107, 5, 5, 113
+ 0, 6, 12, 5, 5, 5, 5, 5,
+ 5, 24, 5, 5, 31, 36, 5, 5,
+ 5, 5, 5, 48, 56, 5, 64, 71,
+ 5, 79, 5, 5, 84, 89, 95, 5,
+ 5, 5, 5, 103, 108, 113, 5, 5,
+ 119, 5, 5, 125, 5, 5, 5, 5,
+ 130, 139, 149, 5, 5, 157, 5, 5,
+ 5, 5, 169, 179, 189, 199, 204, 214
};
#else
static const char features_string[] = "";
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index b815e976a7..be003f6c6d 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -218,6 +218,23 @@
# endif
#endif
+#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
+#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
+#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
+#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
+#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
+#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
+#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
+#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
+
+#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
+#define QT_FUNCTION_TARGET_STRING_RDRAND "rdrnd"
+#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
+#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
+#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
+#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+
// other x86 intrinsics
#if defined(Q_PROCESSOR_X86) && ((defined(Q_CC_GNU) && (Q_CC_GNU >= 404)) \
|| (defined(Q_CC_CLANG) && (Q_CC_CLANG >= 208)) \
@@ -260,23 +277,97 @@ enum CPUFeatures {
CpuFeatureSSSE3 = (0 + 9),
CpuFeatureSSE4_1 = (0 + 19),
CpuFeatureSSE4_2 = (0 + 20),
+ CpuFeatureMOVBE = (0 + 22),
+ CpuFeaturePOPCNT = (0 + 23),
CpuFeatureAES = (0 + 25),
CpuFeatureAVX = (0 + 28),
+ CpuFeatureF16C = (0 + 29),
+ CpuFeatureRDRAND = (0 + 30),
+ // 31 is always zero and we've used it for the QSimdInitialized
// in level 7, leaf 0, EBX
+ CpuFeatureBMI = (32 + 3),
CpuFeatureHLE = (32 + 4),
CpuFeatureAVX2 = (32 + 5),
+ CpuFeatureBMI2 = (32 + 8),
CpuFeatureRTM = (32 + 11),
+ CpuFeatureAVX512F = (32 + 16),
+ CpuFeatureAVX512DQ = (32 + 17),
+ CpuFeatureRDSEED = (32 + 18),
+ CpuFeatureAVX512IFMA = (32 + 21),
+ CpuFeatureAVX512PF = (32 + 26),
+ CpuFeatureAVX512ER = (32 + 27),
+ CpuFeatureAVX512CD = (32 + 28),
+ CpuFeatureSHA = (32 + 29),
+ CpuFeatureAVX512BW = (32 + 30),
+ CpuFeatureAVX512VL = (32 + 31),
+
+ // in level 7, leaf 0, ECX (out of order, for now)
+ CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
#endif
// used only to indicate that the CPU detection was initialised
QSimdInitialized = 0x80000000
};
-static const uint qCompilerCpuFeatures = 0
+static const quint64 qCompilerCpuFeatures = 0
+#if defined __SHA__
+ | (Q_UINT64_C(1) << CpuFeatureSHA)
+#endif
+#if defined __AES__
+ | (Q_UINT64_C(1) << CpuFeatureAES)
+#endif
#if defined __RTM__
| (Q_UINT64_C(1) << CpuFeatureRTM)
#endif
+#ifdef __RDRND__
+ | (Q_UINT64_C(1) << CpuFeatureRDRAND)
+#endif
+#ifdef __RDSEED__
+ | (Q_UINT64_C(1) << CpuFeatureRDSEED)
+#endif
+#if defined __BMI__
+ | (Q_UINT64_C(1) << CpuFeatureBMI)
+#endif
+#if defined __BMI2__
+ | (Q_UINT64_C(1) << CpuFeatureBMI2)
+#endif
+#if defined __F16C__
+ | (Q_UINT64_C(1) << CpuFeatureF16C)
+#endif
+#if defined __POPCNT__
+ | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
+#endif
+#if defined __MOVBE__ // GCC and Clang don't seem to define this
+ | (Q_UINT64_C(1) << CpuFeatureMOVBE)
+#endif
+#if defined __AVX512F__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512F)
+#endif
+#if defined __AVX512CD__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
+#endif
+#if defined __AVX512ER__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
+#endif
+#if defined __AVX512PF__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
+#endif
+#if defined __AVX512BW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
+#endif
+#if defined __AVX512DQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
+#endif
+#if defined __AVX512VL__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
+#endif
+#if defined __AVX512IFMA__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
+#endif
+#if defined __AVX512VBMI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
+#endif
#if defined __AVX2__
| (Q_UINT64_C(1) << CpuFeatureAVX2)
#endif