qsimd: add support for new x86 CPU features

This adds detection for: VAES, GFNI, AVX512VBMI2, AVX512VNNI, AVX512BITALG, AVX512VPOPCNTDQ, AVX512_4NNIW, AVX512_4FMAPS. These features were found in the "Intel® Architecture Instruction Set Extensions and Future Features" manual, revision 30. This commit also adds support for RDPID (already in the main manual) and the Control-flow Enforcement Technology, which appears in a separate Intel paper. This new support was done by adding a new generator script so we don't have to maintain two tables in sync, one in qsimd.cpp with the feature names, and the other in qsimd_p.h. Since we now need a lot more bits, it's no longer worth keeping the two halves of the qt_cpu_features variable mostly similar to the main two CPUID results. This commit goes back to keeping things in order, like we used to prior to commit 6a8251a89b6a61258498f4af1ba7b3d5b7f7096c (Qt 5.6). At the time of this commit, GCC 8 has macros for AVX512VPOPCNTDQ, AVX512_4NNIW, AVX512_4FMAPS, AVX512VBMI2 and GFNI. Change-Id: I938b024e38bf4aac9154fffd14f7afae50faaa96 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Lars Knoll <lars.knoll@qt.io>
author: Thiago Macieira <thiago.macieira@intel.com> 2017-09-17 12:39:35 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2018-05-05 06:20:07 +0000
commit: cf63b0e1dfc0bf3d11a92c5bf82840ddb6bb22ac (patch)
tree: b4aa1443bc4202f12dc1804606d3125851af8bc8 /src/corelib/tools/qsimd_p.h
parent: 6e1190053d28877b2e90375056735429525e2ee7 (diff)
1 files changed, 3 insertions, 141 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 3161ee7412..1b7ed57fa8 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -179,6 +179,7 @@
 
 #ifdef Q_PROCESSOR_X86
 /* -- x86 intrinsic support -- */
+#  include "qsimd_x86_p.h"
 
 #  if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
 // MSVC doesn't define __SSE2__, so do it ourselves
@@ -232,33 +233,6 @@
 #    define __RDRND__                       1
 #  endif
 
-#define QT_FUNCTION_TARGET_STRING_SSE2      "sse2"
-#define QT_FUNCTION_TARGET_STRING_SSE3      "sse3"
-#define QT_FUNCTION_TARGET_STRING_SSSE3     "ssse3"
-#define QT_FUNCTION_TARGET_STRING_SSE4_1    "sse4.1"
-#define QT_FUNCTION_TARGET_STRING_SSE4_2    "sse4.2"
-#define QT_FUNCTION_TARGET_STRING_AVX       "avx"
-#define QT_FUNCTION_TARGET_STRING_AVX2      "avx2"
-#define QT_FUNCTION_TARGET_STRING_AVX512F       "avx512f"
-#define QT_FUNCTION_TARGET_STRING_AVX512CD      "avx512cd"
-#define QT_FUNCTION_TARGET_STRING_AVX512ER      "avx512er"
-#define QT_FUNCTION_TARGET_STRING_AVX512PF      "avx512pf"
-#define QT_FUNCTION_TARGET_STRING_AVX512BW      "avx512bw"
-#define QT_FUNCTION_TARGET_STRING_AVX512DQ      "avx512dq"
-#define QT_FUNCTION_TARGET_STRING_AVX512VL      "avx512vl"
-#define QT_FUNCTION_TARGET_STRING_AVX512IFMA    "avx512ifma"
-#define QT_FUNCTION_TARGET_STRING_AVX512VBMI    "avx512vbmi"
-
-#define QT_FUNCTION_TARGET_STRING_AES           "aes,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_PCLMUL        "pclmul,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_POPCNT        "popcnt"
-#define QT_FUNCTION_TARGET_STRING_F16C          "f16c,avx"
-#define QT_FUNCTION_TARGET_STRING_RDRND         "rdrnd"
-#define QT_FUNCTION_TARGET_STRING_BMI           "bmi"
-#define QT_FUNCTION_TARGET_STRING_BMI2          "bmi2"
-#define QT_FUNCTION_TARGET_STRING_RDSEED        "rdseed"
-#define QT_FUNCTION_TARGET_STRING_SHA           "sha"
-
 #endif  /* Q_PROCESSOR_X86 */
 
 // Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
@@ -292,6 +266,7 @@
 
 QT_BEGIN_NAMESPACE
 
+#ifndef Q_PROCESSOR_X86
 enum CPUFeatures {
 #if defined(Q_PROCESSOR_ARM)
     CpuFeatureNEON          = 0,
@@ -300,42 +275,6 @@ enum CPUFeatures {
 #elif defined(Q_PROCESSOR_MIPS)
     CpuFeatureDSP           = 0,
     CpuFeatureDSPR2         = 1,
-#elif defined(Q_PROCESSOR_X86)
-    // The order of the flags is jumbled so it matches most closely the bits in CPUID
-    // Out of order:
-    CpuFeatureSSE2          = 1,                       // uses the bit for PCLMULQDQ
-    // in level 1, ECX
-    CpuFeatureSSE3          = (0 + 0),
-    CpuFeatureSSSE3         = (0 + 9),
-    CpuFeatureSSE4_1        = (0 + 19),
-    CpuFeatureSSE4_2        = (0 + 20),
-    CpuFeatureMOVBE         = (0 + 22),
-    CpuFeaturePOPCNT        = (0 + 23),
-    CpuFeatureAES           = (0 + 25),
-    CpuFeatureAVX           = (0 + 28),
-    CpuFeatureF16C          = (0 + 29),
-    CpuFeatureRDRND         = (0 + 30),
-    // 31 is always zero and we've used it for the QSimdInitialized
-
-    // in level 7, leaf 0, EBX
-    CpuFeatureBMI           = (32 + 3),
-    CpuFeatureHLE           = (32 + 4),
-    CpuFeatureAVX2          = (32 + 5),
-    CpuFeatureBMI2          = (32 + 8),
-    CpuFeatureRTM           = (32 + 11),
-    CpuFeatureAVX512F       = (32 + 16),
-    CpuFeatureAVX512DQ      = (32 + 17),
-    CpuFeatureRDSEED        = (32 + 18),
-    CpuFeatureAVX512IFMA    = (32 + 21),
-    CpuFeatureAVX512PF      = (32 + 26),
-    CpuFeatureAVX512ER      = (32 + 27),
-    CpuFeatureAVX512CD      = (32 + 28),
-    CpuFeatureSHA           = (32 + 29),
-    CpuFeatureAVX512BW      = (32 + 30),
-    CpuFeatureAVX512VL      = (32 + 31),
-
-    // in level 7, leaf 0, ECX (out of order, for now)
-    CpuFeatureAVX512VBMI    = 2,                       // uses the bit for DTES64
 #endif
 
     // used only to indicate that the CPU detection was initialised
@@ -343,84 +282,6 @@ enum CPUFeatures {
 };
 
 static const quint64 qCompilerCpuFeatures = 0
-#if defined __SHA__
-        | (Q_UINT64_C(1) << CpuFeatureSHA)
-#endif
-#if defined __AES__
-        | (Q_UINT64_C(1) << CpuFeatureAES)
-#endif
-#if defined __RTM__
-        | (Q_UINT64_C(1) << CpuFeatureRTM)
-#endif
-#ifdef __RDRND__
-        | (Q_UINT64_C(1) << CpuFeatureRDRND)
-#endif
-#ifdef __RDSEED__
-        | (Q_UINT64_C(1) << CpuFeatureRDSEED)
-#endif
-#if defined __BMI__
-        | (Q_UINT64_C(1) << CpuFeatureBMI)
-#endif
-#if defined __BMI2__
-        | (Q_UINT64_C(1) << CpuFeatureBMI2)
-#endif
-#if defined __F16C__
-        | (Q_UINT64_C(1) << CpuFeatureF16C)
-#endif
-#if defined __POPCNT__
-        | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
-#endif
-#if defined __MOVBE__           // GCC and Clang don't seem to define this
-        | (Q_UINT64_C(1) << CpuFeatureMOVBE)
-#endif
-#if defined __AVX512F__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512F)
-#endif
-#if defined __AVX512CD__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
-#endif
-#if defined __AVX512ER__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
-#endif
-#if defined __AVX512PF__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
-#endif
-#if defined __AVX512BW__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
-#endif
-#if defined __AVX512DQ__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
-#endif
-#if defined __AVX512VL__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
-#endif
-#if defined __AVX512IFMA__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
-#endif
-#if defined __AVX512VBMI__
-        | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
-#endif
-#if defined __AVX2__
-        | (Q_UINT64_C(1) << CpuFeatureAVX2)
-#endif
-#if defined __AVX__
-        | (Q_UINT64_C(1) << CpuFeatureAVX)
-#endif
-#if defined __SSE4_2__
-        | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
-#endif
-#if defined __SSE4_1__
-        | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
-#endif
-#if defined __SSSE3__
-        | (Q_UINT64_C(1) << CpuFeatureSSSE3)
-#endif
-#if defined __SSE3__
-        | (Q_UINT64_C(1) << CpuFeatureSSE3)
-#endif
-#if defined __SSE2__
-        | (Q_UINT64_C(1) << CpuFeatureSSE2)
-#endif
 #if defined __ARM_NEON__
         | (Q_UINT64_C(1) << CpuFeatureNEON)
 #endif
@@ -434,6 +295,7 @@ static const quint64 qCompilerCpuFeatures = 0
         | (Q_UINT64_C(1) << CpuFeatureDSPR2)
 #endif
         ;
+#endif
 
 #ifdef QT_BOOTSTRAPPED
 static inline quint64 qCpuFeatures()
author	Thiago Macieira <thiago.macieira@intel.com>	2017-09-17 12:39:35 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2018-05-05 06:20:07 +0000
commit	cf63b0e1dfc0bf3d11a92c5bf82840ddb6bb22ac (patch)
tree	b4aa1443bc4202f12dc1804606d3125851af8bc8 /src/corelib/tools/qsimd_p.h
parent	6e1190053d28877b2e90375056735429525e2ee7 (diff)