summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCoby Tayree <coby.tayree@intel.com>2017-12-27 09:00:31 +0000
committerCoby Tayree <coby.tayree@intel.com>2017-12-27 09:00:31 +0000
commit74bc9cb6f3a05063aa28e9950696725edaf331c7 (patch)
tree135de7d4a2e07cbad3d9279a877a18c049a3cbdf
parent0a839f847066999507289787bc8a7860901b074a (diff)
[x86][icelake][vpclmulqdq]
added vpclmulqdq feature recognition added intrinsics support for vpclmulqdq instructions _mm256_clmulepi64_epi128 _mm512_clmulepi64_epi128 matching a similar work on the backend (D40101) Differential Revision: https://reviews.llvm.org/D41573 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@321480 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/Basic/BuiltinsX86.def4
-rw-r--r--include/clang/Driver/Options.td2
-rw-r--r--lib/Basic/Targets/X86.cpp17
-rw-r--r--lib/Basic/Targets/X86.h1
-rw-r--r--lib/Headers/CMakeLists.txt1
-rw-r--r--lib/Headers/immintrin.h4
-rw-r--r--lib/Headers/vpclmulqdqintrin.h42
-rw-r--r--test/CodeGen/attr-target-x86.c4
-rw-r--r--test/CodeGen/vpclmulqdq-builtins.c17
-rw-r--r--test/Driver/x86-target-features.c11
-rw-r--r--test/Preprocessor/predefined-arch-macros.c2
-rw-r--r--test/Preprocessor/x86_target_features.c9
12 files changed, 111 insertions, 3 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 95dfd8b4c1..481f7afd4c 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -453,6 +453,10 @@ TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gf
// CLMUL
TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul")
+// VPCLMULQDQ
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq")
+
// AVX
TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx")
TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index f528ddcadc..c062338f75 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -2547,6 +2547,8 @@ def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
def mvaes : Flag<["-"], "mvaes">, Group<m_x86_Features_Group>;
def mno_vaes : Flag<["-"], "mno-vaes">, Group<m_x86_Features_Group>;
+def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group<m_x86_Features_Group>;
+def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group<m_x86_Features_Group>;
def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 1cf5a92594..1609904017 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -134,6 +134,7 @@ bool X86TargetInfo::initFeatureMap(
case CK_Icelake:
setFeatureEnabledImpl(Features, "vaes", true);
setFeatureEnabledImpl(Features, "gfni", true);
+ setFeatureEnabledImpl(Features, "vpclmulqdq", true);
// TODO: Add icelake features here.
LLVM_FALLTHROUGH;
case CK_Cannonlake:
@@ -462,7 +463,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
LLVM_FALLTHROUGH;
case AVX:
Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
- Features["xsaveopt"] = Features["vaes"] = false;
+ Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false;
setXOPLevel(Features, FMA4, false);
LLVM_FALLTHROUGH;
case AVX2:
@@ -584,6 +585,13 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "pclmul") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
+ else
+ Features["vpclmulqdq"] = false;
+ } else if (Name == "vpclmulqdq") {
+ if (Enabled) {
+ setSSELevel(Features, AVX, Enabled);
+ Features["pclmul"] = true;
+ }
} else if (Name == "gfni") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
@@ -652,6 +660,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasVAES = true;
} else if (Feature == "+pclmul") {
HasPCLMUL = true;
+ } else if (Feature == "+vpclmulqdq") {
+ HasVPCLMULQDQ = true;
} else if (Feature == "+lzcnt") {
HasLZCNT = true;
} else if (Feature == "+rdrnd") {
@@ -956,6 +966,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasPCLMUL)
Builder.defineMacro("__PCLMUL__");
+ if (HasVPCLMULQDQ)
+ Builder.defineMacro("__VPCLMULQDQ__");
+
if (HasLZCNT)
Builder.defineMacro("__LZCNT__");
@@ -1209,6 +1222,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("sse4a", true)
.Case("tbm", true)
.Case("vaes", true)
+ .Case("vpclmulqdq", true)
.Case("x87", true)
.Case("xop", true)
.Case("xsave", true)
@@ -1275,6 +1289,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("sse4a", XOPLevel >= SSE4A)
.Case("tbm", HasTBM)
.Case("vaes", HasVAES)
+ .Case("vpclmulqdq", HasVPCLMULQDQ)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 2f60fd5181..ffdf8168c6 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -50,6 +50,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAES = false;
bool HasVAES = false;
bool HasPCLMUL = false;
+ bool HasVPCLMULQDQ = false;
bool HasGFNI = false;
bool HasLZCNT = false;
bool HasRDRND = false;
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 51861fc4f8..722c2851b6 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -86,6 +86,7 @@ set(files
vaesintrin.h
varargs.h
vecintrin.h
+ vpclmulqdqintrin.h
wmmintrin.h
__wmmintrin_aes.h
__wmmintrin_pclmul.h
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index a6cd6236b1..c39cb4dc33 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -118,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a)
}
#endif /* __AVX2__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
+#include <vpclmulqdqintrin.h>
+#endif
+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif
diff --git a/lib/Headers/vpclmulqdqintrin.h b/lib/Headers/vpclmulqdqintrin.h
new file mode 100644
index 0000000000..21cda22210
--- /dev/null
+++ b/lib/Headers/vpclmulqdqintrin.h
@@ -0,0 +1,42 @@
+/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VPCLMULQDQINTRIN_H
+#define __VPCLMULQDQINTRIN_H
+
+#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (char)(I)); })
+
+#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \
+ (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (char)(I)); })
+
+#endif // __VPCLMULQDQINTRIN_H
+
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 147f405e1d..55aca6ab3b 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
// CHECK: lake{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
diff --git a/test/CodeGen/vpclmulqdq-builtins.c b/test/CodeGen/vpclmulqdq-builtins.c
new file mode 100644
index 0000000000..8c610e2d85
--- /dev/null
+++ b/test/CodeGen/vpclmulqdq-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+
+#include <immintrin.h>
+
+__m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
+ // AVX: @llvm.x86.pclmulqdq.256
+ return _mm256_clmulepi64_epi128(A, B, 0);
+}
+
+#ifdef AVX512
+__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
+ // AVX512: @llvm.x86.pclmulqdq.512
+ return _mm512_clmulepi64_epi128(A, B, 0);
+}
+#endif
+
diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c
index b8b202c28a..0eb10e55c3 100644
--- a/test/Driver/x86-target-features.c
+++ b/test/Driver/x86-target-features.c
@@ -102,6 +102,17 @@
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mgfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=GFNI %s
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-gfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-GFNI %s
+<<<<<<<
// GFNI: "-target-feature" "+gfni"
// NO-GFNI: "-target-feature" "-gfni
+=======
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
+// CLZERO: "-target-feature" "+clzero"
+// NO-CLZERO: "-target-feature" "-clzero"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s
+// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
+// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
+>>>>>>>
diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c
index 5c44090189..2c12e45b9f 100644
--- a/test/Preprocessor/predefined-arch-macros.c
+++ b/test/Preprocessor/predefined-arch-macros.c
@@ -1081,6 +1081,7 @@
// CHECK_ICL_M32: #define __SSE__ 1
// CHECK_ICL_M32: #define __SSSE3__ 1
// CHECK_ICL_M32: #define __VAES__ 1
+// CHECK_ICL_M32: #define __VPCLMULQDQ__ 1
// CHECK_ICL_M32: #define __XSAVEC__ 1
// CHECK_ICL_M32: #define __XSAVEOPT__ 1
// CHECK_ICL_M32: #define __XSAVES__ 1
@@ -1128,6 +1129,7 @@
// CHECK_ICL_M64: #define __SSE__ 1
// CHECK_ICL_M64: #define __SSSE3__ 1
// CHECK_ICL_M64: #define __VAES__ 1
+// CHECK_ICL_M64: #define __VPCLMULQDQ__ 1
// CHECK_ICL_M64: #define __XSAVEC__ 1
// CHECK_ICL_M64: #define __XSAVEOPT__ 1
// CHECK_ICL_M64: #define __XSAVES__ 1
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index 1db49743d8..3581744560 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -384,3 +384,12 @@
// GFNI: #define __GFNI__ 1
// GFNI: #define __SSE2__ 1
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQ %s
+
+// VPCLMULQDQ: #define __PCLMUL__ 1
+// VPCLMULQDQ: #define __VPCLMULQDQ__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -mno-pclmul -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQNOPCLMUL %s
+// VPCLMULQDQNOPCLMUL-NOT: #define __PCLMUL__ 1
+// VPCLMULQDQNOPCLMUL-NOT: #define __VPCLMULQDQ__ 1
+