summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuo, Yuanke <yuanke.luo@intel.com>2019-05-06 08:25:11 +0000
committerLuo, Yuanke <yuanke.luo@intel.com>2019-05-06 08:25:11 +0000
commit7bdbc78bb83e99cfc4bc1f9cfd9fa128cb850771 (patch)
treea581dfb7a92305873889170351a19c5bd8b5f48a
parent1b26ab1b5287ec6c00e8e506657abe438c90a71b (diff)
Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
Summary: 1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake; 2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision. For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference Patch by LiuTianle Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon Reviewed By: craig.topper Subscribers: mgorny, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D60552 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@360018 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/ClangCommandLineReference.rst2
-rw-r--r--include/clang/Basic/BuiltinsX86.def18
-rw-r--r--include/clang/Driver/Options.td2
-rw-r--r--lib/Basic/Targets/X86.cpp13
-rw-r--r--lib/Basic/Targets/X86.h1
-rw-r--r--lib/CodeGen/CGBuiltin.cpp8
-rw-r--r--lib/Headers/CMakeLists.txt2
-rw-r--r--lib/Headers/cpuid.h3
-rw-r--r--lib/Headers/immintrin.h9
-rw-r--r--test/CodeGen/attr-target-x86.c4
-rw-r--r--test/Driver/x86-target-features.c5
-rw-r--r--test/Preprocessor/x86_target_features.c15
12 files changed, 80 insertions, 2 deletions
diff --git a/docs/ClangCommandLineReference.rst b/docs/ClangCommandLineReference.rst
index e852c3e387..f153a0cd07 100644
--- a/docs/ClangCommandLineReference.rst
+++ b/docs/ClangCommandLineReference.rst
@@ -2610,6 +2610,8 @@ X86
.. option:: -mavx512bitalg, -mno-avx512bitalg
+.. option:: -mavx512bf16, -mno-avx512bf16
+
.. option:: -mavx512bw, -mno-avx512bw
.. option:: -mavx512cd, -mno-avx512cd
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 5cbab8a3b3..6eef6954f0 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1831,6 +1831,24 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:",
+ "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256, "V8sV8f", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512, "V16sV16f", "ncV:512:",
+ "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:",
+ "avx512bf16")
// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index 6d4aa1bf42..a293a39f28 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -2854,6 +2854,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
+def mavx512bf16 : Flag<["-"], "mavx512bf16">, Group<m_x86_Features_Group>;
+def mno_avx512bf16 : Flag<["-"], "mno-avx512bf16">, Group<m_x86_Features_Group>;
def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 2345d7ffcc..b83c9382fc 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -521,6 +521,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
Features["avx512bitalg"] = Features["avx512vnni"] =
Features["avx512vbmi2"] = false;
+ Features["avx512bf16"] = false;
break;
}
}
@@ -652,16 +653,22 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
+ Name == "avx512bf16" ||
Name == "avx512vnni" || Name == "avx512vbmi2") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
+ if (Name == "avx512bf16" && Enabled)
+ Features["avx512bw"] = Features["avx512vl"] = true;
// Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
Features["avx512vbmi"] = Features["avx512vbmi2"] =
+ Features["avx512bf16"] =
Features["avx512bitalg"] = false;
+ if (Name == "avx512vl" && !Enabled)
+ Features["avx512bf16"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
@@ -751,6 +758,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512vnni") {
HasAVX512VNNI = true;
+ } else if (Feature == "+avx512bf16") {
+ HasAVX512BF16 = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
@@ -1141,6 +1150,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512VNNI)
Builder.defineMacro("__AVX512VNNI__");
+ if (HasAVX512BF16)
+ Builder.defineMacro("__AVX512BF16__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
@@ -1305,6 +1316,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
+ .Case("avx512bf16", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
@@ -1383,6 +1395,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
+ .Case("avx512bf16", HasAVX512BF16)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 9f285d8d50..d7a87f8d7c 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512VNNI = false;
+ bool HasAVX512BF16 = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5abb62c560..14b8d0b18d 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -11851,6 +11851,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpordsd:
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
+// AVX512 bf16 intrinsics
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+ Ops[2] = getMaskVecValue(*this, Ops[2],
+ Ops[0]->getType()->getVectorNumElements());
+ Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+
case X86::BI__emul:
case X86::BI__emulu: {
llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index a921d99399..bfcade4584 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -6,6 +6,7 @@ set(files
armintr.h
arm64intr.h
avx2intrin.h
+ avx512bf16intrin.h
avx512bwintrin.h
avx512bitalgintrin.h
avx512vlbitalgintrin.h
@@ -21,6 +22,7 @@ set(files
avx512vbmivlintrin.h
avx512vbmi2intrin.h
avx512vlvbmi2intrin.h
+ avx512vlbf16intrin.h
avx512vlbwintrin.h
avx512vlcdintrin.h
avx512vldqintrin.h
diff --git a/lib/Headers/cpuid.h b/lib/Headers/cpuid.h
index 137b9bc7fb..ffe638adf3 100644
--- a/lib/Headers/cpuid.h
+++ b/lib/Headers/cpuid.h
@@ -184,6 +184,9 @@
#define bit_PCONFIG 0x00040000
#define bit_IBT 0x00100000
+/* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_AVX512BF16 0x00000020
+
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 1fda86ed26..2bd79bd88f 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -181,6 +181,15 @@
#include <avx512pfintrin.h>
#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
+#include <avx512bf16intrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512BF16__))
+#include <avx512vlbf16intrin.h>
+#endif
+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 56ccaf98ea..e3a2cb2e16 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -50,9 +50,9 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
// CHECK: use_before_def{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c
index ee2e6afd61..d925f6824c 100644
--- a/test/Driver/x86-target-features.c
+++ b/test/Driver/x86-target-features.c
@@ -178,3 +178,8 @@
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-invpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-INVPCID %s
// INVPCID: "-target-feature" "+invpcid"
// NO-INVPCID: "-target-feature" "-invpcid"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512BF16 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s
+// AVX512BF16: "-target-feature" "+avx512bf16"
+// NO-AVX512BF16: "-target-feature" "-avx512bf16"
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index 3e4ffe44dd..54f56a826b 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -443,3 +443,18 @@
// RUN: %clang -target i386-unknown-unknown -march=atom -mrdpid -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDPID %s
// RDPID: #define __RDPID__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16 %s
+
+// AVX512BF16: #define __AVX512BF16__ 1
+// AVX512BF16: #define __AVX512BW__ 1
+// AVX512BF16: #define __AVX512VL__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
+
+// AVX512BF16_NOAVX512BW-NOT: #define __AVX512BF16__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
+
+// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1
+