summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCoby Tayree <coby.tayree@intel.com>2017-12-27 10:01:00 +0000
committerCoby Tayree <coby.tayree@intel.com>2017-12-27 10:01:00 +0000
commit8b794e9ef26a956e0c720c9daf049cfe23641717 (patch)
tree1971b7d01432d27ed66c687bdc3ad2f8c5166df9
parent4086d83a7b37ebaf959bf235ae6ce154fc2d81cb (diff)
[x86][icelake][bitalg]
added bitalg feature recognition added intrinsics support for bitalg instructions _mm512_popcnt_epi16 _mm512_mask_popcnt_epi16 _mm512_maskz_popcnt_epi16 _mm512_popcnt_epi8 _mm512_mask_popcnt_epi8 _mm512_maskz_popcnt_epi8 _mm512_mask_bitshuffle_epi64_mask _mm512_bitshuffle_epi64_mask _mm256_popcnt_epi16 _mm256_mask_popcnt_epi16 _mm256_maskz_popcnt_epi16 _mm128_popcnt_epi16 _mm128_mask_popcnt_epi16 _mm128_maskz_popcnt_epi16 _mm256_popcnt_epi8 _mm256_mask_popcnt_epi8 _mm256_maskz_popcnt_epi8 _mm128_popcnt_epi8 _mm128_mask_popcnt_epi8 _mm128_maskz_popcnt_epi8 _mm256_mask_bitshuffle_epi32_mask _mm256_bitshuffle_epi32_mask _mm128_mask_bitshuffle_epi16_mask _mm128_bitshuffle_epi16_mask matching a similar work on the backend (D40222) Differential Revision: https://reviews.llvm.org/D41564 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@321483 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/Basic/BuiltinsX86.def11
-rw-r--r--include/clang/Driver/Options.td2
-rw-r--r--lib/Basic/Targets/X86.cpp20
-rw-r--r--lib/Basic/Targets/X86.h1
-rw-r--r--lib/CodeGen/CGBuiltin.cpp8
-rw-r--r--lib/Headers/CMakeLists.txt2
-rw-r--r--lib/Headers/avx512bitalgintrin.h97
-rw-r--r--lib/Headers/avx512vlbitalgintrin.h157
-rw-r--r--lib/Headers/immintrin.h9
-rw-r--r--test/CodeGen/attr-target-x86.c4
-rw-r--r--test/CodeGen/avx512bitalg-builtins.c54
-rw-r--r--test/CodeGen/avx512vlbitalg-builtins.c104
-rw-r--r--test/Driver/x86-target-features.c5
-rw-r--r--test/Preprocessor/predefined-arch-macros.c2
-rw-r--r--test/Preprocessor/x86_target_features.c22
15 files changed, 489 insertions, 9 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 481f7afd4c..1039b3c0b3 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1092,6 +1092,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg")
+
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg")
+
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index c062338f75..d24eb736f0 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
+def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
+def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 1609904017..e0ede257af 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -135,6 +135,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "vaes", true);
setFeatureEnabledImpl(Features, "gfni", true);
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
+ setFeatureEnabledImpl(Features, "avx512bitalg", true);
// TODO: Add icelake features here.
LLVM_FALLTHROUGH;
case CK_Cannonlake:
@@ -473,7 +474,8 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
- Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
+ Features["avx512ifma"] = Features["avx512vpopcntdq"] =
+ Features["avx512bitalg"] = false;
break;
}
}
@@ -604,15 +606,15 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
- Name == "avx512vpopcntdq") {
+ Name == "avx512vpopcntdq" || Name == "avx512bitalg") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
- // Enable BWI instruction if VBMI is being enabled.
- if (Name == "avx512vbmi" && Enabled)
+ // Enable BWI instruction if VBMI / BITALG is being enabled.
+ if ((Name == "avx512vbmi" || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
- // Also disable VBMI if BWI is being disabled.
+ // Also disable VBMI / BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
- Features["avx512vbmi"] = false;
+ Features["avx512vbmi"] = Features["avx512bitalg"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
@@ -702,6 +704,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512PF = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
+ } else if (Feature == "+avx512bitalg") {
+ HasAVX512BITALG = true;
} else if (Feature == "+avx512bw") {
HasAVX512BW = true;
} else if (Feature == "+avx512vl") {
@@ -1041,6 +1045,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
+ if (HasAVX512BITALG)
+ Builder.defineMacro("__AVX512BITALG__");
if (HasAVX512BW)
Builder.defineMacro("__AVX512BW__");
if (HasAVX512VL)
@@ -1179,6 +1185,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
+ .Case("avx512bitalg", true)
.Case("avx512bw", true)
.Case("avx512vl", true)
.Case("avx512vbmi", true)
@@ -1244,6 +1251,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
+ .Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
.Case("avx512vl", HasAVX512VL)
.Case("avx512vbmi", HasAVX512VBMI)
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index ffdf8168c6..eb742c9e09 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -71,6 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
+ bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
bool HasAVX512VL = false;
bool HasAVX512VBMI = false;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 609987c4fa..ba54f8342f 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
+ case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_vpopcntq_128:
+ case X86::BI__builtin_ia32_vpopcntw_128:
+ case X86::BI__builtin_ia32_vpopcntb_256:
case X86::BI__builtin_ia32_vpopcntd_256:
case X86::BI__builtin_ia32_vpopcntq_256:
+ case X86::BI__builtin_ia32_vpopcntw_256:
+ case X86::BI__builtin_ia32_vpopcntb_512:
case X86::BI__builtin_ia32_vpopcntd_512:
- case X86::BI__builtin_ia32_vpopcntq_512: {
+ case X86::BI__builtin_ia32_vpopcntq_512:
+ case X86::BI__builtin_ia32_vpopcntw_512: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, Ops);
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 722c2851b6..d3b577e309 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -7,6 +7,8 @@ set(files
arm64intr.h
avx2intrin.h
avx512bwintrin.h
+ avx512bitalgintrin.h
+ avx512vlbitalgintrin.h
avx512cdintrin.h
avx512vpopcntdqintrin.h
avx512dqintrin.h
diff --git a/lib/Headers/avx512bitalgintrin.h b/lib/Headers/avx512bitalgintrin.h
new file mode 100644
index 0000000000..2dd1471d2f
--- /dev/null
+++ b/lib/Headers/avx512bitalgintrin.h
@@ -0,0 +1,97 @@
+/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512BITALGINTRIN_H
+#define __AVX512BITALGINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_popcnt_epi16(__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
+ (__v32hi) _mm512_popcnt_epi16(__B),
+ (__v32hi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
+{
+ return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
+ __U,
+ __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_popcnt_epi8(__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
+ (__v64qi) _mm512_popcnt_epi8(__B),
+ (__v64qi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
+{
+ return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
+ __U,
+ __B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
+ (__v64qi) __B,
+ __U);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
+{
+ return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
+ __A,
+ __B);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512vlbitalgintrin.h b/lib/Headers/avx512vlbitalgintrin.h
new file mode 100644
index 0000000000..76eb87721b
--- /dev/null
+++ b/lib/Headers/avx512vlbitalgintrin.h
@@ -0,0 +1,157 @@
+/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLBITALGINTRIN_H
+#define __AVX512VLBITALGINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_popcnt_epi16(__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
+ (__v16hi) _mm256_popcnt_epi16(__B),
+ (__v16hi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
+{
+ return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
+ __U,
+ __B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_popcnt_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
+ (__v8hi) _mm128_popcnt_epi16(__B),
+ (__v8hi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
+{
+ return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
+ __U,
+ __B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_popcnt_epi8(__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
+ (__v32qi) _mm256_popcnt_epi8(__B),
+ (__v32qi) __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
+{
+ return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
+ __U,
+ __B);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_popcnt_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
+ (__v16qi) _mm128_popcnt_epi8(__B),
+ (__v16qi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
+{
+ return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
+ __U,
+ __B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
+ (__v32qi) __B,
+ __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
+{
+ return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
+ __A,
+ __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
+ (__v16qi) __B,
+ __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
+{
+ return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
+ __A,
+ __B);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index c39cb4dc33..714398b36d 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -150,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512bwintrin.h>
#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
+#include <avx512bitalgintrin.h>
+#endif
+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
@@ -168,6 +172,11 @@ _mm256_cvtph_ps(__m128i __a)
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512BITALG__))
+#include <avx512vlbitalgintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 55aca6ab3b..2facfe0a0e 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
// CHECK: lake{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
diff --git a/test/CodeGen/avx512bitalg-builtins.c b/test/CodeGen/avx512bitalg-builtins.c
new file mode 100644
index 0000000000..5770c662f0
--- /dev/null
+++ b/test/CodeGen/avx512bitalg-builtins.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_popcnt_epi16(__m512i __A) {
+ // CHECK-LABEL: @test_mm512_popcnt_epi16
+ // CHECK: @llvm.ctpop.v32i16
+ return _mm512_popcnt_epi16(__A);
+}
+
+__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_mask_popcnt_epi16
+ // CHECK: @llvm.ctpop.v32i16
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+ return _mm512_mask_popcnt_epi16(__A, __U, __B);
+}
+__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
+ // CHECK: @llvm.ctpop.v32i16
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+ return _mm512_maskz_popcnt_epi16(__U, __B);
+}
+
+__m512i test_mm512_popcnt_epi8(__m512i __A) {
+ // CHECK-LABEL: @test_mm512_popcnt_epi8
+ // CHECK: @llvm.ctpop.v64i8
+ return _mm512_popcnt_epi8(__A);
+}
+
+__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_mask_popcnt_epi8
+ // CHECK: @llvm.ctpop.v64i8
+ // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ return _mm512_mask_popcnt_epi8(__A, __U, __B);
+}
+__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
+ // CHECK: @llvm.ctpop.v64i8
+ // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ return _mm512_maskz_popcnt_epi8(__U, __B);
+}
+
+__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
+ return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B);
+}
+
+__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
+ return _mm512_bitshuffle_epi64_mask(__A, __B);
+}
+
diff --git a/test/CodeGen/avx512vlbitalg-builtins.c b/test/CodeGen/avx512vlbitalg-builtins.c
new file mode 100644
index 0000000000..9b2a1a469b
--- /dev/null
+++ b/test/CodeGen/avx512vlbitalg-builtins.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256i test_mm256_popcnt_epi16(__m256i __A) {
+ // CHECK-LABEL: @test_mm256_popcnt_epi16
+ // CHECK: @llvm.ctpop.v16i16
+ return _mm256_popcnt_epi16(__A);
+}
+
+__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_mask_popcnt_epi16
+ // CHECK: @llvm.ctpop.v16i16
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
+ return _mm256_mask_popcnt_epi16(__A, __U, __B);
+}
+__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_maskz_popcnt_epi16
+ // CHECK: @llvm.ctpop.v16i16
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
+ return _mm256_maskz_popcnt_epi16(__U, __B);
+}
+
+__m128i test_mm128_popcnt_epi16(__m128i __A) {
+ // CHECK-LABEL: @test_mm128_popcnt_epi16
+ // CHECK: @llvm.ctpop.v8i16
+ return _mm128_popcnt_epi16(__A);
+}
+
+__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_mask_popcnt_epi16
+ // CHECK: @llvm.ctpop.v8i16
+ // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
+ return _mm128_mask_popcnt_epi16(__A, __U, __B);
+}
+__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_maskz_popcnt_epi16
+ // CHECK: @llvm.ctpop.v8i16
+ // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
+ return _mm128_maskz_popcnt_epi16(__U, __B);
+}
+
+__m256i test_mm256_popcnt_epi8(__m256i __A) {
+ // CHECK-LABEL: @test_mm256_popcnt_epi8
+ // CHECK: @llvm.ctpop.v32i8
+ return _mm256_popcnt_epi8(__A);
+}
+
+__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_mask_popcnt_epi8
+ // CHECK: @llvm.ctpop.v32i8
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_mask_popcnt_epi8(__A, __U, __B);
+}
+__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_maskz_popcnt_epi8
+ // CHECK: @llvm.ctpop.v32i8
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_maskz_popcnt_epi8(__U, __B);
+}
+
+__m128i test_mm128_popcnt_epi8(__m128i __A) {
+ // CHECK-LABEL: @test_mm128_popcnt_epi8
+ // CHECK: @llvm.ctpop.v16i8
+ return _mm128_popcnt_epi8(__A);
+}
+
+__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_mask_popcnt_epi8
+ // CHECK: @llvm.ctpop.v16i8
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ return _mm128_mask_popcnt_epi8(__A, __U, __B);
+}
+__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_maskz_popcnt_epi8
+ // CHECK: @llvm.ctpop.v16i8
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ return _mm128_maskz_popcnt_epi8(__U, __B);
+}
+
+__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
+ return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B);
+}
+
+__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
+ return _mm256_bitshuffle_epi32_mask(__A, __B);
+}
+
+__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
+ return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B);
+}
+
+__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) {
+ // CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask
+ // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
+ return _mm128_bitshuffle_epi16_mask(__A, __B);
+}
+
diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c
index 5fe3cd37f9..57d3265bd4 100644
--- a/test/Driver/x86-target-features.c
+++ b/test/Driver/x86-target-features.c
@@ -110,3 +110,8 @@
// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s
+// BITALG: "-target-feature" "+avx512bitalg"
+// NO-BITALG: "-target-feature" "-avx512bitalg"
+
diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c
index 2c12e45b9f..dc552c76b5 100644
--- a/test/Preprocessor/predefined-arch-macros.c
+++ b/test/Preprocessor/predefined-arch-macros.c
@@ -1050,6 +1050,7 @@
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32
// CHECK_ICL_M32: #define __AES__ 1
// CHECK_ICL_M32: #define __AVX2__ 1
+// CHECK_ICL_M32: #define __AVX512BITALG__ 1
// CHECK_ICL_M32: #define __AVX512BW__ 1
// CHECK_ICL_M32: #define __AVX512CD__ 1
// CHECK_ICL_M32: #define __AVX512DQ__ 1
@@ -1098,6 +1099,7 @@
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64
// CHECK_ICL_M64: #define __AES__ 1
// CHECK_ICL_M64: #define __AVX2__ 1
+// CHECK_ICL_M64: #define __AVX512BITALG__ 1
// CHECK_ICL_M64: #define __AVX512BW__ 1
// CHECK_ICL_M64: #define __AVX512CD__ 1
// CHECK_ICL_M64: #define __AVX512DQ__ 1
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index 3581744560..61a57246a5 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -209,11 +209,33 @@
// AVX512VBMI: #define __SSE__ 1
// AVX512VBMI: #define __SSSE3__ 1
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s
+
+// AVX512BITALG: #define __AVX2__ 1
+// AVX512BITALG: #define __AVX512BITALG__ 1
+// AVX512BITALG: #define __AVX512BW__ 1
+// AVX512BITALG: #define __AVX512F__ 1
+// AVX512BITALG: #define __AVX__ 1
+// AVX512BITALG: #define __SSE2_MATH__ 1
+// AVX512BITALG: #define __SSE2__ 1
+// AVX512BITALG: #define __SSE3__ 1
+// AVX512BITALG: #define __SSE4_1__ 1
+// AVX512BITALG: #define __SSE4_2__ 1
+// AVX512BITALG: #define __SSE_MATH__ 1
+// AVX512BITALG: #define __SSE__ 1
+// AVX512BITALG: #define __SSSE3__ 1
+
+
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s
// AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1
// AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s
+
+// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1
+// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1
+
// RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s
// SSE42POPCNT: #define __POPCNT__ 1