summaryrefslogtreecommitdiffstats
path: root/include/clang/Basic/BuiltinsX86.def
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-06 00:24:55 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-06 00:24:55 +0000
commit26b20caa52ce8f4a08d1f8b6e2e927a490ae5e0c (patch)
treea098cfa538adb271f6c2ad2490f3820c636157e5 /include/clang/Basic/BuiltinsX86.def
parentb5a0e0ac7dc6e30aa1c9bf57b11d16f8fa4f7a29 (diff)
[X86] Add builtins for vector element insert and extract for different 128 and 256 bit vector types. Use them to implement the extract and insert intrinsics.
Previously we were just using extended vector operations in the header file. This unfortunately allowed non-constant indices to be used with the intrinsics. This is incompatible with gcc, icc, and MSVC. It also introduces a different performance characteristic because non-constant index gets lowered to a vector store and an element sized load. By adding the builtins we can check for the index to be a constant and ensure its in range of the vector element count. User code still has the option to use extended vector operations themselves if they need non-constant indexing. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334057 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/BuiltinsX86.def')
-rw-r--r--include/clang/Basic/BuiltinsX86.def17
1 files changed, 17 insertions, 0 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 0657354bd5..04646b8fd8 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -272,6 +272,11 @@ TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3")
TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3")
@@ -387,6 +392,10 @@ TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1")
// SSE 4.2
TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2")
@@ -519,6 +528,14 @@ TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx")
// AVX2
TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2")