[X86] Lowering integer truncation intrinsics to native IR

This patch lowers the _mm[256|512]_cvtepi{64|32|16}_epi{32|16|8} intrinsics to native IR in cases where the result's length is less than 128 bits. The resulting IR for 256-bit inputs is folded into VPMOV instructions, while for 128-bit inputs the vpshufb (or, in the 64-to-32-bit case, vinsertps) instructions are generated instead Differential Revision: https://reviews.llvm.org/D48712 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@336643 91177308-0d34-0410-b5e6-96231b3b80d8
author: Mikhail Dvoretckii <mikhail.dvoretckii@intel.com> 2018-07-10 08:22:44 +0000
committer: Mikhail Dvoretckii <mikhail.dvoretckii@intel.com> 2018-07-10 08:22:44 +0000
commit: b4d2cac846d91bba7f952a0ce7651425fb56d585 (patch)
tree: 6d93930c25e982f773813ed48f7791b801ab82c3 /test/CodeGen
parent: 5734eced0745f4cf3d04c6b07d5d048d8a785537 (diff)
2 files changed, 18 insertions, 9 deletions
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index 540ea223d9..7e4e64381c 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -8503,7 +8503,8 @@ void test_mm256_mask_cvtusepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i _
 
 __m128i test_mm_cvtepi32_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   return _mm_cvtepi32_epi8(__A); 
 }
 
@@ -8527,7 +8528,8 @@ void test_mm_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) {
 
 __m128i test_mm256_cvtepi32_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.256
+  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_cvtepi32_epi8(__A); 
 }
 
@@ -8551,7 +8553,8 @@ void test_mm256_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A)
 
 __m128i test_mm_cvtepi32_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.dw.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm_cvtepi32_epi16(__A); 
 }
 
@@ -8599,7 +8602,8 @@ void test_mm256_mask_cvtepi32_storeu_epi16(void *  __P, __mmask8 __M, __m256i __
 
 __m128i test_mm_cvtepi64_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8>
+  // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   return _mm_cvtepi64_epi8(__A); 
 }
 
@@ -8623,7 +8627,8 @@ void test_mm_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) {
 
 __m128i test_mm256_cvtepi64_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   return _mm256_cvtepi64_epi8(__A); 
 }
 
@@ -8647,7 +8652,8 @@ void test_mm256_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A)
 
 __m128i test_mm_cvtepi64_epi32(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi32
-  // CHECK: @llvm.x86.avx512.mask.pmov.qd.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32>
+  // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm_cvtepi64_epi32(__A); 
 }
 
@@ -8697,7 +8703,8 @@ void test_mm256_mask_cvtepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A
 
 __m128i test_mm_cvtepi64_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16>
+  // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
   return _mm_cvtepi64_epi16(__A); 
 }
 
@@ -8721,7 +8728,8 @@ void test_mm_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) {
 
 __m128i test_mm256_cvtepi64_epi16(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_cvtepi64_epi16(__A); 
 }
 
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index 607b011794..c28c3fbd7f 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -1792,7 +1792,8 @@ __m128i test_mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A) {
 
 __m128i test_mm_cvtepi16_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi16_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm_cvtepi16_epi8(__A); 
 }
author	Mikhail Dvoretckii <mikhail.dvoretckii@intel.com>	2018-07-10 08:22:44 +0000
committer	Mikhail Dvoretckii <mikhail.dvoretckii@intel.com>	2018-07-10 08:22:44 +0000
commit	b4d2cac846d91bba7f952a0ce7651425fb56d585 (patch)
tree	6d93930c25e982f773813ed48f7791b801ab82c3 /test/CodeGen
parent	5734eced0745f4cf3d04c6b07d5d048d8a785537 (diff)