diff options
author | Craig Topper <craig.topper@intel.com> | 2018-01-08 22:37:56 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-01-08 22:37:56 +0000 |
commit | 78c4666039c8228fc5725beaf94fa86a00bddb34 (patch) | |
tree | c6d86b3b588ba838c8ac9c9f077d204e9c27108c | |
parent | 590cfc2ac3ed6b2b18bab2eba2c78a8ba56435d4 (diff) |
[X86] Replace cvt*2mask intrinsics with native IR using 'icmp slt X, zeroinitializer'.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@322038 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 31 | ||||
-rw-r--r-- | test/CodeGen/avx512bw-builtins.c | 6 | ||||
-rw-r--r-- | test/CodeGen/avx512dq-builtins.c | 6 | ||||
-rw-r--r-- | test/CodeGen/avx512vlbw-builtins.c | 12 | ||||
-rw-r--r-- | test/CodeGen/avx512vldq-builtins.c | 15 |
5 files changed, 54 insertions, 16 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 631085a979..9ffc7de4bc 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -7791,7 +7791,9 @@ static Value *EmitX86Select(CodeGenFunction &CGF, } static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, - bool Signed, SmallVectorImpl<Value *> &Ops) { + bool Signed, ArrayRef<Value *> Ops) { + assert((Ops.size() == 2 || Ops.size() == 4) && + "Unexpected number of arguments"); unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); Value *Cmp; @@ -7815,9 +7817,11 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } - const auto *C = dyn_cast<Constant>(Ops.back()); - if (!C || !C->isAllOnesValue()) - Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + if (Ops.size() == 4) { + const auto *C = dyn_cast<Constant>(Ops[3]); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts)); + } if (NumElts < 8) { uint32_t Indices[8]; @@ -7833,6 +7837,11 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, std::max(NumElts, 8U))); } +static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { + Value *Zero = Constant::getNullValue(In->getType()); + return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); +} + static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *Ty = Ops[0]->getType(); @@ -8179,6 +8188,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtmask2q512: return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case 
X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 3160a6667c..f84df5c06b 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -1743,7 +1743,8 @@ __mmask32 test_mm512_mask_testn_epi16_mask(__mmask32 __U, __m512i __A, __m512i _ __mmask64 test_mm512_movepi8_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi8_mask - // CHECK: @llvm.x86.avx512.cvtb2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer + // CHECK: bitcast <64 x i1> [[CMP]] to i64 return _mm512_movepi8_mask(__A); } @@ -1941,7 +1942,8 @@ __m512i test_mm512_sad_epu8(__m512i __A, __m512i __B) { __mmask32 test_mm512_movepi16_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi16_mask - // CHECK: @llvm.x86.avx512.cvtw2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer + // CHECK: bitcast <32 x i1> [[CMP]] to i32 return _mm512_movepi16_mask(__A); } diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c index 1b21ca3c43..47943c5740 100644 --- a/test/CodeGen/avx512dq-builtins.c +++ b/test/CodeGen/avx512dq-builtins.c @@ -923,7 +923,8 @@ __m128d test_mm_maskz_reduce_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { __mmask16 test_mm512_movepi32_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi32_mask - // CHECK: @llvm.x86.avx512.cvtd2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer + // CHECK: bitcast <16 x i1> [[CMP]] to i16 return _mm512_movepi32_mask(__A); } @@ -943,7 +944,8 @@ __m512i 
test_mm512_movm_epi64(__mmask8 __A) { __mmask8 test_mm512_movepi64_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi64_mask - // CHECK: @llvm.x86.avx512.cvtq2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer + // CHECK: bitcast <8 x i1> [[CMP]] to i8 return _mm512_movepi64_mask(__A); } diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c index 23fbd4026a..7adc50c231 100644 --- a/test/CodeGen/avx512vlbw-builtins.c +++ b/test/CodeGen/avx512vlbw-builtins.c @@ -2601,13 +2601,15 @@ __mmask16 test_mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i _ __mmask16 test_mm_movepi8_mask(__m128i __A) { // CHECK-LABEL: @test_mm_movepi8_mask - // CHECK: @llvm.x86.avx512.cvtb2mask.128 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer + // CHECK: bitcast <16 x i1> [[CMP]] to i16 return _mm_movepi8_mask(__A); } __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: @test_mm256_movepi8_mask - // CHECK: @llvm.x86.avx512.cvtb2mask.256 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer + // CHECK: bitcast <32 x i1> [[CMP]] to i32 return _mm256_movepi8_mask(__A); } @@ -2985,13 +2987,15 @@ __m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) { } __mmask8 test_mm_movepi16_mask(__m128i __A) { // CHECK-LABEL: @test_mm_movepi16_mask - // CHECK: @llvm.x86.avx512.cvtw2mask.128 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer + // CHECK: bitcast <8 x i1> [[CMP]] to i8 return _mm_movepi16_mask(__A); } __mmask16 test_mm256_movepi16_mask(__m256i __A) { // CHECK-LABEL: @test_mm256_movepi16_mask - // CHECK: @llvm.x86.avx512.cvtw2mask.256 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer + // CHECK: bitcast <16 x i1> [[CMP]] to i16 return _mm256_movepi16_mask(__A); } diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c index 3ca4b2135e..0812d682dc 100644 --- 
a/test/CodeGen/avx512vldq-builtins.c +++ b/test/CodeGen/avx512vldq-builtins.c @@ -853,13 +853,16 @@ __m256 test_mm256_maskz_reduce_ps(__mmask8 __U, __m256 __A) { __mmask8 test_mm_movepi32_mask(__m128i __A) { // CHECK-LABEL: @test_mm_movepi32_mask - // CHECK: @llvm.x86.avx512.cvtd2mask.128 + // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> %{{.*}}, zeroinitializer + // CHECK: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: bitcast <8 x i1> [[SHUF]] to i8 return _mm_movepi32_mask(__A); } __mmask8 test_mm256_movepi32_mask(__m256i __A) { // CHECK-LABEL: @test_mm256_movepi32_mask - // CHECK: @llvm.x86.avx512.cvtd2mask.256 + // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer + // CHECK: bitcast <8 x i1> [[CMP]] to i8 return _mm256_movepi32_mask(__A); } @@ -896,13 +899,17 @@ __m256i test_mm256_movm_epi64(__mmask8 __A) { __mmask8 test_mm_movepi64_mask(__m128i __A) { // CHECK-LABEL: @test_mm_movepi64_mask - // CHECK: @llvm.x86.avx512.cvtq2mask.128 + // CHECK: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer + // CHECK: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // CHECK: bitcast <8 x i1> [[SHUF]] to i8 return _mm_movepi64_mask(__A); } __mmask8 test_mm256_movepi64_mask(__m256i __A) { // CHECK-LABEL: @test_mm256_movepi64_mask - // CHECK: @llvm.x86.avx512.cvtq2mask.256 + // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer + // CHECK: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: bitcast <8 x i1> [[SHUF]] to i8 return _mm256_movepi64_mask(__A); } |