diff options
author | Craig Topper <craig.topper@intel.com> | 2018-07-10 00:37:25 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-07-10 00:37:25 +0000 |
commit | 8ccbbcf85598e60c728f171857f3e82681860dc6 (patch) | |
tree | 0c864e8e4700b2c381ed78ab585f76fd1d016965 /test/CodeGen | |
parent | 48ab15fabbe6c74f02f5d6717960161877b0e588 (diff) |
[X86] Add __builtin_ia32_selectss_128 and __builtin_ia32_selectsd_128, which are suitable for use in scalar mask intrinsics.
This will convert the i8 mask argument to <8 x i1>, extract an i1, and then emit a select instruction. This replaces the '(__U & 1)' and ternary operator used in some of the intrinsics. The old sequence was lowered to a scalar AND and a compare. The new sequence uses an i1 vector that will interoperate better with other mask intrinsics.
This removes the need to handle div_ss/sd specially in CGBuiltin.cpp. A follow-up patch will add the GCCBuiltin name back in llvm and remove the custom handling.
I made some adjustments to the legacy move_ss/sd intrinsics, which we reused here, so that they do a simpler extract and insert instead of two extracts and two inserts or a shuffle.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@336622 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/avx512f-builtins.c | 196 | ||||
-rw-r--r-- | test/CodeGen/sse-builtins.c | 3 | ||||
-rw-r--r-- | test/CodeGen/sse2-builtins.c | 4 |
3 files changed, 123 insertions, 80 deletions
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index be5fcb207c..2beae24e45 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -3150,11 +3150,12 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fadd float %{{.*}}, %{{.*}} // CHECK: insertelement <4 x float> %{{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_mask_add_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { @@ -3164,10 +3165,12 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fadd float %{{.*}}, %{{.*}} // CHECK: insertelement <4 x float> %{{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_maskz_add_ss(__U,__A,__B); } __m128d test_mm_add_round_sd(__m128d __A, __m128d __B) { @@ -3192,11 
+3195,12 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fadd double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_mask_add_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { @@ -3206,10 +3210,12 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fadd double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_maskz_add_sd(__U,__A,__B); } __m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) { @@ -3292,11 +3298,12 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fsub float %{{.*}}, 
%{{.*}} // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_mask_sub_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { @@ -3306,10 +3313,12 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fsub float %{{.*}}, %{{.*}} // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_maskz_sub_ss(__U,__A,__B); } __m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) { @@ -3334,11 +3343,12 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fsub double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <2 x double> 
%{{.*}}, i32 0 - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_mask_sub_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { @@ -3348,10 +3358,12 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fsub double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_maskz_sub_sd(__U,__A,__B); } __m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) { @@ -3434,11 +3446,12 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fmul float %{{.*}}, %{{.*}} // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> 
%{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_mask_mul_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { @@ -3448,10 +3461,12 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fmul float %{{.*}}, %{{.*}} // CHECK: insertelement <4 x float> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} + // CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 return _mm_maskz_mul_ss(__U,__A,__B); } __m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) { @@ -3476,11 +3491,12 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fmul double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // 
CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_mask_mul_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { @@ -3490,10 +3506,12 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fmul double %{{.*}}, %{{.*}} // CHECK: insertelement <2 x double> {{.*}}, i32 0 - // CHECK: and i32 {{.*}}, 1 - // CHECK: icmp ne i32 %{{.*}}, 0 - // CHECK: br {{.*}}, {{.*}}, {{.*}} - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} + // CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 return _mm_maskz_mul_sd(__U,__A,__B); } __m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) { @@ -3581,10 +3599,12 @@ __m128 test_mm_maskz_div_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { } __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_div_ss + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: fdiv float %{{.*}}, %{{.*}} + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: fdiv float %{{.*}}, %{{.*}} // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} @@ -3593,10 +3613,12 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { } __m128 
test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_div_ss + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: fdiv float %{{.*}}, %{{.*}} + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK-NEXT: fdiv float %{{.*}}, %{{.*}} // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 // CHECK-NEXT: select i1 %{{.*}}, float %{{.*}}, float %{{.*}} @@ -3620,10 +3642,12 @@ __m128d test_mm_maskz_div_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { } __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_div_sd + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: fdiv double %{{.*}}, %{{.*}} + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: fdiv double %{{.*}}, %{{.*}} // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} @@ -3632,10 +3656,12 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_div_sd + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: fdiv double %{{.*}}, %{{.*}} + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 // CHECK: extractelement <2 x double> 
%{{.*}}, i64 0 // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK-NEXT: fdiv double %{{.*}}, %{{.*}} // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 // CHECK-NEXT: select i1 %{{.*}}, double %{{.*}}, double %{{.*}} @@ -10531,38 +10557,56 @@ int test_mm512_mask2int(__mmask16 __a) __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_move_ss - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: phi float [ %{{.*}}, %{{.*}} ], [ %{{.*}}, %{{.*}} ] - // CHECK: insertelement <4 x float> %{{.*}}, float %cond.i, i32 0 + // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, float [[A]], float [[B]] + // CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0 return _mm_mask_move_ss ( __W, __U, __A, __B); } __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_move_ss - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: phi float [ %{{.*}}, %{{.*}} ], [ 0.000000e+00, %{{.*}} ] - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: 
extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, float [[A]], float [[B]] + // CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0 return _mm_maskz_move_ss (__U, __A, __B); } __m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_move_sd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: phi double [ %{{.*}}, %{{.*}} ], [ %{{.*}}, %{{.*}} ] - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, double [[A]], double [[B]] + // CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0 return _mm_mask_move_sd ( __W, __U, __A, __B); } __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_move_sd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: phi double [ %{{.*}}, %{{.*}} ], [ 0.000000e+00, %{{.*}} ] - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 + // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 + // CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> %{{.*}}, i64 0 + // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1> + // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0 + // CHECK-NEXT: [[SEL:%.*]] = select i1 %13, double [[A]], double [[B]] + // CHECK-NEXT: insertelement <2 x double> 
[[VEC]], double [[SEL]], i64 0 return _mm_maskz_move_sd (__U, __A, __B); } diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c index b7c7a7fc7e..e9801487be 100644 --- a/test/CodeGen/sse-builtins.c +++ b/test/CodeGen/sse-builtins.c @@ -450,7 +450,8 @@ __m128 test_mm_min_ss(__m128 A, __m128 B) { __m128 test_mm_move_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_move_ss - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_move_ss(A, B); } diff --git a/test/CodeGen/sse2-builtins.c b/test/CodeGen/sse2-builtins.c index dbf79c4736..fe7f7ccf83 100644 --- a/test/CodeGen/sse2-builtins.c +++ b/test/CodeGen/sse2-builtins.c @@ -794,9 +794,7 @@ __m128i test_mm_move_epi64(__m128i A) { __m128d test_mm_move_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_move_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 - // CHECK: extractelement <2 x double> %{{.*}}, i32 1 - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_move_sd(A, B); } |