[X86] Add shuffle tests for BLEND(PERMUTE(X),PERMUTE(Y)) patterns

Some very basic tests for a case where we could fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y)) These assume the permute masks are the same, and "complete" (no undefs/duplicate elements) but we could relax that depending on the blend mask
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2024-04-16 13:03:09 +0100
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2024-04-16 13:27:12 +0100
commit: 34013e7ce25868aa8ddea116f79184e8603af56c (patch)
tree: e0846652d87f4569f84aa24596969b84cac6c51c
parent: c309dc6d0759b23b570c563f611530ff1a49e1bd (diff)
3 files changed, 90 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 0c76c14afb0a..4859a8e0eaaa 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -305,6 +305,37 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
   ret <4 x float> %2
 }
 
+define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) {
+; AVX1-LABEL: combine_blend_of_permutes_v8i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
+; AVX1-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: combine_blend_of_permutes_v8i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; AVX512-LABEL: combine_blend_of_permutes_v8i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4,21,6,23,16,1,2,19]
+; AVX512-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    ret{{[l|q]}}
+  %s0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %s1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %x0 = bitcast <4 x i64> %s0 to <8 x i32>
+  %x1 = bitcast <4 x i64> %s1 to <8 x i32>
+  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15>
+  ret <8 x i32> %r
+}
+
 define <2 x double> @constant_fold_vpermilvar_pd() {
 ; CHECK-LABEL: constant_fold_vpermilvar_pd:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index f53b1eeaf8f5..e87e810971e1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -973,3 +973,47 @@ define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
   %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer)
   ret <8 x i64> %2
 }
+
+define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) {
+; X86-AVX512F-LABEL: blend_of_permutes_v16i32:
+; X86-AVX512F:       # %bb.0:
+; X86-AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; X86-AVX512F-NEXT:    vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
+; X86-AVX512F-NEXT:    movw $-25958, %ax # imm = 0x9A9A
+; X86-AVX512F-NEXT:    kmovw %eax, %k1
+; X86-AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
+; X86-AVX512F-NEXT:    retl
+;
+; X86-AVX512BW-LABEL: blend_of_permutes_v16i32:
+; X86-AVX512BW:       # %bb.0:
+; X86-AVX512BW-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; X86-AVX512BW-NEXT:    vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
+; X86-AVX512BW-NEXT:    movw $-25958, %ax # imm = 0x9A9A
+; X86-AVX512BW-NEXT:    kmovd %eax, %k1
+; X86-AVX512BW-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
+; X86-AVX512BW-NEXT:    retl
+;
+; X64-AVX512F-LABEL: blend_of_permutes_v16i32:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; X64-AVX512F-NEXT:    vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
+; X64-AVX512F-NEXT:    movw $-25958, %ax # imm = 0x9A9A
+; X64-AVX512F-NEXT:    kmovw %eax, %k1
+; X64-AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: blend_of_permutes_v16i32:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; X64-AVX512BW-NEXT:    vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
+; X64-AVX512BW-NEXT:    movw $-25958, %ax # imm = 0x9A9A
+; X64-AVX512BW-NEXT:    kmovd %eax, %k1
+; X64-AVX512BW-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
+; X64-AVX512BW-NEXT:    retq
+  %s0 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  %s1 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  %x0 = bitcast <8 x i64> %s0 to <16 x i32>
+  %x1 = bitcast <8 x i64> %s1 to <16 x i32>
+  %r = shufflevector <16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
+  ret <16 x i32> %r
+}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
index 5eb017bc80ca..33851f56fe8d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
@@ -22,6 +22,21 @@ define <16 x i8> @combine_vpshufb_as_movzx(<16 x i8> %a0) {
   ret <16 x i8> %res0
 }
 
+define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1) {
+; SSE-LABEL: combine_blend_of_permutes_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE-NEXT:    retq
+  %s0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %s1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %x0 = bitcast <2 x i64> %s0 to <4 x i32>
+  %x1 = bitcast <2 x i64> %s1 to <4 x i32>
+  %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %r
+}
+
 define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
 ; SSE-LABEL: PR50049:
 ; SSE:       # %bb.0:
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2024-04-16 13:03:09 +0100
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2024-04-16 13:27:12 +0100
commit	34013e7ce25868aa8ddea116f79184e8603af56c (patch)
tree	e0846652d87f4569f84aa24596969b84cac6c51c
parent	c309dc6d0759b23b570c563f611530ff1a49e1bd (diff)