diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-04-08 11:01:06 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-04-08 11:01:19 +0100 |
commit | 170c525d79a4ab3659041b0655ac9697768fc915 (patch) | |
tree | b700c62bb216bb0d8a8fb40188d01a91e3b5e01a | |
parent | cf7d36fe342c5c5ac39150ca0b4b70a3d17ae66b (diff) |
[X86] combineExtractVectorElt - fold extract(trunc(x),c) -> trunc(extract(x,c))
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-insert-extract.ll | 29 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/insertelement-var-index.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/movmsk-cmp.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr63439.ll | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr64439.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_cast.ll | 2 |
7 files changed, 52 insertions, 68 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6f65344215c0..f24e0fc25fac 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44710,6 +44710,17 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } + // Attempt to fold extract(trunc(x),c) -> trunc(extract(x,c)). + if (CIdx && InputVector.getOpcode() == ISD::TRUNCATE) { + SDValue TruncSrc = InputVector.getOperand(0); + EVT TruncSVT = TruncSrc.getValueType().getScalarType(); + if (DCI.isBeforeLegalize() && TLI.isTypeLegal(TruncSVT)) { + SDValue NewExt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TruncSVT, TruncSrc, EltIdx); + return DAG.getAnyExtOrTrunc(NewExt, dl, VT); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 3e40bfa1e791..2a77d0238721 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -1050,11 +1050,9 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwin ; KNL: ## %bb.0: ; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpextrb $2, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1081,11 +1079,9 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin ; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: movzbl %al, %ecx ; KNL-NEXT: andl $1, %ecx ; KNL-NEXT: movl $4, %eax ; KNL-NEXT: subl %ecx, %eax @@ -1116,15 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind ; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: andb $1, %al -; KNL-NEXT: movb $4, %cl -; KNL-NEXT: subb %al, %cl -; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: addb $4, %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 5420e6b5ce86..16946caf9a32 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -2294,13 +2294,13 @@ define i32 @PR44139(ptr %p) { ; ; AVX1-LABEL: PR44139: ; AVX1: # %bb.0: +; AVX1-NEXT: movq (%rdi), %rax ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX1-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm1 +; AVX1-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vmovaps %ymm0, 64(%rdi) ; AVX1-NEXT: vmovaps %ymm0, 96(%rdi) ; AVX1-NEXT: vmovaps %ymm0, 32(%rdi) -; AVX1-NEXT: movl (%rdi), %eax ; AVX1-NEXT: vmovaps %ymm1, (%rdi) ; AVX1-NEXT: leal 2147483647(%rax), %ecx ; AVX1-NEXT: testl %eax, %eax @@ -2315,13 +2315,13 @@ define i32 @PR44139(ptr %p) { ; ; AVX2-LABEL: PR44139: ; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rax ; AVX2-NEXT: vpbroadcastq (%rdi), %ymm0 -; AVX2-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm1 +; AVX2-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vmovdqa %ymm0, 64(%rdi) ; AVX2-NEXT: vmovdqa %ymm0, 96(%rdi) ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdi) -; AVX2-NEXT: movl (%rdi), %eax ; AVX2-NEXT: vmovdqa %ymm1, (%rdi) ; AVX2-NEXT: leal 2147483647(%rax), %ecx ; AVX2-NEXT: testl %eax, %eax @@ -2336,14 +2336,12 @@ define i32 @PR44139(ptr %p) { ; ; AVX512-LABEL: PR44139: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512-NEXT: vpbroadcastq (%rdi), %zmm1 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpinsrq $1, (%rdi), %xmm1, %xmm2 -; AVX512-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm2 -; AVX512-NEXT: vmovdqa64 %zmm1, 64(%rdi) -; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi) -; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: vpbroadcastq (%rdi), %zmm0 +; AVX512-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 +; AVX512-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 +; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rdi) +; AVX512-NEXT: vmovdqa64 %zmm1, (%rdi) ; AVX512-NEXT: leal 2147483647(%rax), %ecx ; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: cmovnsl %eax, %ecx diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index e8b3121ecfb5..253f990f8735 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -3682,18 +3682,12 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) { ; KNL-LABEL: movmsk_v16i8: ; KNL: # %bb.0: ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: kshiftrw $8, %k0, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: kshiftrw $3, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: xorb %dl, %al -; KNL-NEXT: andb %cl, %al +; KNL-NEXT: vpextrb $15, %xmm0, %ecx +; KNL-NEXT: vpextrb $8, %xmm0, %edx +; KNL-NEXT: vpextrb $3, %xmm0, %eax +; KNL-NEXT: xorl %edx, %eax +; KNL-NEXT: andl %ecx, %eax ; KNL-NEXT: # kill: def $al killed $al killed $eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: movmsk_v16i8: diff --git a/llvm/test/CodeGen/X86/pr63439.ll b/llvm/test/CodeGen/X86/pr63439.ll index 155da0c62912..7018940faa81 100644 --- a/llvm/test/CodeGen/X86/pr63439.ll +++ b/llvm/test/CodeGen/X86/pr63439.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s define i16 @mulhs(i16 %a0, i16 %a1) { ; CHECK-LABEL: mulhs: ; CHECK: # %bb.0: -; CHECK-NEXT: movswl %si, %ecx -; CHECK-NEXT: movswl %di, %eax +; CHECK-NEXT: movswl %di, %ecx +; CHECK-NEXT: movswl %si, %eax ; CHECK-NEXT: imull %ecx, %eax ; CHECK-NEXT: shrl $16, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax @@ -23,23 +23,14 @@ define i16 @mulhs(i16 %a0, i16 %a1) { } define i16 @mulhu(i16 %a0, i16 %a1) { -; SSE-LABEL: mulhu: -; SSE: # %bb.0: -; SSE-NEXT: movzwl %si, %ecx -; SSE-NEXT: movzwl %di, %eax -; SSE-NEXT: imull %ecx, %eax -; SSE-NEXT: shrl $16, %eax -; SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SSE-NEXT: retq -; -; AVX-LABEL: mulhu: -; AVX: # %bb.0: -; AVX-NEXT: vmovd %edi, %xmm0 -; AVX-NEXT: vmovd %esi, %xmm1 -; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-NEXT: retq +; CHECK-LABEL: mulhu: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl %di, %ecx +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq %x0 = zext i16 %a0 to i32 %x1 = zext i16 %a1 to i32 %v0 = insertelement <1 x i32> <i32 undef>, i32 %x0, i32 0 diff --git a/llvm/test/CodeGen/X86/pr64439.ll b/llvm/test/CodeGen/X86/pr64439.ll index 7aa52fc49a9f..6e3d007dd78c 100644 --- a/llvm/test/CodeGen/X86/pr64439.ll +++ b/llvm/test/CodeGen/X86/pr64439.ll @@ -4,10 +4,9 @@ define void @f(ptr %0, <32 x i1> %1, i32 %2) nounwind { ; CHECK-LABEL: f: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 -; CHECK-NEXT: vpmovb2m %ymm0, %k0 -; CHECK-NEXT: kshiftrd $3, %k0, %k1 -; CHECK-NEXT: kmovd %k1, %eax +; CHECK-NEXT: vpsllw $7, %ymm0, %ymm1 +; CHECK-NEXT: vpmovb2m %ymm1, %k0 +; CHECK-NEXT: vpextrb $3, %xmm0, %eax ; CHECK-NEXT: vpbroadcastb %esi, %ymm0 ; CHECK-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 ; CHECK-NEXT: vpmovm2b %k0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll index 0a6bc2f59b68..e0089354cc95 100644 --- a/llvm/test/CodeGen/X86/vec_cast.ll +++ b/llvm/test/CodeGen/X86/vec_cast.ll @@ -156,7 +156,7 @@ define <3 x i16> @h(<3 x i32> %a) nounwind { ; CHECK-WIN-LABEL: h: ; CHECK-WIN: # %bb.0: ; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 -; CHECK-WIN-NEXT: movd %xmm0, %eax +; CHECK-WIN-NEXT: movl (%rcx), %eax ; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx ; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx ; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax |