diff options
author | Craig Topper <craig.topper@sifive.com> | 2024-03-28 14:43:49 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-28 14:43:49 -0700 |
commit | 23d45e55edb0ca4567f5876e7051ff4a649213df (patch) | |
tree | 300e3ba5caf01869819168e872b378dcc5deb14d | |
parent | 62d6beba976142a58c7c95afd84a5d632ab4cd84 (diff) |
[MCP] Remove dead copies from basic blocks with successors. (#86973)
Previously we wouldn't remove dead copies from basic blocks with
successors. The comment said we didn't want to trust the live-in lists.
The comment is very old so I'm not sure if that's still a concern today.
This patch checks the live-in lists and removes copies from
MaybeDeadCopies if they are referenced by any live-ins in any
successors. We only do this if the tracksLiveness property is set. If
that property is not set, we retain the old behavior.
13 files changed, 40 insertions, 43 deletions
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 65c067e4874b..8dc6781fcb01 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -411,6 +411,7 @@ private: typedef enum { DebugUse = false, RegularUse = true } DebugType; void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT); + void readSuccessorLiveIns(const MachineBasicBlock &MBB); void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); void EliminateSpillageCopies(MachineBasicBlock &MBB); @@ -463,6 +464,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, } } +void MachineCopyPropagation::readSuccessorLiveIns( + const MachineBasicBlock &MBB) { + if (MaybeDeadCopies.empty()) + return; + + // If a copy result is livein to a successor, it is not dead. + for (const MachineBasicBlock *Succ : MBB.successors()) { + for (const auto &LI : Succ->liveins()) { + for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) { + if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) + MaybeDeadCopies.remove(Copy); + } + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -914,10 +931,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } - // If MBB doesn't have successors, delete the copies whose defs are not used. - // If MBB does have successors, then conservative assume the defs are live-out - // since we don't want to trust live-in lists. - if (MBB.succ_empty()) { + bool TracksLiveness = MRI->tracksLiveness(); + + // If liveness is tracked, we can use the live-in lists to know which + // copies aren't dead. 
+ if (TracksLiveness) + readSuccessorLiveIns(MBB); + + // If MBB doesn't have successors, delete copies whose defs are not used. + // If MBB does have successors, we can only delete copies if we are able to + // use liveness information from successors to confirm they are really dead. + if (MBB.succ_empty() || TracksLiveness) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); diff --git a/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir b/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir index 23cf1dcda839..5b379c2bd562 100644 --- a/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir +++ b/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir @@ -10,7 +10,6 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0, implicit $w0 ; CHECK-NEXT: $w8 = ORRWrs $wzr, $w0, 0, implicit-def $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index 932b230726a3..934ff44900c0 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -147,10 +147,10 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: mov v19.16b, v23.16b ; CHECK-NEXT: mov v3.d[1], x20 ; CHECK-NEXT: mov v23.16b, v27.16b -; CHECK-NEXT: mov v27.16b, v9.16b -; CHECK-NEXT: mul x15, x4, x5 ; CHECK-NEXT: add v27.2d, v9.2d, v1.2d +; CHECK-NEXT: mul x15, x4, x5 ; CHECK-NEXT: str q11, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v11.16b, v15.16b ; CHECK-NEXT: mov v4.d[1], x22 ; CHECK-NEXT: add v19.2d, v19.2d, v1.2d ; CHECK-NEXT: add v7.2d, v7.2d, v1.2d @@ -171,9 +171,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: mov v10.16b, v26.16b ; CHECK-NEXT: mov v14.d[1], x13 ; CHECK-NEXT: mov 
v22.16b, v31.16b -; CHECK-NEXT: mov v20.16b, v8.16b ; CHECK-NEXT: ldp q26, q31, [sp] // 32-byte Folded Reload -; CHECK-NEXT: mov v11.16b, v15.16b ; CHECK-NEXT: mov v0.d[1], x12 ; CHECK-NEXT: add v13.2d, v13.2d, v14.2d ; CHECK-NEXT: add v31.2d, v31.2d, v14.2d diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll index 1ebd864e7e03..29704959fc17 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -477,7 +477,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac ; GFX1032-NEXT: s_cbranch_execz .LBB1_3 ; GFX1032-NEXT: ; %bb.2: ; GFX1032-NEXT: v_mov_b32_e32 v0, s11 -; GFX1032-NEXT: s_mov_b32 s10, s11 ; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; GFX1032-NEXT: .LBB1_3: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 @@ -615,7 +614,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac ; GFX1132-NEXT: s_cbranch_execz .LBB1_3 ; GFX1132-NEXT: ; %bb.2: ; GFX1132-NEXT: v_mov_b32_e32 v0, s11 -; GFX1132-NEXT: s_mov_b32 s10, s11 ; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc ; GFX1132-NEXT: .LBB1_3: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9 diff --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll index c8278e58ad06..8748767501bd 100644 --- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll +++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll @@ -29,9 +29,7 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar ; CHECK-NEXT: nop ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: cmpwi 3, 0 -; CHECK-NEXT: crmove 20, 10 ; CHECK-NEXT: crorc 20, 10, 2 -; CHECK-NEXT: crmove 21, 2 ; CHECK-NEXT: bc 4, 20, .LBB0_4 ; CHECK-NEXT: # %bb.2: # %if.end5 ; CHECK-NEXT: addis 3, 2, .L.str@toc@ha @@ -76,11 +74,9 @@ 
define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar ; BE-NEXT: addi 3, 31, 128 ; BE-NEXT: bl _setjmp ; BE-NEXT: nop -; BE-NEXT: crmove 20, 10 ; BE-NEXT: # kill: def $r3 killed $r3 killed $x3 ; BE-NEXT: cmpwi 3, 0 ; BE-NEXT: crorc 20, 10, 2 -; BE-NEXT: crmove 21, 2 ; BE-NEXT: bc 4, 20, .LBB0_4 ; BE-NEXT: # %bb.2: # %if.end5 ; BE-NEXT: addis 3, 2, .L.str@toc@ha diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll index 767b7028a967..a0f8374e074d 100644 --- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -42,9 +42,8 @@ define i64 @loopif(ptr nocapture readonly %x, i32 %y, i32 %n) { ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: blt .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: mov lr, r2 -; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index 4ab569777b2a..93cab25c2cb7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -542,9 +542,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_simple(ptr noalias nocapture reado ; CHECK-NEXT: .pad #28 ; CHECK-NEXT: sub sp, #28 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: blt .LBB11_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload @@ -661,9 +659,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(ptr noalias nocapture read ; CHECK-NEXT: .pad #136 ; CHECK-NEXT: sub sp, #136 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill -; CHECK-NEXT: mov 
r1, r2 -; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #64] @ 8-byte Folded Spill ; CHECK-NEXT: blt.w .LBB12_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload @@ -952,11 +948,9 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_complex(ptr noalias nocapture read ; CHECK-NEXT: vstrw.32 q1, [sp, #152] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q1, [sp, #296] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q0, [sp, #168] @ 16-byte Spill -; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: vmov q3, q5 -; CHECK-NEXT: vadd.i32 q1, q1, r0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #248] @ 16-byte Reload ; CHECK-NEXT: vldrw.u32 q3, [sp, #216] @ 16-byte Reload +; CHECK-NEXT: vadd.i32 q1, q1, r0 ; CHECK-NEXT: vstrw.32 q5, [sp, #120] @ 16-byte Spill ; CHECK-NEXT: vadd.i32 q0, q0, r0 ; CHECK-NEXT: subs.w r11, r11, #16 @@ -1243,9 +1237,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado ; CHECK-NEXT: .pad #64 ; CHECK-NEXT: sub sp, #64 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #56] @ 8-byte Folded Spill ; CHECK-NEXT: blt.w .LBB14_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: adr r5, .LCPI14_3 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index 18c8a8a22ef2..7b8b884576d1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -609,7 +609,6 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: strd r0, r2, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r0, r3 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrne r0, [sp, #136] ; CHECK-NEXT: cmpne r0, #0 diff --git 
a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll index 9987ff940b5a..77980be90520 100644 --- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll @@ -108,9 +108,7 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef ; CHECK-NEXT: .pad #12 ; CHECK-NEXT: sub sp, #12 ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: strd r0, r1, [sp] @ 8-byte Folded Spill -; CHECK-NEXT: mov r1, r3 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: stm.w sp, {r0, r1, r3} @ 12-byte Folded Spill ; CHECK-NEXT: blt .LBB4_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph ; CHECK-NEXT: ldr r1, [sp, #48] diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 82a186bcc73d..c03339b52f26 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -1062,9 +1062,8 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: cmp r2, #8 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vstr s0, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: blo .LBB7_9 ; CHECK-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll index 219541cffb94..2e51e9e059f6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -95,14 +95,13 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo ; CHECK-NEXT: vmovx.f16 s8, s27 ; CHECK-NEXT: vins.f16 s12, s24 ; CHECK-NEXT: vins.f16 s13, s25 +; CHECK-NEXT: vins.f16 s2, s10 ; CHECK-NEXT: vins.f16 s3, s11 ; CHECK-NEXT: vins.f16 s1, s9 -; CHECK-NEXT: vins.f16 s2, s10 ; CHECK-NEXT: vins.f16 s22, s8 
; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s10, s4 ; CHECK-NEXT: vmov q6, q0 +; CHECK-NEXT: vmov.f32 s10, s4 ; CHECK-NEXT: vmov.f32 s11, s7 ; CHECK-NEXT: vmov.f32 s9, s0 ; CHECK-NEXT: vmov.f32 s17, s2 diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll index 1bd427c4a4b0..81dafdffe311 100644 --- a/llvm/test/CodeGen/X86/optimize-max-0.ll +++ b/llvm/test/CodeGen/X86/optimize-max-0.ll @@ -489,7 +489,6 @@ define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind { ; CHECK-NEXT: jb LBB1_4 ; CHECK-NEXT: ## %bb.5: ## %bb9 ; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl %edi, %edx diff --git a/llvm/test/CodeGen/X86/tls-loads-control3.ll b/llvm/test/CodeGen/X86/tls-loads-control3.ll index 82daac5a9bae..4e521b1c696a 100644 --- a/llvm/test/CodeGen/X86/tls-loads-control3.ll +++ b/llvm/test/CodeGen/X86/tls-loads-control3.ll @@ -183,7 +183,6 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 { ; HOIST0-NEXT: # %bb.1: # %while.body.preheader ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi ; HOIST0-NEXT: callq __tls_get_addr@PLT -; HOIST0-NEXT: movq %rax, %rcx ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15 ; HOIST0-NEXT: leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12 ; HOIST0-NEXT: .p2align 4, 0x90 @@ -245,9 +244,7 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 { ; HOIST2-NEXT: movq %rax, %r14 ; HOIST2-NEXT: addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax) ; HOIST2-NEXT: callq _Z5gfuncv@PLT -; HOIST2-NEXT: movl %eax, %ecx -; HOIST2-NEXT: movq %r14, %rax -; HOIST2-NEXT: addl %ecx, _ZZ2f2iE2st.1@DTPOFF(%r14) +; HOIST2-NEXT: addl %eax, _ZZ2f2iE2st.1@DTPOFF(%r14) ; HOIST2-NEXT: decl %ebx ; HOIST2-NEXT: jne .LBB1_2 ; HOIST2-NEXT: .LBB1_3: # %while.end |