diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 45 |
1 file changed, 15 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 27621906e4c5..bb499c5c8c57 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -708,9 +708,6 @@ public:
                                  WaitcntBrackets &ScoreBrackets,
                                  MachineInstr *OldWaitcntInstr,
                                  bool FlushVmCnt);
-  bool generateWaitcntBlockEnd(MachineBasicBlock &Block,
-                               WaitcntBrackets &ScoreBrackets,
-                               MachineInstr *OldWaitcntInstr);
   bool generateWaitcnt(AMDGPU::Waitcnt Wait,
                        MachineBasicBlock::instr_iterator It,
                        MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets,
@@ -1902,31 +1899,6 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
                          OldWaitcntInstr);
 }
 
-// Add a waitcnt to flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the
-// end of the given block if needed.
-bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block,
-                                               WaitcntBrackets &ScoreBrackets,
-                                               MachineInstr *OldWaitcntInstr) {
-  AMDGPU::Waitcnt Wait;
-
-  unsigned LoadCntPending = ScoreBrackets.hasPendingEvent(LOAD_CNT);
-  unsigned SampleCntPending = ScoreBrackets.hasPendingEvent(SAMPLE_CNT);
-  unsigned BvhCntPending = ScoreBrackets.hasPendingEvent(BVH_CNT);
-
-  if (LoadCntPending == 0 && SampleCntPending == 0 && BvhCntPending == 0)
-    return false;
-
-  if (LoadCntPending != 0)
-    Wait.LoadCnt = 0;
-  if (SampleCntPending != 0)
-    Wait.SampleCnt = 0;
-  if (BvhCntPending != 0)
-    Wait.BvhCnt = 0;
-
-  return generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
-                         OldWaitcntInstr);
-}
-
 bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
                                        MachineBasicBlock::instr_iterator It,
                                        MachineBasicBlock &Block,
@@ -2355,9 +2327,22 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
     ++Iter;
   }
 
+  // Flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the end of the block if
+  // needed.
+  AMDGPU::Waitcnt Wait;
   if (Block.getFirstTerminator() == Block.end() &&
-      isPreheaderToFlush(Block, ScoreBrackets))
-    Modified |= generateWaitcntBlockEnd(Block, ScoreBrackets, OldWaitcntInstr);
+      isPreheaderToFlush(Block, ScoreBrackets)) {
+    if (ScoreBrackets.hasPendingEvent(LOAD_CNT))
+      Wait.LoadCnt = 0;
+    if (ScoreBrackets.hasPendingEvent(SAMPLE_CNT))
+      Wait.SampleCnt = 0;
+    if (ScoreBrackets.hasPendingEvent(BVH_CNT))
+      Wait.BvhCnt = 0;
+  }
+
+  // Combine or remove any redundant waitcnts at the end of the block.
+  Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
+                              OldWaitcntInstr);
 
   return Modified;
 }