summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp45
1 files changed, 15 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 27621906e4c5..bb499c5c8c57 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -708,9 +708,6 @@ public:
WaitcntBrackets &ScoreBrackets,
MachineInstr *OldWaitcntInstr,
bool FlushVmCnt);
- bool generateWaitcntBlockEnd(MachineBasicBlock &Block,
- WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr);
bool generateWaitcnt(AMDGPU::Waitcnt Wait,
MachineBasicBlock::instr_iterator It,
MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets,
@@ -1902,31 +1899,6 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
OldWaitcntInstr);
}
-// Add a waitcnt to flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the
-// end of the given block if needed.
-bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block,
- WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr) {
- AMDGPU::Waitcnt Wait;
-
- unsigned LoadCntPending = ScoreBrackets.hasPendingEvent(LOAD_CNT);
- unsigned SampleCntPending = ScoreBrackets.hasPendingEvent(SAMPLE_CNT);
- unsigned BvhCntPending = ScoreBrackets.hasPendingEvent(BVH_CNT);
-
- if (LoadCntPending == 0 && SampleCntPending == 0 && BvhCntPending == 0)
- return false;
-
- if (LoadCntPending != 0)
- Wait.LoadCnt = 0;
- if (SampleCntPending != 0)
- Wait.SampleCnt = 0;
- if (BvhCntPending != 0)
- Wait.BvhCnt = 0;
-
- return generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
- OldWaitcntInstr);
-}
-
bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
MachineBasicBlock::instr_iterator It,
MachineBasicBlock &Block,
@@ -2355,9 +2327,22 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
++Iter;
}
+ // Flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the end of the block if
+ // needed.
+ AMDGPU::Waitcnt Wait;
if (Block.getFirstTerminator() == Block.end() &&
- isPreheaderToFlush(Block, ScoreBrackets))
- Modified |= generateWaitcntBlockEnd(Block, ScoreBrackets, OldWaitcntInstr);
+ isPreheaderToFlush(Block, ScoreBrackets)) {
+ if (ScoreBrackets.hasPendingEvent(LOAD_CNT))
+ Wait.LoadCnt = 0;
+ if (ScoreBrackets.hasPendingEvent(SAMPLE_CNT))
+ Wait.SampleCnt = 0;
+ if (ScoreBrackets.hasPendingEvent(BVH_CNT))
+ Wait.BvhCnt = 0;
+ }
+
+ // Combine or remove any redundant waitcnts at the end of the block.
+ Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
+ OldWaitcntInstr);
return Modified;
}