R600/SI: Insert s_waitcnt before s_barrier instructions.

This ensures that all memory operations are complete when all threads
reach the barrier.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225290 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2015-01-06 19:52:07 +00:00
parent fad04531a9
commit bac89f3dd2
3 changed files with 10 additions and 1 deletion

View File

@@ -428,7 +428,11 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
Changes |= insertWait(MBB, I, handleOperands(*I));
// Wait for everything before a barrier.
if (I->getOpcode() == AMDGPU::S_BARRIER)
Changes |= insertWait(MBB, I, LastIssued);
else
Changes |= insertWait(MBB, I, handleOperands(*I));
pushInstruction(MBB, I);
}

View File

@@ -3,6 +3,8 @@
; FUNC-LABEL: {{^}}test_barrier_global:
; EG: GROUP_BARRIER
; SI: buffer_store_dword
; SI: s_waitcnt
; SI: s_barrier
define void @test_barrier_global(i32 addrspace(1)* %out) {

View File

@@ -3,6 +3,9 @@
; FUNC-LABEL: {{^}}test_barrier_local:
; EG: GROUP_BARRIER
; SI: buffer_store_dword
; SI: s_waitcnt
; SI: s_barrier
define void @test_barrier_local(i32 addrspace(1)* %out) {