From ad7ecc65b1b1d6466ff035168c86f208a91aa1b4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 5 Jun 2013 03:43:06 +0000 Subject: [PATCH] R600: Make sure to schedule AR register uses and defs in the same clause Reviewed-by: vljn at ovi.com git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183294 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 6 ++-- lib/Target/R600/R600MachineScheduler.cpp | 36 ++++++++++++++++++++++-- lib/Target/R600/R600MachineScheduler.h | 2 ++ test/CodeGen/R600/indirect-addressing.ll | 32 +++++++++++++++++++++ 4 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/R600/indirect-addressing.ll diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 2a4a2459015..d915f40a626 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -816,7 +816,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, AddrReg, ValueReg) - .addReg(AMDGPU::AR_X, RegState::Implicit); + .addReg(AMDGPU::AR_X, + RegState::Implicit | RegState::Kill); setImmOperand(Mov, R600Operands::DST_REL, 1); return Mov; } @@ -833,7 +834,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, ValueReg, AddrReg) - .addReg(AMDGPU::AR_X, RegState::Implicit); + .addReg(AMDGPU::AR_X, + RegState::Implicit | RegState::Kill); setImmOperand(Mov, R600Operands::SRC0_REL, 1); return Mov; diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 8d61b8c610b..9469e0fc6b3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -59,8 +59,16 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && 
(!Available[IDFetch].empty() || !Available[IDOther].empty()); - if ((AllowSwitchToAlu && CurInstKind != IDAlu) || - (!AllowSwitchFromAlu && CurInstKind == IDAlu)) { + // We want to schedule AR defs as soon as possible to make sure they aren't + // put in a different ALU clause from their uses. + if (!SU && !UnscheduledARDefs.empty()) { + SU = UnscheduledARDefs[0]; + UnscheduledARDefs.erase(UnscheduledARDefs.begin()); + NextInstKind = IDAlu; + } + + if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) || + (!AllowSwitchFromAlu && CurInstKind == IDAlu))) { // try to pick ALU SU = pickAlu(); if (SU) { @@ -84,6 +92,15 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { NextInstKind = IDOther; } + // We want to schedule the AR uses as late as possible to make sure that + // the AR defs have been released. + if (!SU && !UnscheduledARUses.empty()) { + SU = UnscheduledARUses[0]; + UnscheduledARUses.erase(UnscheduledARUses.begin()); + NextInstKind = IDAlu; + } + + DEBUG( if (SU) { dbgs() << " ** Pick node **\n"; @@ -149,6 +166,21 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) { DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); int IK = getInstKind(SU); + + // Check for AR register defs and uses + for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(), + E = SU->getInstr()->operands_end(); + I != E; ++I) { + if (I->isReg() && I->getReg() == AMDGPU::AR_X) { + if (I->isDef()) { + UnscheduledARDefs.push_back(SU); + } else { + UnscheduledARUses.push_back(SU); + } + return; + } + } + // There is no export clause, we can schedule one as soon as its ready if (IK == IDOther) Available[IDOther].push_back(SU); diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index 814ae9e546c..4dedf70d5f7 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -52,6 +52,8 @@ class R600SchedStrategy : public MachineSchedStrategy { std::vector<SUnit *> Available[IDLast], Pending[IDLast]; 
std::vector<SUnit *> AvailableAlus[AluLast]; + std::vector<SUnit *> UnscheduledARDefs; + std::vector<SUnit *> UnscheduledARUses; InstKind CurInstKind; int CurEmitted; diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/indirect-addressing.ll new file mode 100644 index 00000000000..7291cb42e7f --- /dev/null +++ b/test/CodeGen/R600/indirect-addressing.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test checks that uses and defs of the AR register happen in the same +; instruction clause. + +; CHECK: @mova_same_clause +; CHECK: MOVA_INT +; CHECK-NOT: ALU clause +; CHECK: 0 + AR.x +; CHECK: MOVA_INT +; CHECK-NOT: ALU clause +; CHECK: 0 + AR.x + +define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { +entry: + %stack = alloca [5 x i32], align 4 + %0 = load i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1 + %1 = load i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0 + %2 = load i32* %arrayidx10, align 4 + store i32 %2, i32 addrspace(1)* %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1 + %3 = load i32* %arrayidx12 + %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1 + store i32 %3, i32 addrspace(1)* %arrayidx13 + ret void +}