From 71e39dac0ce9676dd3d0a92164167e18499d40fa Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Thu, 20 Jan 2011 05:08:26 +0000 Subject: [PATCH] Sparc backend: Implements a delay slot filler that attempt to fill delay slots with useful instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123884 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/DelaySlotFiller.cpp | 227 ++++++++++++++++++++- lib/Target/Sparc/SparcInstrInfo.td | 6 +- test/CodeGen/SPARC/2011-01-19-DelaySlot.ll | 77 +++++++ 3 files changed, 302 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/SPARC/2011-01-19-DelaySlot.ll diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index aae5da85600..b0d669b860f 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -7,21 +7,32 @@ // //===----------------------------------------------------------------------===// // -// This is a simple local pass that fills delay slots with NOPs. -// +// This is a simple local pass that attempts to fill delay slots with useful +// instructions. If no instructions can be moved into the delay slot, then a +// NOP is placed. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "delayslotfiller" +#define DEBUG_TYPE "delay-slot-filler" #include "Sparc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" + using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); +static cl::opt DisableDelaySlotFiller( + "disable-sparc-delay-filler", + cl::init(false), + cl::desc("Disable the Sparc delay slot filler."), + cl::Hidden); + namespace { struct Filler : public MachineFunctionPass { /// Target machine description which we query for reg. names, data @@ -47,6 +58,28 @@ namespace { return Changed; } + bool isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate); + + void insertCallUses(MachineBasicBlock::iterator MI, + SmallSet& RegUses); + + void insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet& RegDefs, + SmallSet& RegUses); + + bool IsRegInSet(SmallSet& RegSet, + unsigned Reg); + + bool delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, bool &sawStore, + SmallSet &RegDefs, + SmallSet &RegUses); + + MachineBasicBlock::iterator + findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); + + }; char Filler::ID = 0; } // end of anonymous namespace @@ -59,18 +92,198 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { } /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. -/// Currently, we fill delay slots with NOPs. We assume there is only one -/// delay slot per delayed instruction. +/// We assume there is only one delay slot per delayed instruction. /// bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; - ++J; - BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP)); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB, I); + ++FilledSlots; Changed = true; + + if (D == MBB.end()) + BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); + else + MBB.splice(++J, &MBB, D); } return Changed; } + +MachineBasicBlock::iterator +Filler::findDelayInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator slot) +{ + SmallSet RegDefs; + SmallSet RegUses; + bool sawLoad = false; + bool sawStore = false; + + MachineBasicBlock::iterator I = slot; + + if (slot->getOpcode() == SP::RET) + return MBB.end(); + + if (slot->getOpcode() == SP::RETL) { + --I; + if (I->getOpcode() != SP::RESTORErr) + return MBB.end(); + //change retl to ret + slot->setDesc(TII->get(SP::RET)); + return I; + } + + //Call's delay filler can def some of call's uses. + if (slot->getDesc().isCall()) + insertCallUses(slot, RegUses); + else + insertDefsUses(slot, RegDefs, RegUses); + + bool done = false; + + while (!done) { + done = (I == MBB.begin()); + + if (!done) + --I; + + // skip debug value + if (I->isDebugValue()) + continue; + + + if (I->hasUnmodeledSideEffects() + || I->isInlineAsm() + || I->isLabel() + || I->getDesc().hasDelaySlot() + || isDelayFiller(MBB, I)) + break; + + if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) { + insertDefsUses(I, RegDefs, RegUses); + continue; + } + + return I; + } + return MBB.end(); +} + +bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, + bool &sawStore, + SmallSet &RegDefs, + SmallSet &RegUses) +{ + + if (candidate->getDesc().mayLoad()) { + sawLoad = true; + if (sawStore) + return true; + } + + if (candidate->getDesc().mayStore()) { + if (sawStore) + return true; + sawStore = true; + if (sawLoad) + return true; + } + + for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { + const MachineOperand &MO = candidate->getOperand(i); + if (!MO.isReg()) + continue; // skip + + unsigned Reg = MO.getReg(); + + if (MO.isDef()) { + //check whether Reg is defined or used before delay slot. + if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) + return true; + } + if (MO.isUse()) { + //check whether Reg is defined before delay slot. + if (IsRegInSet(RegDefs, Reg)) + return true; + } + } + return false; +} + + +void Filler::insertCallUses(MachineBasicBlock::iterator MI, + SmallSet& RegUses) +{ + + switch(MI->getOpcode()) { + default: llvm_unreachable("Unknown opcode."); + case SP::CALL: break; + case SP::JMPLrr: + case SP::JMPLri: + assert(MI->getNumOperands() >= 2); + const MachineOperand &Reg = MI->getOperand(0); + assert(Reg.isReg() && "JMPL first operand is not a register."); + assert(Reg.isUse() && "JMPL first operand is not a use."); + RegUses.insert(Reg.getReg()); + + const MachineOperand &RegOrImm = MI->getOperand(1); + if (RegOrImm.isImm()) + break; + assert(RegOrImm.isReg() && "JMPLrr second operand is not a register."); + assert(RegOrImm.isUse() && "JMPLrr second operand is not a use."); + RegUses.insert(RegOrImm.getReg()); + break; + } +} + +//Insert Defs and Uses of MI into the sets RegDefs and RegUses. +void Filler::insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet& RegDefs, + SmallSet& RegUses) +{ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (MO.isDef()) + RegDefs.insert(Reg); + if (MO.isUse()) + RegUses.insert(Reg); + + } +} + +//returns true if the Reg or its alias is in the RegSet. +bool Filler::IsRegInSet(SmallSet& RegSet, unsigned Reg) +{ + if (RegSet.count(Reg)) + return true; + // check Aliased Registers + for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + *Alias; ++ Alias) + if (RegSet.count(*Alias)) + return true; + + return false; +} + +// return true if the candidate is a delay filler. +bool Filler::isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate) +{ + if (candidate == MBB.begin()) + return false; + const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); + return prevdesc.hasDelaySlot(); +} diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 7da86a1f5be..4f7b7c57680 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -289,6 +289,9 @@ let usesCustomInserter = 1, Uses = [FCC] in { let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + + let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in + def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; } // Section B.1 - Load Integer Instructions, p. 90 @@ -530,7 +533,8 @@ let Uses = [FCC] in let Uses = [O6], hasDelaySlot = 1, isCall = 1, Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7, - D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in { + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, + ICC, FCC, Y] in { def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops), "call $dst", []> { bits<30> disp; diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll new file mode 100644 index 00000000000..d9fc8ea3aa8 --- /dev/null +++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll @@ -0,0 +1,77 @@ +;RUN: llc -march=sparc < %s | FileCheck %s + + +define i32 @test(i32 %a) nounwind { +entry: +; CHECK: test +; CHECK: call bar +; CHECK-NOT: nop +; CHECK: ret +; CHECK-NEXT: restore + %0 = tail call i32 @bar(i32 %a) nounwind + ret i32 %0 +} + +define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind { +entry: +; CHECK: test_jmpl +; CHECK: call +; CHECK-NOT: nop +; CHECK: ret +; CHECK-NEXT: restore + %0 = tail call i32 %f(i32 %a, i32 %b) nounwind + ret i32 %0 +} + +define i32 @test_loop(i32 %a, i32 %b) nounwind readnone { +; CHECK: test_loop +entry: + %0 = icmp sgt i32 %b, 0 + br i1 %0, label %bb, label %bb5 + +bb: ; preds = %entry, %bb + %a_addr.18 = phi i32 [ %a_addr.0, %bb ], [ %a, %entry ] + %1 = phi i32 [ %3, %bb ], [ 0, %entry ] + %tmp9 = mul i32 %1, %b + %2 = and i32 %1, 1 + %tmp = xor i32 %2, 1 + %.pn = shl i32 %tmp9, %tmp + %a_addr.0 = add i32 %.pn, %a_addr.18 + %3 = add nsw i32 %1, 1 + %exitcond = icmp eq i32 %3, %b +;CHECK: subcc +;CHECK: bne +;CHECK-NOT: nop + br i1 %exitcond, label %bb5, label %bb + +bb5: ; preds = %bb, %entry + %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ] +;CHECK: ret +;CHECK-NEXT: restore + ret i32 %a_addr.1.lcssa +} + +define i32 @test_inlineasm(i32 %a) nounwind { +entry: +;CHECK: test_inlineasm +;CHECK: sethi +;CHECK: !NO_APP +;CHECK-NEXT: subcc +;CHECK-NEXT: bg +;CHECK-NEXT: nop + tail call void asm sideeffect "sethi 0, %g0", ""() nounwind + %0 = icmp slt i32 %a, 0 + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + %1 = tail call i32 (...)* @foo(i32 %a) nounwind + ret i32 %1 + +bb1: ; preds = %entry + %2 = tail call i32 @bar(i32 %a) nounwind + ret i32 %2 +} + +declare i32 @foo(...) + +declare i32 @bar(i32)