diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index e796debd79b..d512d6589c4 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -23,6 +23,7 @@ namespace llvm {
   class FunctionPass;
 
   FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+  FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
   FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
new file mode 100644
index 00000000000..8718e047055
--- /dev/null
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -0,0 +1,297 @@
+//===--------- MipsOptimizePICCall.cpp - Optimize PIC Calls ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates unnecessary instructions that set up $gp and replaces
+// instructions that load target function addresses with copy instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "optimize-mips-pic-call"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> LoadTargetFromGOT("mips-load-target-from-got",
+                                       cl::init(true),
+                                       cl::desc("Load target address from GOT"),
+                                       cl::Hidden);
+
+static cl::opt<bool> EraseGPOpnd("mips-erase-gp-opnd",
+                                 cl::init(true), cl::desc("Erase GP Operand"),
+                                 cl::Hidden);
+
+namespace {
+typedef std::pair<unsigned, unsigned> CntRegP;
+typedef RecyclingAllocator<BumpPtrAllocator,
+                           ScopedHashTableVal<const Value *, CntRegP> >
+AllocatorTy;
+typedef ScopedHashTable<const Value *, CntRegP, DenseMapInfo<const Value *>,
+                        AllocatorTy> ScopedHTType;
+
+class MBBInfo {
+public:
+  MBBInfo(MachineDomTreeNode *N);
+  const MachineDomTreeNode *getNode() const;
+  bool isVisited() const;
+  void preVisit(ScopedHTType &ScopedHT);
+  void postVisit();
+
+private:
+  MachineDomTreeNode *Node;
+  ScopedHTType::ScopeTy *HTScope;
+};
+
+class OptimizePICCall : public MachineFunctionPass {
+public:
+  OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {}
+
+  virtual const char *getPassName() const { return "Mips OptimizePICCall"; }
+
+  bool runOnMachineFunction(MachineFunction &F);
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  /// \brief Visit MBB.
+  bool visitNode(MBBInfo &MBBI);
+
+  /// \brief Test if MI jumps to a function via a register.
+  ///
+  /// Also, return the virtual register containing the target function's address
+  /// and the underlying object in Reg and Val respectively, if the function's
+  /// address can be resolved lazily.
+  bool isCallViaRegister(MachineInstr &MI, unsigned &Reg,
+                         const Value *&Val) const;
+
+  /// \brief Return the number of instructions that dominate the current
+  /// instruction and load the function address from object Entry.
+  unsigned getCount(const Value *Entry);
+
+  /// \brief Return the destination virtual register of the last instruction
+  /// that loads from object Entry.
+  unsigned getReg(const Value *Entry);
+
+  /// \brief Update ScopedHT.
+  void incCntAndSetReg(const Value *Entry, unsigned Reg);
+
+  ScopedHTType ScopedHT;
+  static char ID;
+};
+
+char OptimizePICCall::ID = 0;
+} // end of anonymous namespace
+
+/// Return the first MachineOperand of MI if it is a used virtual register.
+static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) {
+  if (MI.getNumOperands() == 0)
+    return 0;
+
+  MachineOperand &MO = MI.getOperand(0);
+
+  if (!MO.isReg() || !MO.isUse() ||
+      !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    return 0;
+
+  return &MO;
+}
+
+/// Return type of register Reg.
+static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) {
+  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+  assert(RC->vt_end() - RC->vt_begin() == 1);
+  return *RC->vt_begin();
+}
+
+/// Do the following transformation:
+///
+/// jalr $vreg
+/// =>
+/// copy $t9, $vreg
+/// jalr $t9
+static void setCallTargetReg(MachineBasicBlock *MBB,
+                             MachineBasicBlock::iterator I) {
+  MachineFunction &MF = *MBB->getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  unsigned SrcReg = I->getOperand(0).getReg();
+  unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64;
+  BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg)
+      .addReg(SrcReg);
+  I->getOperand(0).setReg(DstReg);
+}
+
+/// Search MI's operands for register GP and erase it.
+static void eraseGPOpnd(MachineInstr &MI) {
+  if (!EraseGPOpnd)
+    return;
+
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MVT::SimpleValueType Ty = getRegTy(MI.getOperand(0).getReg(), MF);
+  unsigned Reg = Ty == MVT::i32 ? Mips::GP : Mips::GP_64;
+
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+    MachineOperand &MO = MI.getOperand(I);
+    if (MO.isReg() && MO.getReg() == Reg) {
+      MI.RemoveOperand(I);
+      return;
+    }
+  }
+
+  llvm_unreachable(0);
+}
+
+MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(0) {}
+
+const MachineDomTreeNode *MBBInfo::getNode() const { return Node; }
+
+bool MBBInfo::isVisited() const { return HTScope; }
+
+void MBBInfo::preVisit(ScopedHTType &ScopedHT) {
+  HTScope = new ScopedHTType::ScopeTy(ScopedHT);
+}
+
+void MBBInfo::postVisit() {
+  delete HTScope;
+}
+
+// OptimizePICCall methods.
+bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) {
+  if (F.getTarget().getSubtarget<MipsSubtarget>().inMips16Mode())
+    return false;
+
+  // Do a pre-order traversal of the dominator tree.
+  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+
+  SmallVector<MBBInfo, 8> WorkList(1, MBBInfo(MDT->getRootNode()));
+
+  while (!WorkList.empty()) {
+    MBBInfo &MBBI = WorkList.back();
+
+    // If this MBB has already been visited, destroy the scope for the MBB and
+    // pop it from the work list.
+    if (MBBI.isVisited()) {
+      MBBI.postVisit();
+      WorkList.pop_back();
+      continue;
+    }
+
+    // Visit the MBB and add its children to the work list.
+    MBBI.preVisit(ScopedHT);
+    Changed |= visitNode(MBBI);
+    const MachineDomTreeNode *Node = MBBI.getNode();
+    const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+    WorkList.append(Children.begin(), Children.end());
+  }
+
+  return Changed;
+}
+
+bool OptimizePICCall::visitNode(MBBInfo &MBBI) {
+  bool Changed = false;
+  MachineBasicBlock *MBB = MBBI.getNode()->getBlock();
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+       ++I) {
+    unsigned Reg;
+    const Value *Entry;
+
+    // Skip instructions that are not call instructions via registers.
+    if (!isCallViaRegister(*I, Reg, Entry))
+      continue;
+
+    Changed = true;
+    unsigned N = getCount(Entry);
+
+    if (N != 0) {
+      // If a function has been called more than twice, we do not have to emit a
+      // load instruction to get the function address from the GOT, but can
+      // instead reuse the address that has been loaded before.
+      if (N >= 2 && !LoadTargetFromGOT)
+        getCallTargetRegOpnd(*I)->setReg(getReg(Entry));
+
+      // Erase the $gp operand if this isn't the first time a function has
+      // been called. $gp needs to be set up only if the function call can go
+      // through a lazy binding stub.
+      eraseGPOpnd(*I);
+    }
+
+    if (Entry)
+      incCntAndSetReg(Entry, Reg);
+
+    setCallTargetReg(MBB, I);
+  }
+
+  return Changed;
+}
+
+bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg,
+                                        const Value *&Val) const {
+  if (!MI.isCall())
+    return false;
+
+  MachineOperand *MO = getCallTargetRegOpnd(MI);
+
+  // Return if MI is not a function call via a register.
+  if (!MO)
+    return false;
+
+  // Get the instruction that loads the function address from the GOT.
+  Reg = MO->getReg();
+  Val = 0;
+  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  MachineInstr *DefMI = MRI.getVRegDef(Reg);
+
+  assert(DefMI);
+
+  // See if DefMI is an instruction that loads from a GOT entry that holds the
+  // address of a lazy binding stub.
+  if (!DefMI->mayLoad() || DefMI->getNumOperands() < 3)
+    return true;
+
+  unsigned Flags = DefMI->getOperand(2).getTargetFlags();
+
+  if (Flags != MipsII::MO_GOT_CALL && Flags != MipsII::MO_CALL_LO16)
+    return true;
+
+  // Return the underlying object for the GOT entry in Val.
+  assert(DefMI->hasOneMemOperand());
+  Val = (*DefMI->memoperands_begin())->getValue();
+  return true;
+}
+
+unsigned OptimizePICCall::getCount(const Value *Entry) {
+  return ScopedHT.lookup(Entry).first;
+}
+
+unsigned OptimizePICCall::getReg(const Value *Entry) {
+  unsigned Reg = ScopedHT.lookup(Entry).second;
+  assert(Reg);
+  return Reg;
+}
+
+void OptimizePICCall::incCntAndSetReg(const Value *Entry, unsigned Reg) {
+  CntRegP P = ScopedHT.lookup(Entry);
+  ScopedHT.insert(Entry, std::make_pair(P.first + 1, Reg));
+}
+
+/// Return an OptimizePICCall object.
+FunctionPass *llvm::createMipsOptimizePICCallPass(MipsTargetMachine &TM) {
+  return new OptimizePICCall(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 809adc03b15..cc6411fd885 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1077,14 +1077,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
             std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
             bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
             CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
-  // T9 should contain the address of the callee function if
-  // -reloction-model=pic or it is an indirect call.
-  if (IsPICCall || !GlobalOrExternal) {
-    unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
-    RegsToPass.push_front(std::make_pair(T9Reg, Callee));
-  } else
-    Ops.push_back(Callee);
-
+  Ops.push_back(Callee);
   MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                   InternalLinkage, CLI, Callee, Chain);
 }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5046c1b782f..ab5677a66fc 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -153,6 +153,7 @@ public:
 
   virtual void addIRPasses();
   virtual bool addInstSelector();
+  virtual void addMachineSSAOptimization();
   virtual bool addPreEmitPass();
 };
 } // namespace
@@ -182,6 +183,11 @@ bool MipsPassConfig::addInstSelector() {
   return false;
 }
 
+void MipsPassConfig::addMachineSSAOptimization() {
+  addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
+  TargetPassConfig::addMachineSSAOptimization();
+}
+
 void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
   if (Subtarget.allowMixed16_32()) {
     DEBUG(errs() << "No ");
diff --git a/test/CodeGen/Mips/call-optimization.ll b/test/CodeGen/Mips/call-optimization.ll
new file mode 100644
index 00000000000..b147f58e455
--- /dev/null
+++ b/test/CodeGen/Mips/call-optimization.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \
+; RUN:   FileCheck %s -check-prefix=O32
+; RUN: llc -march=mipsel -mips-load-target-from-got=false \
+; RUN:   -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32-LOADTGT
+
+@gd1 = common global double 0.000000e+00, align 8
+@gd2 = common global double 0.000000e+00, align 8
+
+; O32-LABEL: caller3:
+; O32-DAG: lw $25, %call16(callee3)
+; O32-DAG: move $gp
+; O32: jalr $25
+; O32-DAG: lw $25, %call16(callee3)
+; O32-NOT: move $gp
+; O32: jalr $25
+; O32-DAG: lw $25, %call16(callee3)
+; O32-NOT: move $gp
+; O32: jalr $25
+
+; O32-LOADTGT-LABEL: caller3:
+; O32-LOADTGT-DAG: lw $25, %call16(callee3)
+; O32-LOADTGT-DAG: move $gp
+; O32-LOADTGT: jalr $25
+; O32-LOADTGT-DAG: move $25
+; O32-LOADTGT-NOT: move $gp
+; O32-LOADTGT: jalr $25
+; O32-LOADTGT-DAG: move $25
+; O32-LOADTGT-NOT: move $gp
+; O32-LOADTGT: jalr $25
+
+define void @caller3(i32 %n) {
+entry:
+  tail call void @callee3()
+  tail call void @callee3()
+  %tobool1 = icmp eq i32 %n, 0
+  br i1 %tobool1, label %while.end, label %while.body
+
+while.body:
+  %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %dec = add nsw i32 %n.addr.02, -1
+  tail call void @callee3()
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+declare void @callee3()
+
+; O32-LABEL: caller4:
+; O32-DAG: lw $25, %call16(ceil)
+; O32-DAG: move $gp
+; O32: jalr $25
+; O32-DAG: lw $25, %call16(ceil)
+; O32-NOT: move $gp
+; O32: jalr $25
+; O32-DAG: lw $25, %call16(ceil)
+; O32-NOT: move $gp
+; O32: jalr $25
+
+; O32-LOADTGT-LABEL: caller4:
+; O32-LOADTGT-DAG: lw $25, %call16(ceil)
+; O32-LOADTGT-DAG: move $gp
+; O32-LOADTGT: jalr $25
+; O32-LOADTGT-DAG: move $25
+; O32-LOADTGT-NOT: move $gp
+; O32-LOADTGT: jalr $25
+; O32-LOADTGT-DAG: move $25
+; O32-LOADTGT-NOT: move $gp
+; O32-LOADTGT: jalr $25
+
+define void @caller4(double %d) {
+entry:
+  %call = tail call double @ceil(double %d)
+  %call1 = tail call double @ceil(double %call)
+  store double %call1, double* @gd2, align 8
+  %call2 = tail call double @ceil(double %call1)
+  store double %call2, double* @gd1, align 8
+  ret void
+}
+
+declare double @ceil(double)
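
A rough sketch of the intended effect on O32 PIC output for two back-to-back
calls to the same function f, in the spirit of the CHECK lines above. The
register numbers other than $25 and $gp ($16 and $17 below) are illustrative
placeholders; the register allocator chooses the actual ones.

  Without this pass (each call reloads the target from the GOT and sets up $gp):
    lw    $25, %call16(f)($gp)
    jalr  $25
    move  $gp, $17
    lw    $25, %call16(f)($gp)
    jalr  $25

  With this pass and -mips-load-target-from-got=false (the second call reuses
  the previously loaded address and drops the $gp setup):
    lw    $25, %call16(f)($gp)
    jalr  $25
    move  $25, $16
    jalr  $25

  With the default -mips-load-target-from-got=true, only the redundant $gp
  setup before the second call is removed; the lw from the GOT remains.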