diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 79b1554e22a..d7506bedfb4 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -39,6 +39,7 @@ class TargetLowering; class TargetMachine; class TargetRegisterClass; class TargetRegisterInfo; +class LoadInst; /// FastISel - This is a fast-path instruction selection class that /// generates poor code and doesn't support illegal types or non-trivial @@ -102,7 +103,16 @@ public: /// index value. std::pair<unsigned, bool> getRegForGEPIndex(const Value *V); - /// recomputeInsertPt - Reset InsertPt to prepare for insterting instructions + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// vreg is being provided by the specified load instruction. If possible, + /// try to fold the load as an operand to the instruction, returning true if + /// possible. + virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/, + const LoadInst * /*LI*/) { + return false; + } + + /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions /// into the current block. void recomputeInsertPt(); diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 01d05ddac11..51895a6f6a2 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -34,6 +34,7 @@ namespace llvm { class ScheduleHazardRecognizer; class GCFunctionInfo; class ScheduleDAGSDNodes; + class LoadInst; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. 
@@ -282,6 +283,7 @@ private: void PrepareEHLandingPad(); void SelectAllBasicBlocks(const Function &Fn); + bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS); void FinishBasicBlock(); void SelectBasicBlock(BasicBlock::const_iterator Begin, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 66cb5ceb09e..dead5d50b72 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -661,6 +661,43 @@ void SelectionDAGISel::PrepareEHLandingPad() { } } + + + +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + FastISel *FastIS) { + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) return false; + + // Figure out which vreg this is going into. + unsigned LoadReg = FastIS->getRegForValue(LI); + assert(LoadReg && "Load isn't already assigned a vreg? "); + + // Check to see what the uses of this vreg are. If it has no uses, or more + // than one use (at the machine instr level) then we can't fold it. + MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); + if (RI == RegInfo->reg_end()) + return false; + + // See if there is exactly one use of the vreg. If there are multiple uses, + // then the instruction got lowered to multiple machine instructions or the + // use of the loaded value ended up being multiple operands of the result, in + // either case, we can't fold this. + MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; + if (PostRI != RegInfo->reg_end()) + return false; + + assert(RI.getOperand().isUse() && + "The only use of the vreg must be a use, we haven't emitted the def!"); + + // Ask the target to try folding the load. + return FastIS->TryToFoldLoad(&*RI, RI.getOperandNo(), LI); +} + + + + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; @@ -723,8 +760,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) + if (FastIS->SelectInstruction(Inst)) { + // If fast isel succeeded, check to see if there is a single-use + // non-volatile load right before the selected instruction, and see if + // the load is used by the instruction. If so, try to fold it. + const Instruction *BeforeInst = 0; + if (Inst != Begin) + BeforeInst = llvm::prior(llvm::prior(BI)); + if (BeforeInst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) { + // If we succeeded, don't re-select the load. + --BI; + } continue; + } // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 0c70eec4827..9390ba91d69 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -63,6 +63,13 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// vreg is being provided by the specified load instruction. If possible, + /// try to fold the load as an operand to the instruction, returning true if + /// possible. + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); + #include "X86GenFastISel.inc" private: @@ -1941,6 +1948,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// possible. 
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + X86AddressMode AM; + if (!X86SelectAddress(LI->getOperand(0), AM)) + return false; + + X86InstrInfo &XII = (X86InstrInfo&)TII; + + unsigned Size = TD.getTypeAllocSize(LI->getType()); + unsigned Alignment = LI->getAlignment(); + + SmallVector<MachineOperand, 8> AddrOps; + AM.getFullAddress(AddrOps); + + MachineInstr *Result = + XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); + if (Result == 0) return false; + + MI->getParent()->insert(MI, Result); + MI->eraseFromParent(); + return true; +} + + namespace llvm { llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2a6a71dd91a..407b97e0500 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -56,6 +56,31 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } + + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { + assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); + + if (BaseType == X86AddressMode::RegBase) + MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, + false, false, false, 0, false)); + else { + assert(BaseType == X86AddressMode::FrameIndexBase); + MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); + } + + MO.push_back(MachineOperand::CreateImm(Scale)); + MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, + false, false, false, 0, false)); + + if (GV) + MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); + else + MO.push_back(MachineOperand::CreateImm(Disp)); + + MO.push_back(MachineOperand::CreateReg(0, false, false, + false, false, false, 0, false)); + } }; /// addDirectMem - This function is used to add a direct memory reference to the @@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB, if (AM.BaseType == 
X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); - else if (AM.BaseType == X86AddressMode::FrameIndexBase) + else { + assert(AM.BaseType == X86AddressMode::FrameIndexBase); MIB.addFrameIndex(AM.Base.FrameIndex); - else - assert (0); + } + MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f33620641e8..eb0e4b80127 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -845,18 +845,18 @@ public: /// SetSSEDomain - Set the SSEDomain of MI. void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Alignment) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, - unsigned Size, unsigned Alignment) const; - /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. 
bool isFrameOperand(const MachineInstr *MI, unsigned int Op, diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll index 35ec1e7115b..8db1936bc20 100644 --- a/test/CodeGen/X86/fast-isel-mem.ll +++ b/test/CodeGen/X86/fast-isel-mem.ll @@ -1,10 +1,8 @@ -; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \ -; RUN: grep lazy_ptr, | count 2 -; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \ -; RUN: grep lea +; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s @src = external global i32 +; rdar://6653118 define i32 @loadgv() nounwind { entry: %0 = load i32* @src, align 4 @@ -12,6 +10,14 @@ entry: %2 = add i32 %0, %1 store i32 %2, i32* @src ret i32 %2 +; This should fold one of the loads into the add. +; CHECK: loadgv: +; CHECK: movl L_src$non_lazy_ptr, %ecx +; CHECK: movl (%ecx), %eax +; CHECK: addl (%ecx), %eax +; CHECK: movl %eax, (%ecx) +; CHECK: ret + } %stuff = type { i32 (...)** } @@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind { entry: store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4 ret void +; CHECK: _t: +; CHECK: movl $0, %eax +; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx + }