implement rdar://6653118 - fastisel should fold loads where possible.
Since mem2reg isn't run at -O0, we get a ton of reloads from the stack.
For example, this code:

  int foo(int x, int y, int z) {
    return x+y+z;
  }

used to compile into:

  _foo:                  ## @foo
    subq  $12, %rsp
    movl  %edi, 8(%rsp)
    movl  %esi, 4(%rsp)
    movl  %edx, (%rsp)
    movl  8(%rsp), %edx
    movl  4(%rsp), %esi
    addl  %edx, %esi
    movl  (%rsp), %edx
    addl  %esi, %edx
    movl  %edx, %eax
    addq  $12, %rsp
    ret

Now we produce:

  _foo:                  ## @foo
    subq  $12, %rsp
    movl  %edi, 8(%rsp)
    movl  %esi, 4(%rsp)
    movl  %edx, (%rsp)
    movl  8(%rsp), %edx
    addl  4(%rsp), %edx  ## Folded load
    addl  (%rsp), %edx   ## Folded load
    movl  %edx, %eax
    addq  $12, %rsp
    ret

Fewer instructions and less register use = faster compiles.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8
parent 07c2b7ff68
commit beac75da37
@@ -39,6 +39,7 @@ class TargetLowering;
 class TargetMachine;
 class TargetRegisterClass;
 class TargetRegisterInfo;
+class LoadInst;
 
 /// FastISel - This is a fast-path instruction selection class that
 /// generates poor code and doesn't support illegal types or non-trivial
@@ -102,7 +103,16 @@ public:
   /// index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
-  /// recomputeInsertPt - Reset InsertPt to prepare for insterting instructions
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                             const LoadInst * /*LI*/) {
+    return false;
+  }
+
+  /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions
   /// into the current block.
   void recomputeInsertPt();
 
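The new hook defaults to "don't fold," so targets opt in by overriding it; nothing changes on targets that never do. A minimal stand-alone sketch of that pattern (hypothetical names, not the LLVM API):

    #include <cassert>

    struct FastISelLike {
      virtual ~FastISelLike() {}
      // Conservative default: decline to fold; overriding targets do better.
      virtual bool tryToFoldLoad() { return false; }
    };

    struct X86LikeTarget : FastISelLike {
      virtual bool tryToFoldLoad() { return true; }  // target-specific folding
    };

    int main() {
      FastISelLike generic;
      X86LikeTarget x86;
      assert(!generic.tryToFoldLoad());  // base class never folds
      assert(x86.tryToFoldLoad());       // the target opts in
      return 0;
    }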
@@ -34,6 +34,7 @@ namespace llvm {
   class ScheduleHazardRecognizer;
   class GCFunctionInfo;
   class ScheduleDAGSDNodes;
+  class LoadInst;
 
 /// SelectionDAGISel - This is the common base class used for SelectionDAG-based
 /// pattern-matching instruction selectors.
@@ -282,6 +283,7 @@ private:
 
   void PrepareEHLandingPad();
   void SelectAllBasicBlocks(const Function &Fn);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
   void FinishBasicBlock();
 
   void SelectBasicBlock(BasicBlock::const_iterator Begin,
@@ -661,6 +661,43 @@ void SelectionDAGISel::PrepareEHLandingPad() {
   }
 }
 
+
+
+
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             FastISel *FastIS) {
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile()) return false;
+
+  // Figure out which vreg this is going into.
+  unsigned LoadReg = FastIS->getRegForValue(LI);
+  assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+  // Check to see what the uses of this vreg are.  If it has no uses, or more
+  // than one use (at the machine instr level) then we can't fold it.
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+  if (RI == RegInfo->reg_end())
+    return false;
+
+  // See if there is exactly one use of the vreg.  If there are multiple uses,
+  // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result, in
+  // either case, we can't fold this.
+  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+  if (PostRI != RegInfo->reg_end())
+    return false;
+
+  assert(RI.getOperand().isUse() &&
+         "The only use of the vreg must be a use, we haven't emitted the def!");
+
+  // Ask the target to try folding the load.
+  return FastIS->TryToFoldLoad(&*RI, RI.getOperandNo(), LI);
+}
+
+
+
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
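The pair of iterator checks above is an "exactly one machine-level use" test: bail out if the use list is empty, step once, and bail out if anything remains. If a second use existed, the load would still be needed after folding and could not be deleted. A self-contained sketch of the idiom, with std::list standing in for MachineRegisterInfo's use list (illustrative only):

    #include <cassert>
    #include <list>

    // True only when [begin, end) holds exactly one element -- the same
    // shape as the reg_iterator test in TryToFoldFastISelLoad above.
    template <typename Iter>
    bool hasSingleElement(Iter begin, Iter end) {
      if (begin == end) return false;  // no uses at all: nothing to fold
      ++begin;                         // step past the first use
      return begin == end;             // any second use disqualifies the fold
    }

    int main() {
      std::list<int> uses;
      uses.push_back(42);
      assert(hasSingleElement(uses.begin(), uses.end()));
      uses.push_back(7);               // a second use appears
      assert(!hasSingleElement(uses.begin(), uses.end()));
      return 0;
    }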
@@ -723,8 +760,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         FastIS->recomputeInsertPt();
 
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(Inst))
+        if (FastIS->SelectInstruction(Inst)) {
+          // If fast isel succeeded, check to see if there is a single-use
+          // non-volatile load right before the selected instruction, and see if
+          // the load is used by the instruction.  If so, try to fold it.
+          const Instruction *BeforeInst = 0;
+          if (Inst != Begin)
+            BeforeInst = llvm::prior(llvm::prior(BI));
+          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) {
+            // If we succeeded, don't re-select the load.
+            --BI;
+          }
           continue;
+        }
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(Inst)) {
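The double llvm::prior (LLVM's equivalent of std::prev) is the subtle part: the enclosing fast-isel loop, outside this hunk, walks the block bottom-up with Inst one position before BI, so two hops back from BI reach the instruction just before the one being selected, and the extra --BI then skips re-selecting the now-folded load. A plain-C++ analogue of the iterator positions (std::list stands in for the basic block; values are illustrative):

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      // Stand-in block in program order: a load (10), an add (20), a ret (30).
      std::list<int> bb;
      bb.push_back(10);
      bb.push_back(20);
      bb.push_back(30);

      std::list<int>::iterator BI = bb.end();  // bottom-up walk starts at end
      assert(*std::prev(BI) == 30);            // first selected: the ret

      --BI;                                    // loop step: move up one
      std::list<int>::iterator Inst = std::prev(BI);            // now the add
      std::list<int>::iterator BeforeInst = std::prev(std::prev(BI));
      assert(*Inst == 20 && *BeforeInst == 10);  // BeforeInst is the load
      return 0;
    }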
@@ -63,6 +63,13 @@ public:
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+
 #include "X86GenFastISel.inc"
 
 private:
@@ -1941,6 +1948,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction.  If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+
+  MI->getParent()->insert(MI, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);
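Note that foldMemoryOperandImpl builds a new instruction rather than mutating MI, so the tail of TryToFoldLoad splices the folded form in front of the original and then erases the original. The same insert-before-then-erase shape on a plain list of strings (illustrative only):

    #include <cassert>
    #include <iterator>
    #include <list>
    #include <string>

    int main() {
      // Stand-in instruction stream; the middle entry gets replaced.
      std::list<std::string> block;
      block.push_back("movl 8(%rsp), %edx");
      block.push_back("addl %esi, %edx");        // unfolded form
      block.push_back("ret");

      std::list<std::string>::iterator MI = std::next(block.begin());
      block.insert(MI, "addl 4(%rsp), %edx");    // insert folded form before it
      block.erase(MI);                           // then delete the original

      assert(block.size() == 3);
      assert(*std::next(block.begin()) == "addl 4(%rsp), %edx");
      return 0;
    }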
@@ -56,6 +56,31 @@ struct X86AddressMode {
     : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
+
+
+  void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+    assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+    if (BaseType == X86AddressMode::RegBase)
+      MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+                                             false, false, false, 0, false));
+    else {
+      assert(BaseType == X86AddressMode::FrameIndexBase);
+      MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+    }
+
+    MO.push_back(MachineOperand::CreateImm(Scale));
+    MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+                                           false, false, false, 0, false));
+
+    if (GV)
+      MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+    else
+      MO.push_back(MachineOperand::CreateImm(Disp));
+
+    MO.push_back(MachineOperand::CreateReg(0, false, false,
+                                           false, false, false, 0, false));
+  }
 };
 
 /// addDirectMem - This function is used to add a direct memory reference to the
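getFullAddress pushes exactly five operands because an x86 memory reference decomposes as segment:[base + scale*index + disp]; the trailing zero-register operand is the (usually absent) segment. A tiny numeric sketch of that decomposition in plain C++ (illustrative values):

    #include <cassert>
    #include <cstdint>

    // Mirrors the operand order above: base, scale, index, displacement
    // (segment omitted -- it is the zero register in the common case).
    struct AddrMode {
      uint64_t Base;   // base register value (or frame slot address)
      unsigned Scale;  // must be 1, 2, 4, or 8 on x86
      uint64_t Index;  // index register value
      int32_t  Disp;   // constant displacement
    };

    uint64_t effectiveAddress(const AddrMode &AM) {
      assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
      return AM.Base + uint64_t(AM.Scale) * AM.Index + AM.Disp;
    }

    int main() {
      AddrMode AM = { 0x1000, 4, 3, 8 };  // models [0x1000 + 4*3 + 8]
      assert(effectiveAddress(AM) == 0x1014);
      return 0;
    }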
@@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB,
 
   if (AM.BaseType == X86AddressMode::RegBase)
     MIB.addReg(AM.Base.Reg);
-  else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+  else {
+    assert(AM.BaseType == X86AddressMode::FrameIndexBase);
     MIB.addFrameIndex(AM.Base.FrameIndex);
-  else
-    assert (0);
+  }
+
   MIB.addImm(AM.Scale).addReg(AM.IndexReg);
   if (AM.GV)
     MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
@@ -845,18 +845,18 @@ public:
   /// SetSSEDomain - Set the SSEDomain of MI.
   void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
 
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      unsigned OpNum,
+                                      const SmallVectorImpl<MachineOperand> &MOs,
+                                      unsigned Size, unsigned Alignment) const;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
-  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                      MachineInstr* MI,
-                                      unsigned OpNum,
-                                      const SmallVectorImpl<MachineOperand> &MOs,
-                                      unsigned Size, unsigned Alignment) const;
-
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN:   grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN:   grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
 
 @src = external global i32
 
+; rdar://6653118
 define i32 @loadgv() nounwind {
 entry:
 	%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
 	%2 = add i32 %0, %1
 	store i32 %2, i32* @src
 	ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: movl L_src$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+; CHECK: addl (%ecx), %eax
+; CHECK: movl %eax, (%ecx)
+; CHECK: ret
+
 }
 
 %stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
 entry:
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
 	ret void
+; CHECK: _t:
+; CHECK: movl $0, %eax
+; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+
 }