mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-22 10:33:23 +00:00
Fix rdar://9289512 - not folding load into compare at -O0
The basic issue here is that bottom-up isel is matching the branch and compare, and was failing to fold the load into the branch/compare combo. Fixing this (by allowing folding into any instruction of a sequence that is selected) allows us to produce things like: cmpb $0, 52(%rax) je LBB4_2 instead of: movb 52(%rax), %cl cmpb $0, %cl je LBB4_2 This makes the generated -O0 code run a bit faster, but also speeds up compile time by putting less pressure on the register allocator and generating less code. This was one of the biggest classes of missing load folding. Implementing this shrinks 176.gcc's c-decl.s (as a random example) by about 4% in (verbose-asm) line count. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129656 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8bdc251dc5
commit
fd3f635103
@ -280,7 +280,8 @@ private:
|
|||||||
|
|
||||||
void PrepareEHLandingPad();
|
void PrepareEHLandingPad();
|
||||||
void SelectAllBasicBlocks(const Function &Fn);
|
void SelectAllBasicBlocks(const Function &Fn);
|
||||||
bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
|
bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
|
||||||
|
FastISel *FastIS);
|
||||||
void FinishBasicBlock();
|
void FinishBasicBlock();
|
||||||
|
|
||||||
void SelectBasicBlock(BasicBlock::const_iterator Begin,
|
void SelectBasicBlock(BasicBlock::const_iterator Begin,
|
||||||
|
@ -748,9 +748,31 @@ void SelectionDAGISel::PrepareEHLandingPad() {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
|
||||||
|
/// load into the specified FoldInst. Note that we could have a sequence where
|
||||||
|
/// multiple LLVM IR instructions are folded into the same machineinstr. For
|
||||||
|
/// example we could have:
|
||||||
|
/// A: x = load i32 *P
|
||||||
|
/// B: y = icmp A, 42
|
||||||
|
/// C: br y, ...
|
||||||
|
///
|
||||||
|
/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
|
||||||
|
/// any other folded instructions) because it is between A and C.
|
||||||
|
///
|
||||||
|
/// If we succeed in folding the load into the operation, return true.
|
||||||
|
///
|
||||||
bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
|
bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
|
||||||
|
const Instruction *FoldInst,
|
||||||
FastISel *FastIS) {
|
FastISel *FastIS) {
|
||||||
|
SmallPtrSet<const Instruction*, 4> FoldedInsts;
|
||||||
|
for (BasicBlock::const_iterator II = FoldInst; &*II != LI; --II)
|
||||||
|
FoldedInsts.insert(II);
|
||||||
|
|
||||||
|
// We know that the load has a single use, but don't know what it is. If it
|
||||||
|
// isn't one of the folded instructions, then we can't succeed here.
|
||||||
|
if (!FoldedInsts.count(LI->use_back()))
|
||||||
|
return false;
|
||||||
|
|
||||||
// Don't try to fold volatile loads. Target has to deal with alignment
|
// Don't try to fold volatile loads. Target has to deal with alignment
|
||||||
// constraints.
|
// constraints.
|
||||||
if (LI->isVolatile()) return false;
|
if (LI->isVolatile()) return false;
|
||||||
@ -835,10 +857,10 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) {
|
|||||||
/// Return false if it needs to be emitted.
|
/// Return false if it needs to be emitted.
|
||||||
static bool isFoldedOrDeadInstruction(const Instruction *I,
|
static bool isFoldedOrDeadInstruction(const Instruction *I,
|
||||||
FunctionLoweringInfo *FuncInfo) {
|
FunctionLoweringInfo *FuncInfo) {
|
||||||
return !I->mayWriteToMemory() &&
|
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
|
||||||
!isa<TerminatorInst>(I) &&
|
!isa<TerminatorInst>(I) && // Terminators aren't folded.
|
||||||
!isa<DbgInfoIntrinsic>(I) &&
|
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
|
||||||
!FuncInfo->isExportedInst(I);
|
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
|
||||||
}
|
}
|
||||||
|
|
||||||
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
||||||
@ -930,16 +952,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
|||||||
|
|
||||||
// Try to select the instruction with FastISel.
|
// Try to select the instruction with FastISel.
|
||||||
if (FastIS->SelectInstruction(Inst)) {
|
if (FastIS->SelectInstruction(Inst)) {
|
||||||
// If fast isel succeeded, check to see if there is a single-use
|
// If fast isel succeeded, skip over all the folded instructions, and
|
||||||
// non-volatile load right before the selected instruction, and see if
|
// then see if there is a load right before the selected instructions.
|
||||||
// the load is used by the instruction. If so, try to fold it.
|
// Try to fold the load if so.
|
||||||
const Instruction *BeforeInst = 0;
|
const Instruction *BeforeInst = Inst;
|
||||||
if (Inst != Begin)
|
while (BeforeInst != Begin) {
|
||||||
BeforeInst = llvm::prior(llvm::prior(BI));
|
BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
|
||||||
if (BeforeInst && isa<LoadInst>(BeforeInst) &&
|
if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
|
||||||
BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
|
break;
|
||||||
TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
|
}
|
||||||
--BI; // If we succeeded, don't re-select the load.
|
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
|
||||||
|
BeforeInst->hasOneUse() &&
|
||||||
|
TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
|
||||||
|
// If we succeeded, don't re-select the load.
|
||||||
|
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s | FileCheck %s
|
; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||||
target triple = "x86_64-apple-darwin10.0.0"
|
target triple = "x86_64-apple-darwin10.0.0"
|
||||||
@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp {
|
|||||||
|
|
||||||
; CHECK: test1:
|
; CHECK: test1:
|
||||||
; CHECK: andl $8,
|
; CHECK: andl $8,
|
||||||
|
|
||||||
|
|
||||||
|
; rdar://9289512 - The load should fold into the compare.
|
||||||
|
define void @test2(i64 %x) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
%x.addr = alloca i64, align 8
|
||||||
|
store i64 %x, i64* %x.addr, align 8
|
||||||
|
%tmp = load i64* %x.addr, align 8
|
||||||
|
%cmp = icmp sgt i64 %tmp, 42
|
||||||
|
br i1 %cmp, label %if.then, label %if.end
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %if.then, %entry
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: test2:
|
||||||
|
; CHECK: movq %rdi, -8(%rsp)
|
||||||
|
; CHECK: cmpq $42, -8(%rsp)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user