Generate PPC early conditional returns

PowerPC has a conditional branch to the link register (return) instruction: BCLR.
This should be used whenever we'd otherwise have a conditional branch to a
return. This adds a small pass, PPCEarlyReturn, which runs just prior to the
branch selection pass (and, importantly, after block placement) to generate
these conditional returns when possible. It will also eliminate unconditional
branches to returns (these happen rarely; most of the time these have already
been tail duplicated by the time PPCEarlyReturn is invoked). This is a nice
optimization for small functions that do not maintain a stack frame.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179026 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2013-04-08 16:24:03 +00:00
parent d6b89ef0fa
commit 5ee67e8e76
8 changed files with 218 additions and 28 deletions

View File

@ -31,6 +31,7 @@ namespace llvm {
class MCInst;
FunctionPass *createPPCCTRLoops();
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@ -40,7 +41,7 @@ namespace llvm {
/// \brief Creates a PPC-specific Target Transformation Info pass.
ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
/// Target Operand Flag enum.

View File

@ -565,9 +565,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
bits<3> CR;
let BO = BIBO{2-6};
let BI{0-1} = BIBO{0-1};
let BI{2-4} = CR;
let BO = BIBO{4-0};
let BI{0-1} = BIBO{5-6};
let BI{2-4} = CR{0-2};
let BH = 0;
}

View File

@ -18,8 +18,10 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@ -896,3 +898,149 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 4; // PowerPC instructions are all 4 bytes
}
}
// Redefine DEBUG_TYPE for the early-return pass that follows; the same macro
// is reused as the second argument of INITIALIZE_PASS for PPCEarlyReturn.
#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
// Bumped each time a BCC to a blr-only block is replaced by a BCLR.
STATISTIC(NumBCLR, "Number of early conditional returns");
// Bumped each time an unconditional B to a blr-only block is replaced by a BLR.
STATISTIC(NumBLR, "Number of early returns");
namespace llvm {
// Forward declaration of the registration hook that the PPCEarlyReturn
// constructor calls with the global PassRegistry.
void initializePPCEarlyReturnPass(PassRegistry&);
}
namespace {
// PPCEarlyReturn pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
// branch-to-blr sequences.
//
// For each block whose first instruction after any PHIs/labels is a BLR that
// is also the block's last non-debug instruction, the branches in its
// predecessors that target it are rewritten in place:
//   * B   <block>              -> BLR
//   * BCC <imm>, <reg>, <block> -> BCLR <imm>, <reg>
// Afterwards the now-unneeded CFG edges are dropped and, where possible, the
// blr-only block is merged into its layout predecessor or erased.
struct PPCEarlyReturn : public MachineFunctionPass {
static char ID;
// Constructing the pass also runs its registration hook against the global
// PassRegistry.
PPCEarlyReturn() : MachineFunctionPass(ID) {
initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
}
// Both pointers are (re)assigned at the top of runOnMachineFunction and are
// only meaningful while that call is in progress.
const PPCTargetMachine *TM;
const PPCInstrInfo *TII;
protected:
// Attempts to fold the return held by LastMBB into its predecessors.
//
// LastMBB is a candidate "return-only" block; if it does not have that shape
// nothing is touched. Returns true if at least one predecessor branch was
// rewritten. May erase LastMBB from its parent function, so callers must not
// use the block (or an iterator pointing at it) after this returns.
bool processBlock(MachineBasicBlock &LastMBB) {
bool Changed = false;
MachineBasicBlock::iterator I = LastMBB.begin();
I = LastMBB.SkipPHIsAndLabels(I);
// The block must be essentially empty except for the blr: the first
// instruction past PHIs/labels has to be a BLR and also the last non-debug
// instruction of the block.
if (I == LastMBB.end() || I->getOpcode() != PPC::BLR ||
I != LastMBB.getLastNonDebugInstr())
return Changed;
// Predecessors whose successor edge to LastMBB should be dropped once the
// predecessor walk below has finished.
SmallVector<MachineBasicBlock*, 8> PredToRemove;
for (MachineBasicBlock::pred_iterator PI = LastMBB.pred_begin(),
PIE = LastMBB.pred_end(); PI != PIE; ++PI) {
// OtherReference: this predecessor can still reach LastMBB by some means
// that was not rewritten. BlockChanged: at least one branch was rewritten.
bool OtherReference = false, BlockChanged = false;
// Scan every instruction of the predecessor; J is advanced manually
// because rewritten branches are erased during the walk.
for (MachineBasicBlock::iterator J = (*PI)->begin();
J != (*PI)->end();) {
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &LastMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
// NOTE(review): the BLR is built from scratch, so no operands of the
// original blr in LastMBB are carried over - confirm none are needed.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
// Step J past the old branch before erasing it so J stays valid.
MachineBasicBlock::iterator K = J++;
K->eraseFromParent();
BlockChanged = true;
++NumBLR;
continue;
}
} else if (J->getOpcode() == PPC::BCC) {
if (J->getOperand(2).getMBB() == &LastMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr that reuses the BCC's first two operands (the
// immediate and the register) unchanged.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
.addImm(J->getOperand(0).getImm())
.addReg(J->getOperand(1).getReg());
// Step J past the old branch before erasing it so J stays valid.
MachineBasicBlock::iterator K = J++;
K->eraseFromParent();
BlockChanged = true;
++NumBCLR;
continue;
}
} else if (J->isBranch()) {
// Any other kind of branch is left alone, but it may keep LastMBB
// reachable from this predecessor.
if (J->isIndirectBranch()) {
// An indirect branch is treated as a reference only when LastMBB has
// its address taken.
if (LastMBB.hasAddressTaken())
OtherReference = true;
} else
// Direct branch: look for LastMBB among its basic-block operands.
for (unsigned i = 0; i < J->getNumOperands(); ++i)
if (J->getOperand(i).isMBB() &&
J->getOperand(i).getMBB() == &LastMBB)
OtherReference = true;
}
++J;
}
// Falling off the end of the predecessor straight into LastMBB also keeps
// the edge alive.
if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&LastMBB))
OtherReference = true;
// Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
PredToRemove.push_back(*PI);
}
if (BlockChanged)
Changed = true;
}
// Now that the predecessor iteration is over, drop the edges that are no
// longer needed.
for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
PredToRemove[i]->removeSuccessor(&LastMBB);
// Cleanup is skipped when LastMBB has its address taken.
if (Changed && !LastMBB.hasAddressTaken()) {
// We now might be able to merge this blr-only block into its
// by-layout predecessor.
if (LastMBB.pred_size() == 1 &&
(*LastMBB.pred_begin())->isLayoutSuccessor(&LastMBB)) {
// Move the blr into the preceding block.
MachineBasicBlock &PrevMBB = **LastMBB.pred_begin();
PrevMBB.splice(PrevMBB.end(), &LastMBB, I);
PrevMBB.removeSuccessor(&LastMBB);
}
// With no predecessors left the block is removed from the function.
if (LastMBB.pred_empty())
LastMBB.eraseFromParent();
}
return Changed;
}
public:
// Pass entry point: caches the target machine and instruction info, then runs
// processBlock over every block of MF. Returns true if any block reported a
// change.
virtual bool runOnMachineFunction(MachineFunction &MF) {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
bool Changed = false;
// If the function does not have at least two blocks, then there is
// nothing to do.
if (MF.size() < 2)
return Changed;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
// Advance the iterator before processing: processBlock may erase B.
MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
return Changed;
}
// Adds nothing of its own; simply forwards to the MachineFunctionPass
// implementation.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
// Registers PPCEarlyReturn using DEBUG_TYPE as its argument string and
// "PowerPC Early-Return Creation" as its descriptive name.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is-analysis" flags - confirm against the INITIALIZE_PASS
// macro definition.
INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
"PowerPC Early-Return Creation", false, false)
// Definition of the static ID member that the PPCEarlyReturn constructor
// hands to MachineFunctionPass.
char PPCEarlyReturn::ID = 0;
// Factory for the early-return pass: allocates a fresh PPCEarlyReturn with
// `new` and returns it through the FunctionPass base pointer.
FunctionPass *llvm::createPPCEarlyReturnPass() {
  return new PPCEarlyReturn();
}

View File

@ -511,10 +511,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in
let isCodeGenOnly = 1 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc} ${cond:reg}, $dst"
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
let isReturn = 1, Uses = [LR, RM] in
def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
"b${cond:cc}lr ${cond:reg}", BrB, []>;
}
let Defs = [CTR], Uses = [CTR] in {
def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),

View File

@ -124,6 +124,8 @@ bool PPCPassConfig::addInstSelector() {
}
bool PPCPassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass());
// Must run branch selection immediately preceding the asm printer.
addPass(createPPCBranchSelectionPass());
return false;

View File

@ -126,25 +126,6 @@ produced this with bdnz, the loop would be a single dispatch group.
===-------------------------------------------------------------------------===
Compile:
void foo(int *P) {
if (P) *P = 0;
}
into:
_foo:
cmpwi cr0,r3,0
beqlr cr0
li r0,0
stw r0,0(r3)
blr
This is effectively a simple form of predication.
===-------------------------------------------------------------------------===
Lump the constant pool for each function into ONE pic object, and reference
pieces of it as offsets from the start. For functions like this (contrived
to have lots of constants obviously):

View File

@ -0,0 +1,52 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Stores 0 through %P unless %P is null. The null case branches directly to
; %if.end, a block that contains nothing but the return, so the expected
; output is a conditional return (beqlr) followed by the ordinary blr.
define void @foo(i32* %P) #0 {
entry:
; Null test on the pointer argument; a null %P skips the store.
%tobool = icmp eq i32* %P, null
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
store i32 0, i32* %P, align 4, !tbaa !0
br label %if.end
; Return-only block reached from both paths.
if.end: ; preds = %entry, %if.then
ret void
; CHECK: @foo
; CHECK: beqlr
; CHECK: blr
}
; Two-pointer variant: three different blocks (%if.then conditionally,
; %if.then2 and %if.else unconditionally) all branch to the return-only block
; %if.end3. The expected output still contains a conditional return (beqlr)
; followed by a blr.
define void @bar(i32* %P, i32* %Q) #0 {
entry:
; A null %P selects the %if.else path.
%tobool = icmp eq i32* %P, null
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
store i32 0, i32* %P, align 4, !tbaa !0
; A null %Q goes straight to the return-only block.
%tobool1 = icmp eq i32* %Q, null
br i1 %tobool1, label %if.end3, label %if.then2
if.then2: ; preds = %if.then
store i32 1, i32* %Q, align 4, !tbaa !0
br label %if.end3
if.else: ; preds = %entry
store i32 0, i32* %Q, align 4, !tbaa !0
br label %if.end3
; Return-only block shared by all three paths.
if.end3: ; preds = %if.then, %if.then2, %if.else
ret void
; CHECK: @bar
; CHECK: beqlr
; CHECK: blr
}
attributes #0 = { nounwind }
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}

View File

@ -107,9 +107,10 @@ define double @test10(double %x) nounwind {
declare double @trunc(double) nounwind readnone
define float @test11(float %x) nounwind {
define void @test11(float %x, float* %y) nounwind {
%call = tail call float @rintf(float %x) nounwind readnone
ret float %call
store float %call, float* %y
ret void
; CHECK: test11:
; CHECK-NOT: frin
@ -125,9 +126,10 @@ define float @test11(float %x) nounwind {
declare float @rintf(float) nounwind readnone
define double @test12(double %x) nounwind {
define void @test12(double %x, double* %y) nounwind {
%call = tail call double @rint(double %x) nounwind readnone
ret double %call
store double %call, double* %y
ret void
; CHECK: test12:
; CHECK-NOT: frin