Sink noop copies into the basic block that uses them. This reduces the number

of cross-block live ranges, and allows the bb-at-a-time selector to always
coallesce these away, at isel time.

This reduces the load on the coallescer and register allocator.  For example
on a codec on X86, we went from:

   1643 asm-printer           - Number of machine instrs printed
    419 liveintervals         - Number of loads/stores folded into instructions
   1144 liveintervals         - Number of identity moves eliminated after coalescing
   1022 liveintervals         - Number of interval joins performed
    282 liveintervals         - Number of intervals after coalescing
   1304 liveintervals         - Number of original intervals
     86 regalloc              - Number of times we had to backtrack
1.90232 regalloc              - Ratio of intervals processed over total intervals
     40 spiller               - Number of values reused
    182 spiller               - Number of loads added
    121 spiller               - Number of stores added
    132 spiller               - Number of register spills
      6 twoaddressinstruction - Number of instructions commuted to coalesce
    360 twoaddressinstruction - Number of two-address instructions

to:

   1636 asm-printer           - Number of machine instrs printed
    403 liveintervals         - Number of loads/stores folded into instructions
   1155 liveintervals         - Number of identity moves eliminated after coalescing
   1033 liveintervals         - Number of interval joins performed
    279 liveintervals         - Number of intervals after coalescing
   1312 liveintervals         - Number of original intervals
     76 regalloc              - Number of times we had to backtrack
1.88998 regalloc              - Ratio of intervals processed over total intervals
      1 spiller               - Number of copies elided
     41 spiller               - Number of values reused
    191 spiller               - Number of loads added
    114 spiller               - Number of stores added
    128 spiller               - Number of register spills
      4 twoaddressinstruction - Number of instructions commuted to coalesce
    356 twoaddressinstruction - Number of two-address instructions

On this testcase, this change provides a modest reduction in spill code,
regalloc iterations, and total instructions emitted.  It increases the number
of register coallesces.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28115 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2006-05-05 01:04:50 +00:00
parent 71fe0f4a43
commit 7e598096ea

View File

@ -2763,6 +2763,49 @@ static Value *InsertGEPComputeCode(Value *&V, BasicBlock *BB, Instruction *GEPI,
return V = Ptr;
}
/// OptimizeNoopCopyExpression - We have determined that the specified cast
/// instruction is a noop copy (e.g. it's casting from one pointer type to
/// another, int->uint, or int->sbyte on PPC.
static void OptimizeNoopCopyExpression(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCasts - Only insert a cast in each block once.
std::map<BasicBlock*, CastInst*> InsertedCasts;
for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this cast is used in. For PHI's this is the
// appropriate predecessor block.
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User)) {
unsigned OpVal = UI.getOperandNo()/2;
UserBB = PN->getIncomingBlock(OpVal);
}
// Preincrement use iterator so we don't invalidate it.
++UI;
// If this user is in the same block as the cast, don't change the cast.
if (UserBB == DefBB) continue;
// If we have already inserted a cast into this block, use it.
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
BasicBlock::iterator InsertPt = UserBB->begin();
while (isa<PHINode>(InsertPt)) ++InsertPt;
InsertedCast =
new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt);
}
// Replace a use of the cast with a use of the new casat.
TheUse = InsertedCast;
}
}
/// OptimizeGEPExpression - Since we are doing basic-block-at-a-time instruction
/// selection, we want to be a bit careful about some things. In particular, if
@ -2890,8 +2933,10 @@ bool SelectionDAGISel::runOnFunction(Function &Fn) {
// constants, this way the load of the constant into a vreg will not be placed
// into MBBs that are used some other way.
//
// In this pass we also look for GEP instructions that are used across basic
// blocks and rewrites them to improve basic-block-at-a-time selection.
// In this pass we also look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
//
//
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
PHINode *PN;
@ -2901,9 +2946,37 @@ bool SelectionDAGISel::runOnFunction(Function &Fn) {
if (isa<Constant>(PN->getIncomingValue(i)))
SplitCriticalEdge(PN->getIncomingBlock(i), BB);
for (BasicBlock::iterator E = BB->end(); BBI != E; )
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(BBI++))
for (BasicBlock::iterator E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
OptimizeGEPExpression(GEPI, TLI.getTargetData());
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// If this is a noop copy, sink it into user blocks to reduce the number
// of virtual registers that must be created and coallesced.
MVT::ValueType SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
MVT::ValueType DstVT = TLI.getValueType(CI->getType());
// This is an fp<->int conversion?
if (MVT::isInteger(SrcVT) != MVT::isInteger(DstVT))
continue;
// If this is an extension, it will be a zero or sign extension, which
// isn't a noop.
if (SrcVT < DstVT) continue;
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(SrcVT);
if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(DstVT);
// If, after promotion, these are the same types, this is a noop copy.
if (SrcVT == DstVT)
OptimizeNoopCopyExpression(CI);
}
}
}
FunctionLoweringInfo FuncInfo(TLI, Fn, MF);