A pile of long-overdue refactorings here. There are some very, *very*
minor behavior changes with this, but nothing I have seen evidence of in
the wild or expect to be meaningful. The real goal is unifying our logic
and simplifying the interfaces. A summary of the changes follows:

- Make 'callIsSmall' actually accept a callsite so it can handle
  intrinsics, and simplify callers appropriately (see the sketch after
  this list).
- Nuke a completely bogus declaration of 'callIsSmall' that was still
  lurking in InlineCost.h... No idea how this got missed.
- Teach 'isInstructionFree' about the various more intelligent 'free'
  heuristics that got added to the inline cost analysis during review
  and testing. This mostly concerns int->ptr and ptr->int casts.
- Switch most of the interesting parts of the inline cost analysis that
  were essentially computing 'is this instruction free?' to use the code
  metrics routine instead. This way we won't keep duplicating logic.
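
A minimal sketch of how a caller can use the reworked 'callIsSmall'
interface; the 'countCallOverhead' helper below is hypothetical and not
something this change adds:

    #include "llvm/Analysis/CodeMetrics.h"
    #include "llvm/BasicBlock.h"
    #include "llvm/Support/CallSite.h"

    // Charge roughly one instruction per argument for every call in a block
    // that will not lower to something trivially cheap.
    static unsigned countCallOverhead(const llvm::BasicBlock *BB) {
      unsigned NumInsts = 0;
      for (llvm::BasicBlock::const_iterator I = BB->begin(), E = BB->end();
           I != E; ++I) {
        llvm::ImmutableCallSite CS(&*I);
        if (!CS.getInstruction())
          continue; // Not a call or invoke.
        // Intrinsic calls are recognized inside callIsSmall itself now, so
        // callers no longer need a separate isa<IntrinsicInst> check.
        if (!llvm::callIsSmall(CS))
          NumInsts += CS.arg_size();
      }
      return NumInsts;
    }

This is essentially the simplification the CodeMetrics and inline cost
callers pick up in the diff below.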

All of this is motivated by the desire to allow other passes to compute
a 'cost' metric for a particular basic block that is roughly equivalent
to the one the inline cost analysis computes; a sketch of that usage
follows below. Sadly, re-using the same analysis for both is really
messy, because only the actual inline cost analysis is ever going to go
to the contortions required for simplification, SROA analysis, etc.
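
A rough sketch of that intended use, relying only on the existing
CodeMetrics interface (analyzeBasicBlock and its NumInsts count); the
'estimateBlockCost' helper is hypothetical, not part of this change:

    #include "llvm/Analysis/CodeMetrics.h"
    #include "llvm/BasicBlock.h"
    #include "llvm/Target/TargetData.h"

    // Estimate a per-block cost using the same notion of 'free' instructions
    // and 'small' calls the inline cost analysis now consults, but without
    // its simplification and SROA machinery.
    static unsigned estimateBlockCost(const llvm::BasicBlock *BB,
                                      const llvm::TargetData *TD) {
      llvm::CodeMetrics Metrics;
      Metrics.analyzeBasicBlock(BB, TD);
      return Metrics.NumInsts;
    }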

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156140 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Chandler Carruth
Date:   2012-05-04 00:58:03 +00:00
Parent: a83a6d3725
Commit: d5003cafd6
5 changed files with 40 additions and 50 deletions

include/llvm/Analysis/CodeMetrics.h

@@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_CODEMETRICS_H

 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CallSite.h"

 namespace llvm {
   class BasicBlock;
@@ -29,10 +30,11 @@ namespace llvm {
   /// \brief Check whether a call will lower to something small.
   ///
-  /// This tests checks whether calls to this function will lower to something
+  /// This tests checks whether this callsite will lower to something
   /// significantly cheaper than a traditional call, often a single
-  /// instruction.
-  bool callIsSmall(const Function *F);
+  /// instruction. Note that if isInstructionFree(CS.getInstruction()) would
+  /// return true, so will this function.
+  bool callIsSmall(ImmutableCallSite CS);

   /// \brief Utility to calculate the size and a few similar metrics for a set
   /// of basic blocks.

include/llvm/Analysis/InlineCost.h

@@ -127,10 +127,6 @@ namespace llvm {
     // adding a replacement API.
     InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
   };
-
-  /// callIsSmall - If a call is likely to lower to a single target instruction,
-  /// or is otherwise deemed small return true.
-  bool callIsSmall(const Function *Callee);
 }

 #endif

lib/Analysis/CodeMetrics.cpp

@@ -22,7 +22,11 @@ using namespace llvm;
 /// callIsSmall - If a call is likely to lower to a single target instruction,
 /// or is otherwise deemed small return true.
 /// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
+bool llvm::callIsSmall(ImmutableCallSite CS) {
+  if (isa<IntrinsicInst>(CS.getInstruction()))
+    return true;
+
+  const Function *F = CS.getCalledFunction();
   if (!F) return false;

   if (F->hasLocalLinkage()) return false;
@@ -79,8 +83,24 @@ bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
   if (const CastInst *CI = dyn_cast<CastInst>(I)) {
     // Noop casts, including ptr <-> int, don't count.
-    if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+    if (CI->isLosslessCast())
       return true;
+
+    Value *Op = CI->getOperand(0);
+    // An inttoptr cast is free so long as the input is a legal integer type
+    // which doesn't contain values outside the range of a pointer.
+    if (isa<IntToPtrInst>(CI) && TD &&
+        TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
+        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
+      return true;
+
+    // A ptrtoint cast is free so long as the result is large enough to store
+    // the pointer, and a legal integer type.
+    if (isa<PtrToIntInst>(CI) && TD &&
+        TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
+        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+      return true;

     // trunc to a native type is free (assuming the target has compare and
     // shift-right of the same width).
     if (TD && isa<TruncInst>(CI) &&
@@ -126,7 +146,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
           isRecursive = true;
       }

-      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+      if (!callIsSmall(CS)) {
         // Each argument to a call takes on average one instruction to set up.
         NumInsts += CS.arg_size();

lib/Analysis/InlineCost.cpp

@@ -398,10 +398,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;

-  // A ptrtoint cast is free so long as the result is large enough to store the
-  // pointer, and a legal integer type.
-  return TD && TD->isLegalInteger(IntegerSize) &&
-    IntegerSize >= TD->getPointerSizeInBits();
+  return isInstructionFree(&I, TD);
 }

 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
@@ -428,10 +425,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;

-  // An inttoptr cast is free so long as the input is a legal integer type
-  // which doesn't contain values outside the range of a pointer.
-  return TD && TD->isLegalInteger(IntegerSize) &&
-    IntegerSize <= TD->getPointerSizeInBits();
+  return isInstructionFree(&I, TD);
 }

 bool CallAnalyzer::visitCastInst(CastInst &I) {
@@ -445,24 +439,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
   disableSROA(I.getOperand(0));

-  // No-op casts don't have any cost.
-  if (I.isLosslessCast())
-    return true;
-
-  // trunc to a native type is free (assuming the target has compare and
-  // shift-right of the same width).
-  if (TD && isa<TruncInst>(I) &&
-      TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
-    return true;
-
-  // Result of a cmp instruction is often extended (to be used by other
-  // cmp instructions, logical or return instructions). These are usually
-  // no-ops on most sane targets.
-  if (isa<CmpInst>(I.getOperand(0)))
-    return true;
-
-  // Assume the rest of the casts require work.
-  return false;
+  return isInstructionFree(&I, TD);
 }

 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
@@ -636,21 +613,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
     default:
       return Base::visitCallSite(CS);

-    case Intrinsic::dbg_declare:
-    case Intrinsic::dbg_value:
-    case Intrinsic::invariant_start:
-    case Intrinsic::invariant_end:
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
     case Intrinsic::memset:
     case Intrinsic::memcpy:
     case Intrinsic::memmove:
-    case Intrinsic::objectsize:
-    case Intrinsic::ptr_annotation:
-    case Intrinsic::var_annotation:
-      // SROA can usually chew through these intrinsics and they have no cost
-      // so don't pay the price of analyzing them in detail.
-      return true;
+      // SROA can usually chew through these intrinsics, but they aren't free.
+      return false;
     }
   }
@@ -662,7 +629,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
     return false;
   }

-  if (!callIsSmall(F)) {
+  if (!callIsSmall(CS)) {
     // We account for the average 1 instruction per call argument setup
     // here.
     Cost += CS.arg_size() * InlineConstants::InstrCost;
@@ -706,6 +673,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
 }

 bool CallAnalyzer::visitInstruction(Instruction &I) {
+  // Some instructions are free. All of the free intrinsics can also be
+  // handled by SROA, etc.
+  if (isInstructionFree(&I, TD))
+    return true;
+
   // We found something we don't understand or can't handle. Mark any SROA-able
   // values in the operand list as no longer viable.
   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)

lib/Transforms/Scalar/TailRecursionElimination.cpp

@@ -391,7 +391,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
   if (BB == &F->getEntryBlock() &&
       FirstNonDbg(BB->front()) == CI &&
       FirstNonDbg(llvm::next(BB->begin())) == TI &&
-      callIsSmall(F)) {
+      callIsSmall(CI)) {
     // A single-block function with just a call and a return. Check that
     // the arguments match.
     CallSite::arg_iterator I = CallSite(CI).arg_begin(),