//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the cost model analysis. It provides a very basic cost // estimation for LLVM-IR. This analysis uses the services of the codegen // to approximate the cost of any IR instruction when lowered to machine // instructions. The cost results are unit-less and the cost number represents // the throughput of the machine assuming that all loads hit the cache, all // branches are predicted, etc. The cost numbers can be added in order to // compare two or more transformation alternatives. // //===----------------------------------------------------------------------===// #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); namespace { class CostModelAnalysis : public FunctionPass { public: static char ID; // Class identification, replacement for typeinfo CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } /// Returns the expected cost of the instruction. /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. unsigned getInstructionCost(const Instruction *I) const; private: void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; void print(raw_ostream &OS, const Module*) const override; /// The function that we analyze. Function *F; /// Target information. const TargetTransformInfo *TTI; }; } // End of anonymous namespace // Register this pass. char CostModelAnalysis::ID = 0; static const char cm_name[] = "Cost Model Analysis"; INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) FunctionPass *llvm::createCostModelAnalysisPass() { return new CostModelAnalysis(); } void CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } bool CostModelAnalysis::runOnFunction(Function &F) { this->F = &F; TTI = getAnalysisIfAvailable<TargetTransformInfo>(); return false; } static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) return false; return true; } static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { TargetTransformInfo::OperandValueKind OpInfo = TargetTransformInfo::OK_AnyValue; // Check for a splat of a constant or for a non uniform vector of constants. if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; if (cast<Constant>(V)->getSplatValue() != NULL) OpInfo = TargetTransformInfo::OK_UniformConstantValue; } return OpInfo; } static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) { if (M1.size() != M2.size()) return false; for (unsigned i = 0, e = M1.size(); i != e; ++i) if (M1[i] != M2[i]) return false; return true; } static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of // the vector. if (!SI && Level == 0 && IsLeft) return true; else if (!SI) return false; SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) Mask[i] = val; SmallVector<int, 16> ActualMask = SI->getShuffleMask(); if (!matchMask(Mask, ActualMask)) return false; return true; } static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, unsigned Level, unsigned NumLevels) { // Match one level of pairwise operations. // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (BinOp == 0) return false; assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); unsigned Opcode = BinOp->getOpcode(); Value *L = BinOp->getOperand(0); Value *R = BinOp->getOperand(1); ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); if (!LS && Level) return false; ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); if (!RS && Level) return false; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) return false; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; Value *NextLevelOp = 0; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) return false; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { // On the first level we can omit the shufflevector <0, undef,...>. So the // input to the other shufflevector <1, undef> must match with one of the // inputs to the current binary operation. // Example: // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != R) return false; else if (NextLevelOpR && NextLevelOpR != L) return false; NextLevelOp = NextLevelOpL ? R : L; } else return false; // Check that the next levels binary operation exists and matches with the // current one. BinaryOperator *NextLevelBinOp = 0; if (Level + 1 != NumLevels) { if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) return false; else if (NextLevelBinOp->getOpcode() != Opcode) return false; } // Shuffle mask for pairwise operation must match. if (matchPairwiseShuffleMask(LS, true, Level)) { if (!matchPairwiseShuffleMask(RS, false, Level)) return false; } else if (matchPairwiseShuffleMask(RS, true, Level)) { if (!matchPairwiseShuffleMask(LS, false, Level)) return false; } else return false; if (++Level == NumLevels) return true; // Match next level. return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); } static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty) { if (!EnableReduxCost) return false; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return false; BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); if (!RdxStart) return false; Type *VecTy = ReduxRoot->getOperand(0)->getType(); unsigned NumVecElems = VecTy->getVectorNumElements(); if (!isPowerOf2_32(NumVecElems)) return false; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) return false; Opcode = RdxStart->getOpcode(); Ty = VecTy; return true; } static std::pair<Value *, ShuffleVectorInst *> getShuffleAndOtherOprd(BinaryOperator *B) { Value *L = B->getOperand(0); Value *R = B->getOperand(1); ShuffleVectorInst *S = 0; if ((S = dyn_cast<ShuffleVectorInst>(L))) return std::make_pair(R, S); S = dyn_cast<ShuffleVectorInst>(R); return std::make_pair(L, S); } static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty) { if (!EnableReduxCost) return false; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return false; BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); if (!RdxStart) return false; unsigned RdxOpcode = RdxStart->getOpcode(); Type *VecTy = ReduxRoot->getOperand(0)->getType(); unsigned NumVecElems = VecTy->getVectorNumElements(); if (!isPowerOf2_32(NumVecElems)) return false; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 // %r = extractelement <4 x float> %bin.rdx8, i32 0 unsigned MaskStart = 1; Value *RdxOp = RdxStart; SmallVector<int, 32> ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. BinaryOperator *BinOp; if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) return false; if (BinOp->getOpcode() != RdxOpcode) return false; Value *NextRdxOp; ShuffleVectorInst *Shuffle; std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); // Check the current reduction operation and the shuffle use the same value. if (Shuffle == 0) return false; if (Shuffle->getOperand(0) != NextRdxOp) return false; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) ShuffleMask[j] = MaskStart + j; // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); if (!matchMask(ShuffleMask, Mask)) return false; RdxOp = NextRdxOp; NumVecElemsRemain /= 2; MaskStart *= 2; } Opcode = RdxOpcode; Ty = VecTy; return true; } unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (!TTI) return -1; switch (I->getOpcode()) { case Instruction::GetElementPtr:{ Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); return TTI->getAddressComputationCost(ValTy); } case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { return TTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { TargetTransformInfo::OperandValueKind Op1VK = getOperandInfo(I->getOperand(0)); TargetTransformInfo::OperandValueKind Op2VK = getOperandInfo(I->getOperand(1)); return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return TTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(I); return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { const ExtractElementInst * EEI = cast<ExtractElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); // Try to match a reduction sequence (series of shufflevector and vector // adds followed by a extractelement). unsigned ReduxOpCode; Type *ReduxType; if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) return TTI->getReductionCost(ReduxOpCode, ReduxType, false); else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) return TTI->getReductionCost(ReduxOpCode, ReduxType, true); return TTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { const InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return TTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); unsigned NumVecElems = VecTypOp0->getVectorNumElements(); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, 0); return -1; } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { SmallVector<Type*, 4> Tys; for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) Tys.push_back(II->getArgOperand(J)->getType()); return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Tys); } return -1; default: // We don't have any information on this instruction. return -1; } } void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { if (!F) return; for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { Instruction *Inst = it; unsigned Cost = getInstructionCost(Inst); if (Cost != (unsigned)-1) OS << "Cost Model: Found an estimated cost of " << Cost; else OS << "Cost Model: Unknown cost"; OS << " for instruction: "<< *Inst << "\n"; } } }