TargetTransformInfo: address calculation cost parameter for gather/scatter

Address calculation for gather/scatter in vectorized code can incur a
significant cost, making vectorization unprofitable. Add infrastructure to
model this cost.
Tests and cost models for individual targets will follow in later commits.

radar://14351991

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186187 91177308-0d34-0410-b5e6-96231b3b80d8
Arnold Schwaighofer
2013-07-12 19:16:02 +00:00
parent ec11706155
commit c0a11edba6
5 changed files with 69 additions and 9 deletions
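
The target-facing change is that TTI.getAddressComputationCost() now takes the (vector) pointer type plus an IsComplex flag, as the diff below shows. As a minimal sketch of how a target could act on that flag — the function name and the constant 10 are assumptions for illustration; the real per-target cost models are left to the follow-up commits:

// Illustrative sketch only (not from this commit): how a target-side cost
// hook might use the new IsComplex flag.
#include "llvm/IR/Type.h"

static unsigned exampleAddressComputationCost(llvm::Type *Ty, bool IsComplex) {
  // A complex address computation feeding a vector of pointers (a gather or
  // scatter) has to be scalarized, so charge extra to reflect the per-lane
  // address arithmetic that cannot be folded into the addressing mode.
  if (Ty->isVectorTy() && IsComplex)
    return 10;
  // An address that folds into the addressing mode is nearly free.
  return 0;
}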


@@ -4403,6 +4403,59 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
   return Cost;
 }
 
+/// \brief Check whether the address computation for a non-consecutive memory
+/// access looks like an unlikely candidate for being merged into the indexing
+/// mode.
+///
+/// We look for a GEP which has one index that is an induction variable and all
+/// other indices are loop invariant. If the stride of this access is also
+/// within a small bound we decide that this address computation can likely be
+/// merged into the addressing mode.
+/// In all other cases, we identify the address computation as complex.
+static bool isLikelyComplexAddressComputation(Value *Ptr,
+                                              LoopVectorizationLegality *Legal,
+                                              ScalarEvolution *SE,
+                                              const Loop *TheLoop) {
+  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!Gep)
+    return true;
+
+  // We are looking for a gep with all loop invariant indices except for one
+  // which should be an induction variable.
+  unsigned NumOperands = Gep->getNumOperands();
+  for (unsigned i = 1; i < NumOperands; ++i) {
+    Value *Opd = Gep->getOperand(i);
+    if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
+        !Legal->isInductionVariable(Opd))
+      return true;
+  }
+
+  // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
+  // can likely be merged into the address computation.
+  unsigned MaxMergeDistance = 64;
+
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+  if (!AddRec)
+    return true;
+
+  // Check the step is constant.
+  const SCEV *Step = AddRec->getStepRecurrence(*SE);
+  // Calculate the pointer stride and check if it is consecutive.
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+  if (!C)
+    return true;
+
+  const APInt &APStepVal = C->getValue()->getValue();
+
+  // Huge step value - give up.
+  if (APStepVal.getBitWidth() > 64)
+    return true;
+
+  int64_t StepVal = APStepVal.getSExtValue();
+
+  return StepVal > MaxMergeDistance;
+}
+
 unsigned
 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   // If we know that this instruction will remain uniform, check the cost of
@@ -4498,6 +4551,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
     unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
     if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
+      bool IsComplexComputation =
+        isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
       unsigned Cost = 0;
       // The cost of extracting from the value vector and pointer vector.
       Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -4513,7 +4568,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
       }
 
       // The cost of the scalar loads/stores.
-      Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+      Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
      Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                        Alignment, AS);
       return Cost;
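
For intuition, the two loops below give a rough source-level picture of what the new heuristic separates. They are assumed examples (the names Pair, strided, and gather are invented here), not taken from the commit or its tests. Both accesses are non-consecutive and get scalarized, so each lane pays the address computation cost plus the extract/insert and scalar memory-op costs charged above, but only the second loop is charged the higher "complex" address computation cost.

// Assumed examples for illustration; not part of this commit or its tests.
struct Pair { float Key; float Val; };

// Strided access: the GEP indices are the induction variable 'i' and a
// constant field offset, and the 8-byte stride is within the 64-byte bound,
// so isLikelyComplexAddressComputation() returns false (cheap address).
void strided(Pair *A, float *B, int N) {
  for (long i = 0; i < N; ++i)
    B[i] = A[i].Val;
}

// Gather: 'Idx[i]' is neither loop invariant nor an induction variable, so
// each lane's address is classified as complex and charged
// getAddressComputationCost(PtrTy, /*IsComplex=*/true).
void gather(float *A, int *Idx, float *B, int N) {
  for (long i = 0; i < N; ++i)
    B[i] = A[Idx[i]];
}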