mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-15 21:24:00 +00:00
TargetTransformInfo: address calculation parameter for gather/scather
Address calculation for gather/scather in vectorized code can incur a significant cost making vectorization unbeneficial. Add infrastructure to add cost. Tests and cost model for targets will be in follow-up commits. radar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186187 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -4403,6 +4403,59 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
|
||||
return Cost;
|
||||
}
|
||||
|
||||
/// \brief Check whether the address computation for a non-consecutive memory
|
||||
/// access looks like an unlikely candidate for being merged into the indexing
|
||||
/// mode.
|
||||
///
|
||||
/// We look for a GEP which has one index that is an induction variable and all
|
||||
/// other indices are loop invariant. If the stride of this access is also
|
||||
/// within a small bound we decide that this address computation can likely be
|
||||
/// merged into the addressing mode.
|
||||
/// In all other cases, we identify the address computation as complex.
|
||||
static bool isLikelyComplexAddressComputation(Value *Ptr,
|
||||
LoopVectorizationLegality *Legal,
|
||||
ScalarEvolution *SE,
|
||||
const Loop *TheLoop) {
|
||||
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
|
||||
if (!Gep)
|
||||
return true;
|
||||
|
||||
// We are looking for a gep with all loop invariant indices except for one
|
||||
// which should be an induction variable.
|
||||
unsigned NumOperands = Gep->getNumOperands();
|
||||
for (unsigned i = 1; i < NumOperands; ++i) {
|
||||
Value *Opd = Gep->getOperand(i);
|
||||
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
|
||||
!Legal->isInductionVariable(Opd))
|
||||
return true;
|
||||
}
|
||||
|
||||
// Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
|
||||
// can likely be merged into the address computation.
|
||||
unsigned MaxMergeDistance = 64;
|
||||
|
||||
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
|
||||
if (!AddRec)
|
||||
return true;
|
||||
|
||||
// Check the step is constant.
|
||||
const SCEV *Step = AddRec->getStepRecurrence(*SE);
|
||||
// Calculate the pointer stride and check if it is consecutive.
|
||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
||||
if (!C)
|
||||
return true;
|
||||
|
||||
const APInt &APStepVal = C->getValue()->getValue();
|
||||
|
||||
// Huge step value - give up.
|
||||
if (APStepVal.getBitWidth() > 64)
|
||||
return true;
|
||||
|
||||
int64_t StepVal = APStepVal.getSExtValue();
|
||||
|
||||
return StepVal > MaxMergeDistance;
|
||||
}
|
||||
|
||||
unsigned
|
||||
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
// If we know that this instruction will remain uniform, check the cost of
|
||||
@ -4498,6 +4551,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
|
||||
unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
|
||||
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
|
||||
bool IsComplexComputation =
|
||||
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
|
||||
unsigned Cost = 0;
|
||||
// The cost of extracting from the value vector and pointer vector.
|
||||
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
|
||||
@ -4513,7 +4568,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
}
|
||||
|
||||
// The cost of the scalar loads/stores.
|
||||
Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
|
||||
Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
|
||||
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
|
||||
Alignment, AS);
|
||||
return Cost;
|
||||
|
Reference in New Issue
Block a user