Refine fix to bug 15041.

Thanks to help from Nadav and Hal, I have a more reasonable (and even
correct!) approach.  This specifically penalizes the insertelement
and extractelement operations for the performance hit that will occur
on PowerPC processors.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt
2013-02-08 18:19:17 +00:00
parent 33daeab1bb
commit 0f58dbae4a
2 changed files with 31 additions and 16 deletions
+15 -16
View File
@@ -194,24 +194,23 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
assert(Val->isVectorTy() && "This must be a vector type");
const unsigned Awful = 1000;
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// Vector element insert/extract with Altivec is very expensive.
// Until VSX is available, avoid vectorizing loops that require
// these operations.
if (Opcode == ISD::EXTRACT_VECTOR_ELT ||
Opcode == ISD::INSERT_VECTOR_ELT)
return Awful;
// Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
// raised further if other unprofitable cases remain.
unsigned LHSPenalty = 12;
// We don't vectorize SREM/UREM so well. Constrain the vectorizer
// for those as well.
if (Opcode == ISD::SREM || Opcode == ISD::UREM)
return Awful;
// VSELECT is not yet implemented, leading to use of insert/extract
// and ISEL, hence not a good idea.
if (Opcode == ISD::VSELECT)
return Awful;
// Vector element insert/extract with Altivec is very expensive,
// because they require store and reload with the attendant
// processor stall for load-hit-store. Until VSX is available,
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
return LHSPenalty +
TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
}