[Constant Hoisting] Make the constant materialization cost operand dependent

Extend the target hook to also take the operand index into account when
calculating the cost of constant materialization.

Related to <rdar://problem/16381500>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204435 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-03-21 06:04:45 +00:00
parent 337eb3adbe
commit d3cf783ed1
7 changed files with 66 additions and 54 deletions

View File

@ -297,10 +297,10 @@ public:
/// \brief Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
Type *Ty) const;
virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) const;
virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const;
/// @}
/// \name Vector Target Information

View File

@ -148,14 +148,14 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Imm, Ty);
}
unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, const APInt &Imm,
Type *Ty) const {
return PrevTTI->getIntImmCost(Opcode, Imm, Ty);
unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
Type *Ty) const {
return PrevTTI->getIntImmCost(IID, Imm, Ty);
unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
@ -539,12 +539,12 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return TCC_Basic;
}
unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) const override {
return TCC_Free;
}
unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override {
return TCC_Free;
}

View File

@ -103,9 +103,9 @@ public:
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
/// @}
@ -776,6 +776,9 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
if (BitSize == 0)
return ~0U;
if (Imm == 0)
return TCC_Free;
if (Imm.getBitWidth() <= 64 &&
(isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
return TCC_Basic;
@ -783,7 +786,7 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return 2 * TCC_Basic;
}
unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
@ -791,7 +794,15 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
if (BitSize == 0)
return ~0U;
unsigned ImmIdx = ~0U;
switch (Opcode) {
default: return TCC_Free;
case Instruction::GetElementPtr:
if (Idx != 0)
return TCC_Free;
case Instruction::Store:
ImmIdx = 0;
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@ -806,28 +817,31 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
return TCC_Free;
else
return X86TTI::getIntImmCost(Imm, Ty);
ImmIdx = 1;
break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::BitCast:
case Instruction::PHI:
case Instruction::Call:
case Instruction::Select:
case Instruction::Ret:
case Instruction::Load:
case Instruction::Store:
return X86TTI::getIntImmCost(Imm, Ty);
break;
}
return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty);
if ((Idx == ImmIdx) &&
Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
return TCC_Free;
return X86TTI::getIntImmCost(Imm, Ty);
}
unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
Type *Ty) const {
unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@ -835,21 +849,24 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
return ~0U;
switch (IID) {
default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty);
default: return TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
return TCC_Free;
else
return X86TTI::getIntImmCost(Imm, Ty);
case Intrinsic::experimental_stackmap:
if (Idx < 2)
return TCC_Free;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
if ((Idx < 4 ) ||
(Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TCC_Free;
else
return X86TTI::getIntImmCost(Imm, Ty);

View File

@ -29,7 +29,7 @@
// certain transformations on them, which would create a new expensive constant.
//
// This optimization is only applied to integer constants in instructions and
// simple (this means not nested) constant cast experessions. For example:
// simple (this means not nested) constant cast expressions. For example:
// %0 = load i64* inttoptr (i64 big_constant to i64*)
//===----------------------------------------------------------------------===//
@ -66,7 +66,7 @@ struct ConstantUser {
ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { }
};
/// \brief Keeps track of a constant candidate and its usees.
/// \brief Keeps track of a constant candidate and its uses.
struct ConstantCandidate {
ConstantUseListType Uses;
ConstantInt *ConstInt;
@ -292,7 +292,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
/// \brief Record constant integer ConstInt for instruction Inst at operand
/// index Idx.
///
/// The operand at index Idx is not necessarily the constant inetger itself. It
/// The operand at index Idx is not necessarily the constant integer itself. It
/// could also be a cast instruction or a constant expression that uses the
/// constant integer.
void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
@ -300,12 +300,12 @@ void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
ConstantInt *ConstInt) {
unsigned Cost;
// Ask the target about the cost of materializing the constant for the given
// instruction.
// instruction and operand index.
if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(),
Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
ConstInt->getValue(), ConstInt->getType());
else
Cost = TTI->getIntImmCost(Inst->getOpcode(), ConstInt->getValue(),
Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
ConstInt->getType());
// Ignore cheap integer constants.
@ -582,7 +582,7 @@ bool ConstantHoisting::optimizeConstants(Function &Fn) {
if (ConstantVec.empty())
return false;
// Finally hoist the base constant and emit materializating code for dependent
// Finally hoist the base constant and emit materialization code for dependent
// constants.
bool MadeChange = emitBaseConstants();

View File

@ -3,26 +3,24 @@
; The inner loop should require only one add (and no leas either).
; rdar://8100380
; CHECK: BB0_3:
; CHECK-NEXT: movb $0, flags(%rdx)
; CHECK-NEXT: addq %rax, %rdx
; CHECK-NEXT: cmpq $8192, %rdx
; CHECK: BB0_2:
; CHECK-NEXT: movb $0, flags(%rcx)
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: cmpq $8192, %rcx
; CHECK-NEXT: jl
@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]
define void @foo() nounwind {
entry:
%tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1]
br i1 %tmp, label %bb, label %bb21
br label %bb
bb: ; preds = %entry
br label %bb7
bb7: ; preds = %bb, %bb17
%tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2]
%tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1]
br i1 %tmp9, label %bb10, label %bb17
br label %bb10
bb10: ; preds = %bb7
br label %bb11

View File

@ -827,9 +827,7 @@ declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13
declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*)
define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {
entry:
%0 = add i32 0, -1 ; <i32> [#uses=1]
%1 = getelementptr double* null, i32 %0 ; <double*> [#uses=1]
%1 = getelementptr double* null, i32 -1 ; <double*> [#uses=1]
%2 = load double* %1, align 8 ; <double> [#uses=1]
%3 = load double* null, align 8 ; <double> [#uses=2]
%4 = load double* null, align 8 ; <double> [#uses=2]
@ -890,13 +888,12 @@ entry:
store double %52, double* %55, align 8
%56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
store double %53, double* %56, align 8
%57 = add i32 0, 4 ; <i32> [#uses=1]
%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8
%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 %57, i32* %59, align 4
%60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1]
store i32 3, i32* %60, align 8
%57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %57, align 8
%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 4, i32* %58, align 4
%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1]
store i32 3, i32* %59, align 8
unreachable
}

View File

@ -20,10 +20,10 @@ return:
; CHECK-LABEL: @test1
; CHECK: if.end:
; CHECK: %const_mat = add i64 %const, 1
; CHECK-NEXT: %1 = inttoptr i64 %const_mat to i8*
; CHECK: %2 = inttoptr i64 %const to i8*
; CHECK-NEXT: br
; CHECK: return:
; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ]
; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ]
}
define void @test2(i1 %cmp, i64** %tmp) {