diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 178d55305e2..b11674898fb 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -297,10 +297,10 @@ public: /// \brief Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, - Type *Ty) const; - virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const; + virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const; /// @} /// \name Vector Target Information diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 0dcdd12a409..75d053c6891 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -148,14 +148,14 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return PrevTTI->getIntImmCost(Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, const APInt &Imm, - Type *Ty) const { - return PrevTTI->getIntImmCost(Opcode, Imm, Ty); +unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, - Type *Ty) const { - return PrevTTI->getIntImmCost(IID, Imm, Ty); +unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty); } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -539,12 +539,12 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { return TCC_Basic; } - unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override { return TCC_Free; } - unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override { return TCC_Free; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 1a0208c1a52..87a5dd6536b 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -103,9 +103,9 @@ public: unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override; /// @} @@ -776,6 +776,9 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { if (BitSize == 0) return ~0U; + if (Imm == 0) + return TCC_Free; + if (Imm.getBitWidth() <= 64 && (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue()))) return TCC_Basic; @@ -783,7 +786,7 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return 2 * TCC_Basic; } -unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, +unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -791,7 +794,15 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, if (BitSize == 0) return ~0U; + unsigned ImmIdx = ~0U; switch (Opcode) { + default: return TCC_Free; + case Instruction::GetElementPtr: + if (Idx != 0) + return TCC_Free; + case Instruction::Store: + ImmIdx = 0; + break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -806,28 +817,31 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: - if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) - return TCC_Free; - else - return X86TTI::getIntImmCost(Imm, Ty); + ImmIdx = 1; + break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: + case Instruction::PHI: case Instruction::Call: case Instruction::Select: case Instruction::Ret: case Instruction::Load: - case Instruction::Store: - return X86TTI::getIntImmCost(Imm, Ty); + break; } - return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty); + + if ((Idx == ImmIdx) && + Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + return TCC_Free; + + return X86TTI::getIntImmCost(Imm, Ty); } -unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, - Type *Ty) const { +unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -835,21 +849,24 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, return ~0U; switch (IID) { - default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty); + default: return TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: - if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TCC_Free; else return X86TTI::getIntImmCost(Imm, Ty); case Intrinsic::experimental_stackmap: + if (Idx < 2) + return TCC_Free; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())) + if ((Idx < 4 ) || + (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; else return X86TTI::getIntImmCost(Imm, Ty); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 89df2b496f3..fc5917b8f37 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -29,7 +29,7 @@ // certain transformations on them, which would create a new expensive constant. // // This optimization is only applied to integer constants in instructions and -// simple (this means not nested) constant cast experessions. For example: +// simple (this means not nested) constant cast expressions. For example: // %0 = load i64* inttoptr (i64 big_constant to i64*) //===----------------------------------------------------------------------===// @@ -66,7 +66,7 @@ struct ConstantUser { ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { } }; -/// \brief Keeps track of a constant candidate and its usees. +/// \brief Keeps track of a constant candidate and its uses. struct ConstantCandidate { ConstantUseListType Uses; ConstantInt *ConstInt; @@ -292,7 +292,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const { /// \brief Record constant integer ConstInt for instruction Inst at operand /// index Idx. /// -/// The operand at index Idx is not necessarily the constant inetger itself. It +/// The operand at index Idx is not necessarily the constant integer itself. It /// could also be a cast instruction or a constant expression that uses the // constant integer. void ConstantHoisting::collectConstantCandidates(Instruction *Inst, @@ -300,12 +300,12 @@ void ConstantHoisting::collectConstantCandidates(Instruction *Inst, ConstantInt *ConstInt) { unsigned Cost; // Ask the target about the cost of materializing the constant for the given - // instruction. + // instruction and operand index. if (auto IntrInst = dyn_cast(Inst)) - Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), + Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx, ConstInt->getValue(), ConstInt->getType()); else - Cost = TTI->getIntImmCost(Inst->getOpcode(), ConstInt->getValue(), + Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType()); // Ignore cheap integer constants. @@ -582,7 +582,7 @@ bool ConstantHoisting::optimizeConstants(Function &Fn) { if (ConstantVec.empty()) return false; - // Finally hoist the base constant and emit materializating code for dependent + // Finally hoist the base constant and emit materialization code for dependent // constants. bool MadeChange = emitBaseConstants(); diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll index d4a7ac7da12..8ea3c53de41 100644 --- a/test/CodeGen/X86/lsr-interesting-step.ll +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -3,26 +3,24 @@ ; The inner loop should require only one add (and no leas either). ; rdar://8100380 -; CHECK: BB0_3: -; CHECK-NEXT: movb $0, flags(%rdx) -; CHECK-NEXT: addq %rax, %rdx -; CHECK-NEXT: cmpq $8192, %rdx +; CHECK: BB0_2: +; CHECK-NEXT: movb $0, flags(%rcx) +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: cmpq $8192, %rcx ; CHECK-NEXT: jl @flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] define void @foo() nounwind { entry: - %tmp = icmp slt i64 2, 8192 ; [#uses=1] - br i1 %tmp, label %bb, label %bb21 + br label %bb bb: ; preds = %entry br label %bb7 bb7: ; preds = %bb, %bb17 %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; [#uses=2] - %tmp9 = icmp slt i64 2, 8192 ; [#uses=1] - br i1 %tmp9, label %bb10, label %bb17 + br label %bb10 bb10: ; preds = %bb7 br label %bb11 diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll index 92850f22eaa..c961bd091b9 100644 --- a/test/CodeGen/X86/negate-add-zero.ll +++ b/test/CodeGen/X86/negate-add-zero.ll @@ -827,9 +827,7 @@ declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13 declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*) define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector"* %velv) { -entry: - %0 = add i32 0, -1 ; [#uses=1] - %1 = getelementptr double* null, i32 %0 ; [#uses=1] + %1 = getelementptr double* null, i32 -1 ; [#uses=1] %2 = load double* %1, align 8 ; [#uses=1] %3 = load double* null, align 8 ; [#uses=2] %4 = load double* null, align 8 ; [#uses=2] @@ -890,13 +888,12 @@ entry: store double %52, double* %55, align 8 %56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; [#uses=1] store double %53, double* %56, align 8 - %57 = add i32 0, 4 ; [#uses=1] - %58 = getelementptr %"struct.SubVector >"* null, i32 0, i32 0 ; <%"struct.CDSVector"**> [#uses=1] - store %"struct.CDSVector"* %velv, %"struct.CDSVector"** %58, align 8 - %59 = getelementptr %"struct.SubVector >"* null, i32 0, i32 1 ; [#uses=1] - store i32 %57, i32* %59, align 4 - %60 = getelementptr %"struct.SubVector >"* null, i32 0, i32 2 ; [#uses=1] - store i32 3, i32* %60, align 8 + %57 = getelementptr %"struct.SubVector >"* null, i32 0, i32 0 ; <%"struct.CDSVector"**> [#uses=1] + store %"struct.CDSVector"* %velv, %"struct.CDSVector"** %57, align 8 + %58 = getelementptr %"struct.SubVector >"* null, i32 0, i32 1 ; [#uses=1] + store i32 4, i32* %58, align 4 + %59 = getelementptr %"struct.SubVector >"* null, i32 0, i32 2 ; [#uses=1] + store i32 3, i32* %59, align 8 unreachable } diff --git a/test/Transforms/ConstantHoisting/X86/phi.ll b/test/Transforms/ConstantHoisting/X86/phi.ll index e63c06e91a2..7134723f61a 100644 --- a/test/Transforms/ConstantHoisting/X86/phi.ll +++ b/test/Transforms/ConstantHoisting/X86/phi.ll @@ -20,10 +20,10 @@ return: ; CHECK-LABEL: @test1 ; CHECK: if.end: -; CHECK: %const_mat = add i64 %const, 1 -; CHECK-NEXT: %1 = inttoptr i64 %const_mat to i8* +; CHECK: %2 = inttoptr i64 %const to i8* +; CHECK-NEXT: br ; CHECK: return: -; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ] +; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ] } define void @test2(i1 %cmp, i64** %tmp) {