diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 012ff8ad833..499571fcac6 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -204,20 +204,23 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, std::pair LT = TLI->getTypeLegalizationCost(Ty); + bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + unsigned OpCost = (IsFloat ? 2 : 1); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. // If the type is split to multiple registers, assume that thre is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + return LT.first * 2 * OpCost; + return LT.first * 1 * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { // If the operation is custom lowered then assume // thare the code is twice as expensive. - return LT.first * 2; + return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. @@ -230,7 +233,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, } // We don't know anything about this scalar instruction. - return 1; + return OpCost; } unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index 85b442533f4..92f5a1ec3a0 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -66,9 +66,9 @@ define void @avx2mull() { ; CHECK: fmul define i32 @fmul(i32 %arg) { - ;CHECK: cost of 1 {{.*}} fmul + ;CHECK: cost of 2 {{.*}} fmul %A = fmul <4 x float> undef, undef - ;CHECK: cost of 1 {{.*}} fmul + ;CHECK: cost of 2 {{.*}} fmul %B = fmul <8 x float> undef, undef ret i32 undef } diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll index 493f23b0985..bbf565d1cc7 100644 --- a/test/Transforms/BBVectorize/X86/loop1.ll +++ b/test/Transforms/BBVectorize/X86/loop1.ll @@ -34,7 +34,15 @@ for.body: ; preds = %for.body, %entry %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 10 br i1 %exitcond, label %for.end, label %for.body -; CHECK-NOT: <2 x double> +; CHECK: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: fadd <2 x double> +; CHECK-NEXT: insertelement +; CHECK-NEXT: insertelement +; CHECK-NEXT: fadd <2 x double> +; CHECK-NEXT: insertelement +; CHECK-NEXT: fmul <2 x double> + ; CHECK-UNRL: %mul = fmul <2 x double> %2, %2 ; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3 ; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3 diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll index 0113e38bb1c..8abfa5f8bd2 100644 --- a/test/Transforms/BBVectorize/X86/simple.ll +++ b/test/Transforms/BBVectorize/X86/simple.ll @@ -12,7 +12,11 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) { %R = fmul double %Z1, %Z2 ret double %R ; CHECK: @test1 -; CHECK-NOT: fmul <2 x double> +; CHECK: fsub <2 x double> +; CHECK: fmul <2 x double> +; CHECK: fadd <2 x double> +; CHECK: extract +; CHECK: extract ; CHECK: ret double %R } @@ -63,7 +67,12 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { %R = fmul double %Z1, %Z2 ret double %R ; CHECK: @test2 -; CHECK-NOT: fmul <2 x double> +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: fsub <2 x double> +; CHECK: fmul <2 x double> ; CHECK: ret double %R } @@ -80,7 +89,15 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) { %R = fmul double %Z1, %Z2 ret double %R ; CHECK: @test4 -; CHECK-NOT: fmul <2 x double> +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: fsub <2 x double> +; CHECK: fmul <2 x double> +; CHECK: insertelement +; CHECK: insertelement +; CHECK: fadd <2 x double> ; CHECK: ret double %R }