[Fast-math] Disable "(C1/X)*C2 => (C1*C2)/X" if C1/X has multiple uses.

If "C1/X" has multiple uses, the only benefit of this transformation is
to potentially shorten the critical path, but that comes at the cost of
introducing an additional div.

  The additional div may or may not incur a cost, depending on how div is
implemented. If it is implemented using Newton–Raphson iteration, it doesn't
seem to incur any cost (FIXME). However, if the div blocks the entire
pipeline, it is likely to be quite expensive. Let CodeGen take care of
this transformation.

  This patch yields a 6% improvement on a benchmark.

rdar://15032743


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191037 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Shuxin Yang 2013-09-19 21:13:46 +00:00
parent bf22298093
commit b1ccfb3a54
2 changed files with 18 additions and 3 deletions

View File

@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
if (isNormalFp(F))
R = BinaryOperator::CreateFDiv(F, Opnd1);
if (FMulOrDiv->hasOneUse()) {
// It would otherwise introduce another div.
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
if (isNormalFp(F))
R = BinaryOperator::CreateFDiv(F, Opnd1);
}
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));

View File

@ -202,6 +202,18 @@ define float @fmul2(float %f1) {
; CHECK: fdiv fast float 1.200000e+07, %f1
}
; (C1/X) * C2 => (C1*C2)/X is disabled if C1/X has multiple uses
@fmul2_external = external global float
; Negative test: %div is used both by the store and by the fmul, so it has
; more than one use. The expected output keeps the store and a fast fmul,
; i.e. the multiply is not folded into a second division.
define float @fmul2_disable(float %f1) {
%div = fdiv fast float 1.000000e+00, %f1
; Second use of %div: its value is also written to the external global.
store float %div, float* @fmul2_external
%mul = fmul fast float %div, 2.000000e+00
ret float %mul
; CHECK-LABEL: @fmul2_disable
; Both the store and the original fast multiply must remain, in this order.
; CHECK: store
; CHECK: fmul fast
}
; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
define float @fmul3(float %f1, float %f2) {
%t1 = fdiv float %f1, 2.0e+3