From 961d666be4f3714452ff9f15470f9255d85f0506 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Sat, 7 Apr 2012 20:04:00 +0000 Subject: [PATCH] Convert floating-point division by a constant into multiplication by the reciprocal when the reciprocal can be computed exactly. When -ffast-math is in effect, do the conversion even if the reciprocal is inexact. This substantially speeds up ac.f90 from the Polyhedron benchmarks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 ++++++++++ test/CodeGen/ARM/vdiv_combine.ll | 12 ++++----- test/CodeGen/X86/fdiv.ll | 32 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/X86/fdiv.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e03668777d8..e0fd3abfb45 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5725,6 +5725,19 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if there is no precision loss or if + // losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is not too horrible (eg not NaN). 
+ if (st == APFloat::opOK || (st == APFloat::opInexact && + DAG.getTarget().Options.UnsafeFPMath)) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index 13873932abd..7fddbed1ed5 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>) ; Test signed conversion. ; CHECK: t1 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t1() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>) ; Test unsigned conversion. ; CHECK: t2 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t2() nounwind { entry: %tmp = load i32* @uin, align 4, !tbaa !3 @@ -38,7 +38,7 @@ entry: ; Test which should not fold due to non-power of 2. ; CHECK: t3 -; CHECK: vdiv +; CHECK: {{vdiv|vmul}} define void @t3() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -52,7 +52,7 @@ entry: ; Test which should not fold due to power of 2 out of range. ; CHECK: t4 -; CHECK: vdiv +; CHECK: {{vdiv|vmul}} define void @t4() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -66,7 +66,7 @@ entry: ; Test case where const is max power of 2 (i.e., 2^32). ; CHECK: t5 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t5() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 @@ -80,7 +80,7 @@ entry: ; Test quadword. 
; CHECK: t6 -; CHECK-NOT: vdiv +; CHECK-NOT: {{vdiv|vmul}} define void @t6() nounwind { entry: %tmp = load i32* @iin, align 4, !tbaa !3 diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll new file mode 100644 index 00000000000..553f14efa13 --- /dev/null +++ b/test/CodeGen/X86/fdiv.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck -check-prefix=UNSAFE %s + +define double @exact(double %x) { +; Exact division by a constant always converted to multiplication. +; CHECK: @exact +; CHECK: mulsd +; UNSAFE: @exact +; UNSAFE: mulsd + %div = fdiv double %x, 2.0 + ret double %div +} + +define double @inexact(double %x) { +; Inexact division by a constant converted to multiplication if unsafe-math. +; CHECK: @inexact +; CHECK: divsd +; UNSAFE: @inexact +; UNSAFE: mulsd + %div = fdiv double %x, 0x41DFFFFFFFC00000 + ret double %div +} + +define double @funky(double %x) { +; No conversion to multiplication if too funky. +; CHECK: @funky +; CHECK: divsd +; UNSAFE: @funky +; UNSAFE: divsd + %div = fdiv double %x, 0.0 + ret double %div +}