diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c8b723a0de2..06bd6ebc545 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7890,11 +7890,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value-preserving truncation, it might + // introduce a tie in the second fp_round that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding once. + // Also, the folded fp_round is value-preserving iff both fp_rounds are. 
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll index 12412d45aa6..d1f49a91d94 100644 --- a/test/CodeGen/AArch64/f16-convert.ll +++ b/test/CodeGen/AArch64/f16-convert.ll @@ -133,7 +133,8 @@ define void @store0(i16* nocapture %a, float %val) nounwind { define void @store1(i16* nocapture %a, double %val) nounwind { ; CHECK-LABEL: store1: -; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: str h0, [x0] ; CHECK-NEXT: ret @@ -158,7 +159,8 @@ define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind { define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind { ; CHECK-LABEL: store3: -; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: str h0, [x0, w1, sxtw #1] ; CHECK-NEXT: ret @@ -184,7 +186,8 @@ define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind { define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind { ; CHECK-LABEL: store5: -; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: str h0, [x0, x1, lsl #1] ; CHECK-NEXT: ret @@ -209,7 +212,8 @@ define void @store6(i16* nocapture %a, float %val) nounwind { define void @store7(i16* nocapture %a, double %val) nounwind { ; CHECK-LABEL: store7: -; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: str h0, [x0, #20] ; CHECK-NEXT: ret @@ -234,7 +238,8 @@ define void @store8(i16* nocapture %a, float %val) nounwind { define void @store9(i16* nocapture %a, double %val) nounwind { ; CHECK-LABEL: store9: -; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: stur h0, [x0, #-20] ; CHECK-NEXT: ret diff --git 
a/test/CodeGen/X86/fp-double-rounding.ll b/test/CodeGen/X86/fp-double-rounding.ll new file mode 100644 index 00000000000..030cb9a3c01 --- /dev/null +++ b/test/CodeGen/X86/fp-double-rounding.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SAFE +; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK --check-prefix=UNSAFE + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--" + +; CHECK-LABEL: double_rounding: +; SAFE: callq __trunctfdf2 +; SAFE-NEXT: cvtsd2ss %xmm0 +; UNSAFE: callq __trunctfsf2 +; UNSAFE-NOT: cvt +define void @double_rounding(fp128* %x, float* %f) { +entry: + %0 = load fp128* %x, align 16 + %1 = fptrunc fp128 %0 to double + %2 = fptrunc double %1 to float + store float %2, float* %f, align 4 + ret void +} + +; CHECK-LABEL: double_rounding_precise_first: +; CHECK: fstps (% +; CHECK-NOT: fstpl +define void @double_rounding_precise_first(float* %f) { +entry: + ; Hack, to generate a precise FP_ROUND to double + %precise = call double asm sideeffect "fld %st(0)", "={st(0)}"() + %0 = fptrunc double %precise to float + store float %0, float* %f, align 4 + ret void +}