From 2a46e4c020f3daf69b044b1e0c62ed6676036b49 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze@braunis.de>
Date: Tue, 14 Jul 2015 02:08:26 +0000
Subject: [PATCH] LegalizeDAG: Fix and improve FCOPYSIGN/FABS legalization

- Factor out code to query and modify the sign bit of a floatingpoint
  value as an integer. This also works if none of the targets integer
  types is big enough to hold all bits of the floatingpoint value.

- Legalize FABS(x) as FCOPYSIGN(x, 0.0) if FCOPYSIGN is available,
  otherwise perform bit manipulation on the sign bit. The previous code
  used "x >u 0 ? x : -x" which is incorrect for x being -0.0! It also
  takes 34 instructions on ARM Cortex-M4. With this patch we only
  require 5:
    vldr d0, LCPI0_0
    vmov r2, r3, d0
    lsrs r2, r3, #31
    bfi r1, r2, #31, #1
    bx lr
  (This could be further improved if the compiler would recognize that
   r2, r3 is zero).

- Only lower FCOPYSIGN(x, y) = sign(x) ? -FABS(x) : FABS(x) if FABS is
  available otherwise perform bit manipulation on the sign bit.

- Perform the sign(x) test by masking out the sign bit and comparing
  with 0 rather than shifting the sign bit to the highest position and
  testing for "<s 0". For x86 copysignl (on 80bit values) this gets us:
    testl $32768, %eax
  rather than:
    shlq $48, %rax
    sets %al
    testb %al, %al

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242107 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      | 222 ++++++++++++------
 .../CodeGen/Thumb2/float-intrinsics-double.ll |   9 +-
 test/CodeGen/X86/pr13577.ll                   |   4 +-
 3 files changed, 156 insertions(+), 79 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 21ab07234c8..d5f8d0acef7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -39,6 +39,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "legalizedag"
 
+namespace {
+
+struct FloatSignAsInt;
+
 //===----------------------------------------------------------------------===//
 /// This takes an arbitrary SelectionDAG as input and
 /// hacks on it until the target machine can handle it.  This involves
@@ -51,7 +55,6 @@ using namespace llvm;
 /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
 /// will attempt merge setcc and brc instructions into brcc's.
 ///
-namespace {
 class SelectionDAGLegalize {
   const TargetMachine &TM;
   const TargetLowering &TLI;
@@ -130,7 +133,11 @@ private:
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandFCOPYSIGN(SDNode *Node);
+  void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
+  SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+                          SDValue NewIntValue) const;
+  SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+  SDValue ExpandFABS(SDNode *Node) const;
   SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
                                SDLoc dl);
   SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
@@ -1568,69 +1575,147 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
                      false, false, false, 0);
 }
 
-SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
-  SDLoc dl(Node);
-  SDValue Tmp1 = Node->getOperand(0);
-  SDValue Tmp2 = Node->getOperand(1);
+namespace {
+/// Keeps track of state when getting the sign of a floatingpoint value as an
+/// integer.
+struct FloatSignAsInt {
+  EVT FloatVT;
+  SDValue Chain;
+  SDValue FloatPtr;
+  SDValue IntPtr;
+  MachinePointerInfo IntPointerInfo;
+  MachinePointerInfo FloatPointerInfo;
+  SDValue IntValue;
+  APInt SignMask;
+};
+}
 
-  // Get the sign bit of the RHS.  First obtain a value that has the same
-  // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
-  SDValue SignBit;
-  EVT FloatVT = Tmp2.getValueType();
-  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+/// Bitcast a floatingpoint value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floatingpoint value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+                                             SDLoc DL, SDValue Value) const {
+  EVT FloatVT = Value.getValueType();
+  unsigned NumBits = FloatVT.getSizeInBits();
+  State.FloatVT = FloatVT;
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+  // Convert to an integer of the same size.
   if (TLI.isTypeLegal(IVT)) {
-    // Convert to an integer with the same sign bit.
-    SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
-  } else {
-    auto &DL = DAG.getDataLayout();
-    // Store the float to memory, then load the sign part out as an integer.
-    MVT LoadTy = TLI.getPointerTy(DL);
-    // First create a temporary that is aligned for both the load and store.
-    SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
-    // Then store the float to it.
-    SDValue Ch =
-      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
-                   false, false, 0);
-    if (DL.isBigEndian()) {
-      assert(FloatVT.isByteSized() && "Unsupported floating point type!");
-      // Load out a legal integer with the same sign bit as the float.
-      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
-                            false, false, false, 0);
-    } else { // Little endian
-      SDValue LoadPtr = StackPtr;
-      // The float may be wider than the integer we are going to load.  Advance
-      // the pointer so that the loaded integer will contain the sign bit.
-      unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
-      unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
-      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
-                           DAG.getConstant(ByteOffset, dl,
-                                           LoadPtr.getValueType()));
-      // Load a legal integer containing the sign bit.
-      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
-                            false, false, false, 0);
-      // Move the sign bit to the top bit of the loaded integer.
-      unsigned BitShift = LoadTy.getSizeInBits() -
-        (FloatVT.getSizeInBits() - 8 * ByteOffset);
-      assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
-      if (BitShift)
-        SignBit = DAG.getNode(
-            ISD::SHL, dl, LoadTy, SignBit,
-            DAG.getConstant(BitShift, dl,
-                            TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
-    }
+    State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+    State.SignMask = APInt::getSignBit(NumBits);
+    return;
   }
-  // Now get the sign bit proper, by seeing whether the value is negative.
-  SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
-                         SignBit,
-                         DAG.getConstant(0, dl, SignBit.getValueType()),
-                         ISD::SETLT);
-  // Get the absolute value of the result.
-  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
-  // Select between the nabs and abs value based on the sign bit of
-  // the input.
-  return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
-                      DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
-                      AbsVal);
+
+  auto &DataLayout = DAG.getDataLayout();
+  // Store the float to memory, then load the sign part out as an integer.
+  MVT LoadTy = TLI.getPointerTy(DataLayout);
+  // First create a temporary that is aligned for both the load and store.
+  SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  // Then store the float to it.
+  State.FloatPtr = StackPtr;
+  State.FloatPointerInfo = MachinePointerInfo::getFixedStack(FI);
+  State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+                             State.FloatPointerInfo, false, false, 0);
+
+  if (DataLayout.isBigEndian()) {
+    assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+    // Load out a legal integer with the same sign bit as the float.
+    State.IntPtr = StackPtr;
+    State.IntPointerInfo = State.FloatPointerInfo;
+    State.IntValue = DAG.getLoad(LoadTy, DL, State.Chain, StackPtr,
+                                 State.IntPointerInfo, false, false, false, 0);
+    State.SignMask = APInt::getSignBit(LoadTy.getSizeInBits());
+  } else { // Little endian
+    // The float may be wider than the integer we are going to load.  Advance
+    // the pointer so that the loaded integer will contain the sign bit.
+    unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+    unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+    SDValue LoadPtr
+      = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+                    DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+    // Load a legal integer containing the sign bit.
+    State.IntPtr = LoadPtr;
+    State.IntPointerInfo = MachinePointerInfo::getFixedStack(FI, ByteOffset);
+    State.IntValue = DAG.getLoad(LoadTy, DL, State.Chain, LoadPtr,
+                                 State.IntPointerInfo, false, false, false, 0);
+    unsigned NumBits = LoadTy.getSizeInBits();
+    unsigned SignBit = (FloatVT.getSizeInBits()-1) - 8*ByteOffset;
+    State.SignMask = APInt::getOneBitSet(NumBits, SignBit);
+  }
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floatingpoint type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+                                          SDLoc DL, SDValue NewIntValue) const {
+  if (!State.Chain) {
+    return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+  }
+
+  // Override the part containing the sign bit in the value stored on the stack.
+  SDValue Chain = DAG.getStore(State.Chain, DL, NewIntValue, State.IntPtr,
+                               State.IntPointerInfo, false, false, 0);
+  return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+                     State.FloatPointerInfo, false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+  SDLoc DL(Node);
+  SDValue Mag = Node->getOperand(0);
+  SDValue Sign = Node->getOperand(1);
+
+  // Get sign bit into an integer value.
+  FloatSignAsInt SignAsInt;
+  getSignAsIntValue(SignAsInt, DL, Sign);
+
+  EVT IntVT = SignAsInt.IntValue.getValueType();
+  SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+  SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+                                SignMask);
+
+  // Transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) if FABS is legal.
+  EVT FloatVT = Mag.getValueType();
+  if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+      TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+    SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+    SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+    SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+                                DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+    return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+  }
+
+  // Transform values to integer, copy the sign bit and transform back.
+  FloatSignAsInt MagAsInt;
+  getSignAsIntValue(MagAsInt, DL, Mag);
+  assert(SignAsInt.SignMask == MagAsInt.SignMask);
+  SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
+  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
+                                    ClearSignMask);
+  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
+
+  return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+  SDLoc DL(Node);
+  SDValue Value = Node->getOperand(0);
+
+  // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+  EVT FloatVT = Value.getValueType();
+  if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+    SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+    return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+  }
+
+  // Transform value to integer, clear the sign bit and transform back.
+  FloatSignAsInt ValueAsInt;
+  getSignAsIntValue(ValueAsInt, DL, Value);
+  EVT IntVT = ValueAsInt.IntValue.getValueType();
+  SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+                                    ClearSignMask);
+  return modifySignAsInt(ValueAsInt, DL, ClearedSign);
 }
 
 void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -3312,18 +3397,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                        Node->getOperand(0));
     Results.push_back(Tmp1);
     break;
-  case ISD::FABS: {
-    // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
-    EVT VT = Node->getValueType(0);
-    Tmp1 = Node->getOperand(0);
-    Tmp2 = DAG.getConstantFP(0.0, dl, VT);
-    Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
-                        Tmp1, Tmp2, ISD::SETUGT);
-    Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
-    Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
-    Results.push_back(Tmp1);
+  case ISD::FABS:
+    Results.push_back(ExpandFABS(Node));
     break;
-  }
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
diff --git a/test/CodeGen/Thumb2/float-intrinsics-double.ll b/test/CodeGen/Thumb2/float-intrinsics-double.ll
index 01a23bd0fe6..b34176d41a4 100644
--- a/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -109,9 +109,12 @@ declare double     @llvm.fabs.f64(double %Val)
 define double @abs_d(double %a) {
 ; CHECK-LABEL: abs_d:
 ; NONE: bic r1, r1, #-2147483648
-; SP: bl __aeabi_dcmpgt
-; SP: bl __aeabi_dcmpun
-; SP: bl __aeabi_dsub
+; SP: vldr d1, .LCPI{{.*}}
+; SP: vmov r0, r1, d0
+; SP: vmov r2, r3, d1
+; SP: lsrs r2, r3, #31
+; SP: bfi r1, r2, #31, #1
+; SP: vmov d0, r0, r1
 ; DP: vabs.f64 d0, d0
   %1 = call double @llvm.fabs.f64(double %a)
   ret double %1
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
index 090c7262d6e..13b3665d1a7 100644
--- a/test/CodeGen/X86/pr13577.ll
+++ b/test/CodeGen/X86/pr13577.ll
@@ -7,9 +7,7 @@
 
 ; CHECK-LABEL: foo:
 ; CHECK: movq {{.*}}, %rax
-; CHECK: shlq $48, %rax
-; CHECK: sets %al
-; CHECK: testb %al, %al
+; CHECK: testl $32768, %eax
 ; CHECK: flds LCPI0_0(%rip)
 ; CHECK: flds LCPI0_1(%rip)
 ; CHECK: fcmovne %st(1), %st(0)