Invert the TargetLowering flag that controls divide by consant expansion.

Add a new flag to TargetLowering indicating if the target has really cheap signed division by powers of two, make ppc use it. This will probably go away in the future. Implement some more ISD::SDIV folds in the dag combiner Remove now dead code in the x86 backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23853 91177308-0d34-0410-b5e6-96231b3b80d8
2025-03-22 10:36:10 +00:00 · 2005-10-21 00:02:42 +00:00 · 2005-10-21 00:02:42 +00:00 · 405e3ecb56
commit 405e3ecb56
parent 1f347a318c
6 changed files with 68 additions and 76 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -84,10 +84,14 @@ public:
  /// this target.
  bool isSetCCExpensive() const { return SetCCIsExpensive; }
  
-  /// isIntDivExpensive() - Return true if integer divide is more expensive than
+  /// isIntDivCheap() - Return true if integer divide is usually cheaper than
  /// a sequence of several shifts, adds, and multiplies for this target.
-  bool isIntDivExpensive() const { return IntDivIsExpensive; }
+  bool isIntDivCheap() const { return IntDivIsCheap; }

+  /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
+  /// srl/add/sra.
+  bool isPow2DivCheap() const { return Pow2DivIsCheap; }
+  
  /// getSetCCResultTy - Return the ValueType of the result of setcc operations.
  ///
  MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
@ -266,10 +270,15 @@ protected:
  /// setcc operations into other operations if possible.
  void setSetCCIsExpensive() { SetCCIsExpensive = true; }

-  /// setIntDivIsExpensive - Tells the code generator that integer divide is
+  /// setIntDivIsCheap - Tells the code generator that integer divide is
  /// expensive, and if possible, should be replaced by an alternate sequence
  /// of instructions not containing an integer divide.
-  void setIntDivIsExpensive() { IntDivIsExpensive = true; }
+  void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
+  
+  /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate
+  /// srl/add/sra for a signed divide by power of two, and let the target handle
+  /// it.
+  void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; }
  
  /// addRegisterClass - Add the specified register class as an available
  /// regclass for the specified value type.  This indicates the selector can
@ -400,12 +409,16 @@ private:
  /// setcc operations into other operations if possible.
  bool SetCCIsExpensive;

-  /// IntDivIsExpensive - This is a hack until a real costs model is in place
-  /// that tells the code generator whether integer divide will always be more
-  /// expensive than a sequence of multiplies, shifts, and adds that performs
-  /// the same operation.  If we ever optimize for size, this will be set to
-  /// false unconditionally.
-  bool IntDivIsExpensive;
+  /// IntDivIsCheap - Tells the code generator not to expand integer divides by
+  /// constants into a sequence of muls, adds, and shifts.  This is a hack until
+  /// a real cost model is in place.  If we ever optimize for size, this will be
+  /// set to true unconditionally.
+  bool IntDivIsCheap;
+  
+  /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate
+  /// srl/add/sra for a signed divide by power of two, and let the target handle
+  /// it.
+  bool Pow2DivIsCheap;
  
  /// SetCCResultTy - The type that SetCC operations use.  This defaults to the
  /// PointerTy.
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -745,8 +745,7 @@ SDOperand DAGCombiner::visitMUL(SDNode *N) {
    return N1;
  // fold (mul x, -1) -> 0-x
  if (N1C && N1C->isAllOnesValue())
-    return DAG.getNode(ISD::SUB, N->getValueType(0), 
-                       DAG.getConstant(0, N->getValueType(0)), N0);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1C && isPowerOf2_64(N1C->getValue()))
    return DAG.getNode(ISD::SHL, N->getValueType(0), N0,
@ -777,21 +776,49 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) {
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(),
                           N->getValueType(0));
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSignExtended() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
  if (MaskedValueIsZero(N1, SignBit, TLI) &&
      MaskedValueIsZero(N0, SignBit, TLI))
    return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+  // fold (sdiv X, pow2) -> (add (sra X, log(pow2)), (srl X, sizeof(X)-1))
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && 
+      (isPowerOf2_64(N1C->getSignExtended()) || 
+       isPowerOf2_64(-N1C->getSignExtended()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDOperand();
+    int64_t pow2 = N1C->getSignExtended();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                TLI.getShiftAmountTy()));
+    WorkList.push_back(SRL.Val);
+    SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL);
+    WorkList.push_back(SGN.Val);
+    SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN, 
+                                DAG.getConstant(Log2_64(abs2),
+                                                TLI.getShiftAmountTy()));
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+    WorkList.push_back(SRA.Val);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+  }
  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
-  // FIXME: This currently opts out powers of two, since targets can often be
-  // more clever in those cases.  In an idea world, we would have some way to
-  // detect that too.
-  if (N1C && !isPowerOf2_64(N1C->getSignExtended()) && 
-      (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) &&
-      TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT) &&
-      TLI.isIntDivExpensive()) {
+  if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && 
+      !TLI.isIntDivCheap() &&
+      TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT)) {
    return BuildSDIV(N);
  }
  return SDOperand();
@ -815,7 +842,7 @@ SDOperand DAGCombiner::visitUDIV(SDNode *N) {
                                       TLI.getShiftAmountTy()));
  // fold (udiv x, c) -> alternate
  if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) &&
-      TLI.isTypeLegal(VT) && TLI.isIntDivExpensive())
+      TLI.isTypeLegal(VT) && !TLI.isIntDivCheap())
    return BuildUDIV(N);
  return SDOperand();
 }
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -30,6 +30,8 @@ TargetLowering::TargetLowering(TargetMachine &tm)
  maxStoresPerMemSet = maxStoresPerMemCpy = maxStoresPerMemMove = 8;
  allowUnalignedMemoryAccesses = false;
  UseUnderscoreSetJmpLongJmp = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
 }

 TargetLowering::~TargetLowering() {}
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -996,6 +996,11 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) {
    return SDOperand(N, 0);
  }
  case ISD::SDIV: {
+    // FIXME: since this depends on the setting of the carry flag from the srawi
+    //        we should really be making notes about that for the scheduler.
+    // FIXME: It sure would be nice if we could cheaply recognize the 
+    //        srl/add/sra pattern the dag combiner will generate for this as
+    //        sra/addze rather than having to handle sdiv ourselves.  oh well.
    unsigned Imm;
    if (isIntImmediate(N->getOperand(1), Imm)) {
      if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -27,8 +27,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
    
  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
-  // Fold constant integer div/rem into an alternate sequence of instructions  
-  setIntDivIsExpensive();
+  setPow2DivIsCheap();
  
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);
--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@ -3035,60 +3035,6 @@ unsigned ISel::SelectExpr(SDOperand N) {
          return Result;
        }
      }
-
-      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-        // FIXME: These special cases should be handled by the lowering impl!
-        unsigned RHS = CN->getValue();
-        bool isNeg = false;
-        if ((int)RHS < 0) {
-          isNeg = true;
-          RHS = -RHS;
-        }
-        if (RHS && (RHS & (RHS-1)) == 0) {   // Signed division by power of 2?
-          unsigned Log = Log2_32(RHS);
-          unsigned SAROpc, SHROpc, ADDOpc, NEGOpc;
-          switch (N.getValueType()) {
-          default: assert("Unknown type to signed divide!");
-          case MVT::i8:
-            SAROpc = X86::SAR8ri;
-            SHROpc = X86::SHR8ri;
-            ADDOpc = X86::ADD8rr;
-            NEGOpc = X86::NEG8r;
-            break;
-          case MVT::i16:
-            SAROpc = X86::SAR16ri;
-            SHROpc = X86::SHR16ri;
-            ADDOpc = X86::ADD16rr;
-            NEGOpc = X86::NEG16r;
-            break;
-          case MVT::i32:
-            SAROpc = X86::SAR32ri;
-            SHROpc = X86::SHR32ri;
-            ADDOpc = X86::ADD32rr;
-            NEGOpc = X86::NEG32r;
-            break;
-          }
-          unsigned RegSize = MVT::getSizeInBits(N.getValueType());
-          Tmp1 = SelectExpr(N.getOperand(0));
-          unsigned TmpReg;
-          if (Log != 1) {
-            TmpReg = MakeReg(N.getValueType());
-            BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1);
-          } else {
-            TmpReg = Tmp1;
-          }
-          unsigned TmpReg2 = MakeReg(N.getValueType());
-          BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(RegSize-Log);
-          unsigned TmpReg3 = MakeReg(N.getValueType());
-          BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2);
-
-          unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result;
-          BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log);
-          if (isNeg)
-            BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4);
-          return Result;
-        }
-      }
    }

    if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {