Fix for PR2164: allow transforming arbitrary-width unsigned divides into

multiplies. Some more cleverness would be nice, though. It would be nice if we could do this transformation on illegal types. Also, we would prefer a narrower constant when possible so that we can use a narrower multiply, which can be cheaper. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60283 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-09 16:45:03 +00:00 · 2008-11-30 06:02:26 +00:00 · 2008-11-30 06:02:26 +00:00 · 201c9776bd
commit 201c9776bd
parent 3f93df5733
3 changed files with 77 additions and 97 deletions
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -2221,16 +2221,61 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
  return true;
 }

-// Magic for divide replacement
-
-struct ms {
-  int64_t m;  // magic number
-  int64_t s;  // shift amount
+struct mu {
+  APInt m;     // magic number
+  bool a;      // add indicator
+  uint32_t s;  // shift amount
 };

-struct mu {
-  uint64_t m; // magic number
-  int64_t a;  // add indicator
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu(const APInt& d) {
+  unsigned p;
+  APInt nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
+  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+  nc = allOnes - (-d).urem(d);
+  p = d.getBitWidth() - 1;  // initialize p
+  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
+  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
+  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
+  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
+  do {
+    p = p + 1;
+    if (r1.uge(nc - r1)) {
+      q1 = q1 + q1 + 1;  // update q1
+      r1 = r1 + r1 - nc; // update r1
+    }
+    else {
+      q1 = q1+q1; // update q1
+      r1 = r1+r1; // update r1
+    }
+    if ((r2 + 1).uge(d - r2)) {
+      if (q2.uge(signedMax)) magu.a = 1;
+      q2 = q2+q2 + 1;     // update q2
+      r2 = r2+r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2.uge(signedMin)) magu.a = 1;
+      q2 = q2+q2;     // update q2
+      r2 = r2+r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < d.getBitWidth()*2 &&
+           (q1.ult(delta) || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - d.getBitWidth();  // resulting shift
+  return magu;
+}
+
+// Magic for divide replacement
+// FIXME: This should be APInt'ified
+struct ms {
+  int64_t m;  // magic number
  int64_t s;  // shift amount
 };

@ -2274,46 +2319,6 @@ static ms magic32(int32_t d) {
  return mag;
 }

-/// magicu - calculate the magic numbers required to codegen an integer udiv as
-/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
-static mu magicu32(uint32_t d) {
-  int32_t p;
-  uint32_t nc, delta, q1, r1, q2, r2;
-  struct mu magu;
-  magu.a = 0;               // initialize "add" indicator
-  nc = - 1 - (-d)%d;
-  p = 31;                   // initialize p
-  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
-  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
-  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
-  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
-  do {
-    p = p + 1;
-    if (r1 >= nc - r1 ) {
-      q1 = 2*q1 + 1;  // update q1
-      r1 = 2*r1 - nc; // update r1
-    }
-    else {
-      q1 = 2*q1; // update q1
-      r1 = 2*r1; // update r1
-    }
-    if (r2 + 1 >= d - r2) {
-      if (q2 >= 0x7FFFFFFF) magu.a = 1;
-      q2 = 2*q2 + 1;     // update q2
-      r2 = 2*r2 + 1 - d; // update r2
-    }
-    else {
-      if (q2 >= 0x80000000) magu.a = 1;
-      q2 = 2*q2;     // update q2
-      r2 = 2*r2 + 1; // update r2
-    }
-    delta = d - 1 - r2;
-  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
-  magu.m = q2 + 1; // resulting magic number
-  magu.s = p - 32;  // resulting shift
-  return magu;
-}
-
 /// magic - calculate the magic numbers required to codegen an integer sdiv as
 /// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
 /// or -1.
@ -2354,47 +2359,6 @@ static ms magic64(int64_t d) {
  return mag;
 }

-/// magicu - calculate the magic numbers required to codegen an integer udiv as
-/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
-static mu magicu64(uint64_t d)
-{
-  int64_t p;
-  uint64_t nc, delta, q1, r1, q2, r2;
-  struct mu magu;
-  magu.a = 0;               // initialize "add" indicator
-  nc = - 1 - (-d)%d;
-  p = 63;                   // initialize p
-  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
-  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
-  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
-  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
-  do {
-    p = p + 1;
-    if (r1 >= nc - r1 ) {
-      q1 = 2*q1 + 1;  // update q1
-      r1 = 2*r1 - nc; // update r1
-    }
-    else {
-      q1 = 2*q1; // update q1
-      r1 = 2*r1; // update r1
-    }
-    if (r2 + 1 >= d - r2) {
-      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
-      q2 = 2*q2 + 1;     // update q2
-      r2 = 2*r2 + 1 - d; // update r2
-    }
-    else {
-      if (q2 >= 0x8000000000000000ull) magu.a = 1;
-      q2 = 2*q2;     // update q2
-      r2 = 2*r2 + 1; // update r2
-    }
-    delta = d - 1 - r2;
-  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
-  magu.m = q2 + 1; // resulting magic number
-  magu.s = p - 64;  // resulting shift
-  return magu;
-}
-
 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
@ -2456,15 +2420,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  std::vector<SDNode*>* Created) const {
  MVT VT = N->getValueType(0);
-  
+
  // Check to see if we can do this.
-  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();       // BuildUDIV only operates on i32 or i64
-  
-  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
-  
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  mu magics = magicu(N1C->getAPIntValue());
+
  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
  SDValue Q;
  if (isOperationLegal(ISD::MULHU, VT))
    Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
@ -2479,6 +2447,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
    Created->push_back(Q.getNode());

  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
    return DAG.getNode(ISD::SRL, VT, Q, 
                       DAG.getConstant(magics.s, getShiftAmountTy()));
  } else {
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@ -0,0 +1,10 @@
+; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @a(i16 zeroext %x) nounwind {
+entry:
+	%div = udiv i16 %x, 33		; <i32> [#uses=1]
+	ret i16 %div
+}
--- a/test/CodeGen/X86/urem-i8-constant.ll
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mul
+; RUN: llvm-as < %s | llc -march=x86 | grep 111

 define i8 @foo(i8 %tmp325) {
 	%t546 = urem i8 %tmp325, 37