Fix for PR2164: allow transforming arbitrary-width unsigned divides into

multiplies.

Some more cleverness would be nice, though. It would be nice if we 
could do this transformation on illegal types.  Also, we would 
prefer a narrower constant when possible so that we can use a narrower
multiply, which can be cheaper.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60283 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eli Friedman 2008-11-30 06:02:26 +00:00
parent 3f93df5733
commit 201c9776bd
3 changed files with 77 additions and 97 deletions

View File

@ -2221,16 +2221,61 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
// Magic for divide replacement
struct ms {
int64_t m; // magic number
int64_t s; // shift amount
struct mu {
APInt m; // magic number
bool a; // add indicator
uint32_t s; // shift amount
};
struct mu {
uint64_t m; // magic number
int64_t a; // add indicator
/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
static mu magicu(const APInt& d) {
unsigned p;
APInt nc, delta, q1, r1, q2, r2;
struct mu magu;
magu.a = 0; // initialize "add" indicator
APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
nc = allOnes - (-d).urem(d);
p = d.getBitWidth() - 1; // initialize p
q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
do {
p = p + 1;
if (r1.uge(nc - r1)) {
q1 = q1 + q1 + 1; // update q1
r1 = r1 + r1 - nc; // update r1
}
else {
q1 = q1+q1; // update q1
r1 = r1+r1; // update r1
}
if ((r2 + 1).uge(d - r2)) {
if (q2.uge(signedMax)) magu.a = 1;
q2 = q2+q2 + 1; // update q2
r2 = r2+r2 + 1 - d; // update r2
}
else {
if (q2.uge(signedMin)) magu.a = 1;
q2 = q2+q2; // update q2
r2 = r2+r2 + 1; // update r2
}
delta = d - 1 - r2;
} while (p < d.getBitWidth()*2 &&
(q1.ult(delta) || (q1 == delta && r1 == 0)));
magu.m = q2 + 1; // resulting magic number
magu.s = p - d.getBitWidth(); // resulting shift
return magu;
}
// Magic for divide replacement
// FIXME: This should be APInt'ified
struct ms {
int64_t m; // magic number
int64_t s; // shift amount
};
@ -2274,46 +2319,6 @@ static ms magic32(int32_t d) {
return mag;
}
/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
static mu magicu32(uint32_t d) {
int32_t p;
uint32_t nc, delta, q1, r1, q2, r2;
struct mu magu;
magu.a = 0; // initialize "add" indicator
nc = - 1 - (-d)%d;
p = 31; // initialize p
q1 = 0x80000000/nc; // initialize q1 = 2p/nc
r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc)
q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d
r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d)
do {
p = p + 1;
if (r1 >= nc - r1 ) {
q1 = 2*q1 + 1; // update q1
r1 = 2*r1 - nc; // update r1
}
else {
q1 = 2*q1; // update q1
r1 = 2*r1; // update r1
}
if (r2 + 1 >= d - r2) {
if (q2 >= 0x7FFFFFFF) magu.a = 1;
q2 = 2*q2 + 1; // update q2
r2 = 2*r2 + 1 - d; // update r2
}
else {
if (q2 >= 0x80000000) magu.a = 1;
q2 = 2*q2; // update q2
r2 = 2*r2 + 1; // update r2
}
delta = d - 1 - r2;
} while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
magu.m = q2 + 1; // resulting magic number
magu.s = p - 32; // resulting shift
return magu;
}
/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1,
/// or -1.
@ -2354,47 +2359,6 @@ static ms magic64(int64_t d) {
return mag;
}
/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
static mu magicu64(uint64_t d)
{
int64_t p;
uint64_t nc, delta, q1, r1, q2, r2;
struct mu magu;
magu.a = 0; // initialize "add" indicator
nc = - 1 - (-d)%d;
p = 63; // initialize p
q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc
r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc)
q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d
r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d)
do {
p = p + 1;
if (r1 >= nc - r1 ) {
q1 = 2*q1 + 1; // update q1
r1 = 2*r1 - nc; // update r1
}
else {
q1 = 2*q1; // update q1
r1 = 2*r1; // update r1
}
if (r2 + 1 >= d - r2) {
if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
q2 = 2*q2 + 1; // update q2
r2 = 2*r2 + 1 - d; // update r2
}
else {
if (q2 >= 0x8000000000000000ull) magu.a = 1;
q2 = 2*q2; // update q2
r2 = 2*r2 + 1; // update r2
}
delta = d - 1 - r2;
} while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
magu.m = q2 + 1; // resulting magic number
magu.s = p - 64; // resulting shift
return magu;
}
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
@ -2456,15 +2420,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
MVT VT = N->getValueType(0);
// Check to see if we can do this.
if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
return SDValue(); // BuildUDIV only operates on i32 or i64
uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
mu magics = magicu(N1C->getAPIntValue());
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
if (isOperationLegal(ISD::MULHU, VT))
Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
@ -2479,6 +2447,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
Created->push_back(Q.getNode());
if (magics.a == 0) {
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
"We shouldn't generate an undefined shift!");
return DAG.getNode(ISD::SRL, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
} else {

View File

@ -0,0 +1,10 @@
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
define zeroext i16 @a(i16 zeroext %x) nounwind {
entry:
%div = udiv i16 %x, 33 ; <i32> [#uses=1]
ret i16 %div
}

View File

@ -1,4 +1,4 @@
; RUN: llvm-as < %s | llc -march=x86 | not grep mul
; RUN: llvm-as < %s | llc -march=x86 | grep 111
define i8 @foo(i8 %tmp325) {
%t546 = urem i8 %tmp325, 37