mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 06:32:09 +00:00
Fix for PR2164: allow transforming arbitrary-width unsigned divides into
multiplies. Some more cleverness would be nice, though. It would be nice if we could do this transformation on illegal types. Also, we would prefer a narrower constant when possible so that we can use a narrower multiply, which can be cheaper. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60283 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3f93df5733
commit
201c9776bd
@ -2221,16 +2221,61 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Magic for divide replacement
|
||||
|
||||
struct ms {
|
||||
int64_t m; // magic number
|
||||
int64_t s; // shift amount
|
||||
struct mu {
|
||||
APInt m; // magic number
|
||||
bool a; // add indicator
|
||||
uint32_t s; // shift amount
|
||||
};
|
||||
|
||||
struct mu {
|
||||
uint64_t m; // magic number
|
||||
int64_t a; // add indicator
|
||||
/// magicu - calculate the magic numbers required to codegen an integer udiv as
|
||||
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
|
||||
static mu magicu(const APInt& d) {
|
||||
unsigned p;
|
||||
APInt nc, delta, q1, r1, q2, r2;
|
||||
struct mu magu;
|
||||
magu.a = 0; // initialize "add" indicator
|
||||
APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
|
||||
APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
|
||||
APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
|
||||
|
||||
nc = allOnes - (-d).urem(d);
|
||||
p = d.getBitWidth() - 1; // initialize p
|
||||
q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
|
||||
r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
|
||||
q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
|
||||
r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
|
||||
do {
|
||||
p = p + 1;
|
||||
if (r1.uge(nc - r1)) {
|
||||
q1 = q1 + q1 + 1; // update q1
|
||||
r1 = r1 + r1 - nc; // update r1
|
||||
}
|
||||
else {
|
||||
q1 = q1+q1; // update q1
|
||||
r1 = r1+r1; // update r1
|
||||
}
|
||||
if ((r2 + 1).uge(d - r2)) {
|
||||
if (q2.uge(signedMax)) magu.a = 1;
|
||||
q2 = q2+q2 + 1; // update q2
|
||||
r2 = r2+r2 + 1 - d; // update r2
|
||||
}
|
||||
else {
|
||||
if (q2.uge(signedMin)) magu.a = 1;
|
||||
q2 = q2+q2; // update q2
|
||||
r2 = r2+r2 + 1; // update r2
|
||||
}
|
||||
delta = d - 1 - r2;
|
||||
} while (p < d.getBitWidth()*2 &&
|
||||
(q1.ult(delta) || (q1 == delta && r1 == 0)));
|
||||
magu.m = q2 + 1; // resulting magic number
|
||||
magu.s = p - d.getBitWidth(); // resulting shift
|
||||
return magu;
|
||||
}
|
||||
|
||||
// Magic for divide replacement
|
||||
// FIXME: This should be APInt'ified
|
||||
struct ms {
|
||||
int64_t m; // magic number
|
||||
int64_t s; // shift amount
|
||||
};
|
||||
|
||||
@ -2274,46 +2319,6 @@ static ms magic32(int32_t d) {
|
||||
return mag;
|
||||
}
|
||||
|
||||
/// magicu - calculate the magic numbers required to codegen an integer udiv as
|
||||
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
|
||||
static mu magicu32(uint32_t d) {
|
||||
int32_t p;
|
||||
uint32_t nc, delta, q1, r1, q2, r2;
|
||||
struct mu magu;
|
||||
magu.a = 0; // initialize "add" indicator
|
||||
nc = - 1 - (-d)%d;
|
||||
p = 31; // initialize p
|
||||
q1 = 0x80000000/nc; // initialize q1 = 2p/nc
|
||||
r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc)
|
||||
q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d
|
||||
r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d)
|
||||
do {
|
||||
p = p + 1;
|
||||
if (r1 >= nc - r1 ) {
|
||||
q1 = 2*q1 + 1; // update q1
|
||||
r1 = 2*r1 - nc; // update r1
|
||||
}
|
||||
else {
|
||||
q1 = 2*q1; // update q1
|
||||
r1 = 2*r1; // update r1
|
||||
}
|
||||
if (r2 + 1 >= d - r2) {
|
||||
if (q2 >= 0x7FFFFFFF) magu.a = 1;
|
||||
q2 = 2*q2 + 1; // update q2
|
||||
r2 = 2*r2 + 1 - d; // update r2
|
||||
}
|
||||
else {
|
||||
if (q2 >= 0x80000000) magu.a = 1;
|
||||
q2 = 2*q2; // update q2
|
||||
r2 = 2*r2 + 1; // update r2
|
||||
}
|
||||
delta = d - 1 - r2;
|
||||
} while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
|
||||
magu.m = q2 + 1; // resulting magic number
|
||||
magu.s = p - 32; // resulting shift
|
||||
return magu;
|
||||
}
|
||||
|
||||
/// magic - calculate the magic numbers required to codegen an integer sdiv as
|
||||
/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1,
|
||||
/// or -1.
|
||||
@ -2354,47 +2359,6 @@ static ms magic64(int64_t d) {
|
||||
return mag;
|
||||
}
|
||||
|
||||
/// magicu - calculate the magic numbers required to codegen an integer udiv as
|
||||
/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
|
||||
static mu magicu64(uint64_t d)
|
||||
{
|
||||
int64_t p;
|
||||
uint64_t nc, delta, q1, r1, q2, r2;
|
||||
struct mu magu;
|
||||
magu.a = 0; // initialize "add" indicator
|
||||
nc = - 1 - (-d)%d;
|
||||
p = 63; // initialize p
|
||||
q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc
|
||||
r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc)
|
||||
q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d
|
||||
r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d)
|
||||
do {
|
||||
p = p + 1;
|
||||
if (r1 >= nc - r1 ) {
|
||||
q1 = 2*q1 + 1; // update q1
|
||||
r1 = 2*r1 - nc; // update r1
|
||||
}
|
||||
else {
|
||||
q1 = 2*q1; // update q1
|
||||
r1 = 2*r1; // update r1
|
||||
}
|
||||
if (r2 + 1 >= d - r2) {
|
||||
if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
|
||||
q2 = 2*q2 + 1; // update q2
|
||||
r2 = 2*r2 + 1 - d; // update r2
|
||||
}
|
||||
else {
|
||||
if (q2 >= 0x8000000000000000ull) magu.a = 1;
|
||||
q2 = 2*q2; // update q2
|
||||
r2 = 2*r2 + 1; // update r2
|
||||
}
|
||||
delta = d - 1 - r2;
|
||||
} while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
|
||||
magu.m = q2 + 1; // resulting magic number
|
||||
magu.s = p - 64; // resulting shift
|
||||
return magu;
|
||||
}
|
||||
|
||||
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
|
||||
/// return a DAG expression to select that will generate the same value by
|
||||
/// multiplying by a magic number. See:
|
||||
@ -2456,15 +2420,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
|
||||
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
|
||||
std::vector<SDNode*>* Created) const {
|
||||
MVT VT = N->getValueType(0);
|
||||
|
||||
|
||||
// Check to see if we can do this.
|
||||
if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
|
||||
return SDValue(); // BuildUDIV only operates on i32 or i64
|
||||
|
||||
uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
|
||||
|
||||
// FIXME: We should be more aggressive here.
|
||||
if (!isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
// FIXME: We should use a narrower constant when the upper
|
||||
// bits are known to be zero.
|
||||
ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
|
||||
mu magics = magicu(N1C->getAPIntValue());
|
||||
|
||||
// Multiply the numerator (operand 0) by the magic value
|
||||
// FIXME: We should support doing a MUL in a wider type
|
||||
SDValue Q;
|
||||
if (isOperationLegal(ISD::MULHU, VT))
|
||||
Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
|
||||
@ -2479,6 +2447,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
|
||||
Created->push_back(Q.getNode());
|
||||
|
||||
if (magics.a == 0) {
|
||||
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
|
||||
"We shouldn't generate an undefined shift!");
|
||||
return DAG.getNode(ISD::SRL, VT, Q,
|
||||
DAG.getConstant(magics.s, getShiftAmountTy()));
|
||||
} else {
|
||||
|
10
test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
Normal file
10
test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
|
||||
; ModuleID = '<stdin>'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
|
||||
target triple = "i686-pc-linux-gnu"
|
||||
|
||||
define zeroext i16 @a(i16 zeroext %x) nounwind {
|
||||
entry:
|
||||
%div = udiv i16 %x, 33 ; <i32> [#uses=1]
|
||||
ret i16 %div
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 | not grep mul
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep 111
|
||||
|
||||
define i8 @foo(i8 %tmp325) {
|
||||
%t546 = urem i8 %tmp325, 37
|
||||
|
Loading…
x
Reference in New Issue
Block a user