Optimize some 64-bit multiplications by constants into two LEAs or one LEA + SHL, since imulq is slow (latency 5). e.g.

x * 40 =>
        shlq    $3, %rdi
        leaq    (%rdi,%rdi,4), %rax

This has the added benefit of allowing more multiplies to be folded into the addressing mode. e.g.

a * 24 + b =>
        leaq    (%rdi,%rdi,2), %rax
        leaq    (%rsi,%rax,8), %rax

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67917 91177308-0d34-0410-b5e6-96231b3b80d8
parent e8b64106ec
commit 0b0cd9113a

Changed paths:
  include/llvm/Target
  lib
  test/CodeGen/X86
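As a source-level illustration of the two patterns in the commit message (a hypothetical example, not part of the patch), both multiplies below are by constants that factor into an LEA scale (3, 5, or 9) times a power of two:

#include <cstdint>

// 40 = 5 * 8: expected to lower to leaq (x,x,4) plus shlq $3 instead of imulq.
uint64_t scale40(uint64_t x) { return x * 40; }

// 24 = 3 * 8: the multiply by 3 becomes one LEA, and the power-of-two factor
// can be folded into the addressing mode of the add, e.g. leaq (b, tmp, 8).
uint64_t index24(uint64_t a, uint64_t b) { return a * 24 + b; }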
@@ -791,9 +791,10 @@ public:
     bool isCalledByLegalizer() const { return CalledByLegalizer; }

     void AddToWorklist(SDNode *N);
-    SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To);
-    SDValue CombineTo(SDNode *N, SDValue Res);
-    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1);
+    SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
+                      bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);

     void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
   };
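The AddTo flag introduced above lets a target combine decide whether the replacement value is pushed back onto the DAG combiner worklist; it defaults to true, which preserves the old behavior. A minimal sketch of a caller, assuming only the API declared in this hunk (the combine and the folded pattern are illustrative, not part of the patch):

// Hypothetical target combine: fold (mul x, 1) -> x, but keep the
// replacement off the DAG combiner worklist by passing AddTo = false.
static SDValue PerformTrivialMulCombine(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI) {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C->getZExtValue() == 1)
      DCI.CombineTo(N, N->getOperand(0), /*AddTo=*/false);
  return SDValue();
}

The X86 combine added later in this commit uses the same form, DCI.CombineTo(N, NewMul, false), so the nodes it has just created are not immediately re-combined.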
@@ -93,14 +93,14 @@ namespace {
     }

     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
-                    bool AddTo = true);
+                      bool AddTo = true);

     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
       return CombineTo(N, &Res, 1, AddTo);
     }

     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
-                    bool AddTo = true) {
+                      bool AddTo = true) {
       SDValue To[] = { Res0, Res1 };
       return CombineTo(N, To, 2, AddTo);
     }
@@ -293,19 +293,19 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 }

 SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To) {
-  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size());
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 }

 SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, SDValue Res) {
-  return ((DAGCombiner*)DC)->CombineTo(N, Res);
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 }


 SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, SDValue Res0, SDValue Res1) {
-  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 }

 void TargetLowering::DAGCombinerInfo::
@@ -826,6 +826,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::STORE);
+  if (Subtarget->is64Bit())
+    setTargetDAGCombine(ISD::MUL);

   computeRegisterProperties();

@@ -8407,6 +8409,74 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
 }


+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DAG.getMachineFunction().
+        getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    return SDValue();
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  MVT VT = N->getValueType(0);
+  if (VT != MVT::i64)
+    return SDValue();
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return SDValue();
+  uint64_t MulAmt = C->getZExtValue();
+  if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+    return SDValue();
+
+  uint64_t MulAmt1 = 0;
+  uint64_t MulAmt2 = 0;
+  if ((MulAmt % 9) == 0) {
+    MulAmt1 = 9;
+    MulAmt2 = MulAmt / 9;
+  } else if ((MulAmt % 5) == 0) {
+    MulAmt1 = 5;
+    MulAmt2 = MulAmt / 5;
+  } else if ((MulAmt % 3) == 0) {
+    MulAmt1 = 3;
+    MulAmt2 = MulAmt / 3;
+  }
+  if (MulAmt2 &&
+      (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+    DebugLoc DL = N->getDebugLoc();
+
+    if (isPowerOf2_64(MulAmt2) &&
+        !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+      // If second multiplier is pow2, issue it first. We want the multiply by
+      // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+      // is an add.
+      std::swap(MulAmt1, MulAmt2);
+
+    SDValue NewMul;
+    if (isPowerOf2_64(MulAmt1))
+      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                           DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+    else
+      NewMul = DAG.getNode(ISD::MUL, DL, VT, N->getOperand(0),
+                           DAG.getConstant(MulAmt1, VT));
+
+    if (isPowerOf2_64(MulAmt2))
+      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+                           DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+    else
+      NewMul = DAG.getNode(ISD::MUL, DL, VT, NewMul,
+                           DAG.getConstant(MulAmt2, VT));
+
+    // Do not add new nodes to DAG combiner worklist.
+    DCI.CombineTo(N, NewMul, false);
+  }
+  return SDValue();
+}
+
+
 /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
 /// when possible.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
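To make the constant-splitting rule above easier to follow, here is a standalone sketch in plain C++ (no LLVM dependency; the helper names are invented for illustration) that mirrors the MulAmt1/MulAmt2 selection: the multiplier must factor into one LEA-friendly constant (3, 5, or 9) and a second factor that is itself a power of two or LEA-friendly.

#include <cstdint>
#include <cstdio>

// A constant x86-64 can implement with a single cheap instruction:
// a power of two (one SHL) or 3/5/9 (one LEA scale).
static bool isPow2(uint64_t x) { return x && (x & (x - 1)) == 0; }
static bool isCheap(uint64_t x) { return isPow2(x) || x == 3 || x == 5 || x == 9; }

// Mirror of the MulAmt1/MulAmt2 split in PerformMulCombine above.
static bool splitMulAmt(uint64_t MulAmt, uint64_t &F1, uint64_t &F2) {
  if (isCheap(MulAmt))
    return false;                  // already a single LEA or SHL
  F1 = F2 = 0;
  if ((MulAmt % 9) == 0)      { F1 = 9; F2 = MulAmt / 9; }
  else if ((MulAmt % 5) == 0) { F1 = 5; F2 = MulAmt / 5; }
  else if ((MulAmt % 3) == 0) { F1 = 3; F2 = MulAmt / 3; }
  return F2 != 0 && isCheap(F2);   // both factors must stay cheap
}

int main() {
  const uint64_t Tests[] = {40, 81, 45, 24, 22};
  for (uint64_t C : Tests) {
    uint64_t F1, F2;
    if (splitMulAmt(C, F1, F2))
      std::printf("x * %llu -> (x * %llu) * %llu  (LEA/SHL pair)\n",
                  (unsigned long long)C, (unsigned long long)F1,
                  (unsigned long long)F2);
    else
      std::printf("x * %llu -> left as a single shift/LEA or imulq\n",
                  (unsigned long long)C);
  }
  return 0;
}

For 40 this picks 5 and 8 (one LEA plus one SHL) and for 81 it picks 9 and 9 (two LEAs), matching the new test file below.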
@@ -8668,6 +8738,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
     return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
   case ISD::SELECT:     return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:    return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::MUL:        return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:        return PerformShiftCombine(N, DAG, Subtarget);
test/CodeGen/X86/imul-lea-2.ll (new file, 15 lines)
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3
+; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul
+
+define i64 @t1(i64 %a) nounwind readnone {
+entry:
+  %0 = mul i64 %a, 81    ; <i64> [#uses=1]
+  ret i64 %0
+}
+
+define i64 @t2(i64 %a) nounwind readnone {
+entry:
+  %0 = mul i64 %a, 40    ; <i64> [#uses=1]
+  ret i64 %0
+}
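Assuming the lowering described in the commit message, 81 = 9 * 9 becomes two LEAs and 40 = 5 * 8 becomes one LEA plus one SHL, which accounts for the expected counts in the RUN lines: three LEAs, one SHL, and no imul anywhere in the output.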