On x86, favor folding a short immediate into arithmetic operations (add, and, xor, etc.) over folding a load, because materializing an immediate in a register is expensive in terms of code size. For example,
movl 4(%esp), %eax
addl $4, %eax

is 2 bytes shorter than

movl $4, %eax
addl 4(%esp), %eax
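
To make the 2-byte figure concrete, here is a small standalone C++ sketch (not part of this commit) that tallies the two sequences; the byte counts in the comments are the standard IA-32 encodings:

#include <cstdio>

int main() {
  // Fold the immediate, load the operand with a plain mov:
  int foldImm  = 4   // movl 4(%esp), %eax  -> 8B 44 24 04
               + 3;  // addl $4, %eax       -> 83 C0 04 (sign-extended imm8 form)
  // Fold the load, materialize the immediate first:
  int foldLoad = 5   // movl $4, %eax       -> B8 04 00 00 00
               + 4;  // addl 4(%esp), %eax  -> 03 44 24 04
  std::printf("%d vs. %d bytes: %d saved\n",
              foldImm, foldLoad, foldLoad - foldImm);  // 7 vs. 9 bytes: 2 saved
  return 0;
}

When the constant is 1, incl %eax (1 byte) replaces the 3-byte addl, widening the saving to 4 bytes.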


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60139 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng 2008-11-27 00:49:46 +00:00
parent 56eca9103e
commit 884c70c912
5 changed files with 62 additions and 11 deletions


@@ -80,9 +80,11 @@ public:
     return true;
   }
-  /// CanBeFoldedBy - Returns true if the specific operand node N of U can be
-  /// folded during instruction selection that starts at Root?
-  virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
+  /// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+  /// U can be folded during instruction selection that starts at Root and
+  /// folding N is profitable.
+  virtual
+  bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const {
     return true;
   }


@@ -141,7 +141,8 @@ namespace {
     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
-    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
+    virtual
+    bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
 // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"
@@ -280,7 +281,8 @@ static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
 /// isNonImmUse - Start searching from Root up the DAG to check if Def can
 /// be reached. Return true if that's the case. However, ignore direct uses
 /// by ImmedUse (which would be U in the example illustrated in
-/// CanBeFoldedBy) and by Root (which can happen in the store case).
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
 /// FIXME: to be really generic, we should allow direct use by any node
 /// that is being folded. But realistically, since we only fold loads which
 /// have one non-chain use, we only need to watch out for load/op/store
@@ -294,9 +296,42 @@ static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
 }
-bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
+bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+                                                 SDNode *Root) const {
   if (Fast) return false;
+
+  if (U == Root)
+    switch (U->getOpcode()) {
+    default: break;
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR: {
+      // If the other operand is an 8-bit immediate we should fold the immediate
+      // instead. This reduces code size.
+      // e.g.
+      //   movl 4(%esp), %eax
+      //   addl $4, %eax
+      // vs.
+      //   movl $4, %eax
+      //   addl 4(%esp), %eax
+      // The former is 2 bytes shorter. When the increment is 1, the saving can
+      // be 4 bytes (by using incl %eax).
+      ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1));
+      if (Imm) {
+        if (U->getValueType(0) == MVT::i64) {
+          if ((int32_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
+            return false;
+        } else {
+          if ((int8_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
+            return false;
+        }
+      }
+    }
+    }
   // If Root can somehow reach N through a path that doesn't contain
   // U, then folding N would create a cycle, e.g. in the following
   // diagram Root can reach N through X. If N is folded into Root, then
@@ -999,7 +1034,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
   if (ISD::isNON_EXTLoad(InChain.getNode()) &&
       InChain.getValue(0).hasOneUse() &&
       N.hasOneUse() &&
-      CanBeFoldedBy(N.getNode(), Pred.getNode(), Op.getNode())) {
+      IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
     LoadSDNode *LD = cast<LoadSDNode>(InChain);
     if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
       return false;
@@ -1086,7 +1121,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
                                   SDValue &Index, SDValue &Disp) {
   if (ISD::isNON_EXTLoad(N.getNode()) &&
       N.hasOneUse() &&
-      CanBeFoldedBy(N.getNode(), P.getNode(), P.getNode()))
+      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
     return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
   return false;
 }
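
The profitability check added above reduces to asking whether the immediate fits x86's short, sign-extended immediate encodings. Here is a standalone C++ sketch of that fit test (made-up helper names, not LLVM code; it mirrors the casts in the diff):

#include <cassert>
#include <cstdint>

// 32-bit ops: the imm8 instruction forms (e.g. 83 /0 for add) sign-extend an
// 8-bit immediate, so folding the immediate wins only when the value survives
// a round trip through int8_t.
static bool FitsSExt8(uint64_t Imm) {
  return (uint64_t)(int64_t)(int8_t)Imm == Imm;
}

// 64-bit ops: the immediate forms sign-extend a 32-bit immediate, while
// materializing a full 64-bit constant costs a 10-byte movabsq, so the
// cutoff there is int32_t instead.
static bool FitsSExt32(uint64_t Imm) {
  return (uint64_t)(int64_t)(int32_t)Imm == Imm;
}

int main() {
  assert(FitsSExt8(4) && FitsSExt8((uint64_t)-1));      // short forms apply
  assert(!FitsSExt8(1000));                             // needs the imm32 form
  assert(FitsSExt32(1000) && !FitsSExt32(1ULL << 40));  // 64-bit cutoff
  return 0;
}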


@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep inc
+; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 4
+
+define i32 @test(i32 %X) nounwind {
+entry:
+  %0 = add i32 %X, 1
+  ret i32 %0
+}
+
+define i32 @test2(i32 %X) nounwind {
+entry:
+  %0 = add i32 %X, 4
+  ret i32 %0
+}


@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {addl.(%eax), %ecx}
+; RUN: llvm-as < %s | llc | grep {addl.\$4, %ecx}
 ; RUN: llvm-as < %s | llc | not grep leal
 
 ; this should not sink %1 into bb1; that would increase reg pressure.


@@ -522,8 +522,8 @@ public:
       if (NeedCheck) {
         std::string ParentName(RootName.begin(), RootName.end()-1);
-        emitCheck("CanBeFoldedBy(" + RootName + ".getNode(), " + ParentName +
-                  ".getNode(), N.getNode())");
+        emitCheck("IsLegalAndProfitableToFold(" + RootName +
+                  ".getNode(), " + ParentName + ".getNode(), N.getNode())");
       }
     }
   }
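
To see what condition this emitCheck call plants in the generated matcher, here is a tiny standalone snippet mirroring the string construction above (the node name N11 is hypothetical, not actual TableGen output):

#include <iostream>
#include <string>

int main() {
  std::string RootName = "N11";  // hypothetical matcher node name
  // Same construction as in the emitter: ParentName is RootName with its
  // last character dropped.
  std::string ParentName(RootName.begin(), RootName.end() - 1);
  std::cout << "IsLegalAndProfitableToFold(" + RootName + ".getNode(), " +
                   ParentName + ".getNode(), N.getNode())\n";
  // -> IsLegalAndProfitableToFold(N11.getNode(), N1.getNode(), N.getNode())
  return 0;
}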