Fix a problem where probing for addressing modes caused expressions to be

emitted too early.  In particular, this fixes
Regression/CodeGen/X86/regpressure.ll:regpressure3.

This also improves the 2nd basic block in 164.gzip:flush_block, which went from

.LBBflush_block_1:      # loopentry.1.i
        movzx %EAX, WORD PTR [dyn_ltree + 20]
        movzx %ECX, WORD PTR [dyn_ltree + 16]
        mov DWORD PTR [%ESP + 32], %ECX
        movzx %ECX, WORD PTR [dyn_ltree + 12]
        movzx %EDX, WORD PTR [dyn_ltree + 8]
        movzx %EBX, WORD PTR [dyn_ltree + 4]
        mov DWORD PTR [%ESP + 36], %EBX
        movzx %EBX, WORD PTR [dyn_ltree]
        add DWORD PTR [%ESP + 36], %EBX
        add %EDX, DWORD PTR [%ESP + 36]
        add %ECX, %EDX
        add DWORD PTR [%ESP + 32], %ECX
        add %EAX, DWORD PTR [%ESP + 32]
        movzx %ECX, WORD PTR [dyn_ltree + 24]
        add %EAX, %ECX
        mov %ECX, 0
        mov %EDX, %ECX

to

.LBBflush_block_1:      # loopentry.1.i
        movzx %EAX, WORD PTR [dyn_ltree]
        movzx %ECX, WORD PTR [dyn_ltree + 4]
        add %ECX, %EAX
        movzx %EAX, WORD PTR [dyn_ltree + 8]
        add %EAX, %ECX
        movzx %ECX, WORD PTR [dyn_ltree + 12]
        add %ECX, %EAX
        movzx %EAX, WORD PTR [dyn_ltree + 16]
        add %EAX, %ECX
        movzx %ECX, WORD PTR [dyn_ltree + 20]
        add %ECX, %EAX
        movzx %EAX, WORD PTR [dyn_ltree + 24]
        add %ECX, %EAX
        mov %EAX, 0
        mov %EDX, %EAX

... which results in less spilling in the function.

This change alone speeds up 164.gzip from 37.23s to 36.24s on apoc.  The
default isel takes 37.31s.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@19650 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2005-01-18 01:06:26 +00:00
parent e9fe2bcac2
commit 98a8ba0393

View File

@ -300,7 +300,31 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
}
namespace {
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// tree.
struct X86ISelAddressMode {
enum {
RegBase,
FrameIndexBase,
} BaseType;
struct { // This is really a union, discriminated by BaseType!
SDOperand Reg;
int FrameIndex;
} Base;
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
X86ISelAddressMode()
: BaseType(RegBase), Scale(1), IndexReg(), Disp(), GV(0) {
}
};
}
namespace {
@ -352,7 +376,10 @@ namespace {
void EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
unsigned RTrue, unsigned RFalse, unsigned RDest);
unsigned SelectExpr(SDOperand N);
bool SelectAddress(SDOperand N, X86AddressMode &AM);
X86AddressMode SelectAddrExprs(const X86ISelAddressMode &IAM);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM);
void SelectAddress(SDOperand N, X86AddressMode &AM);
void Select(SDOperand N);
};
}
@ -464,14 +491,62 @@ unsigned ISel::ComputeRegPressure(SDOperand O) {
return Result;
}
/// SelectAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.
bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
X86AddressMode ISel::SelectAddrExprs(const X86ISelAddressMode &IAM) {
X86AddressMode Result;
// If we need to emit two register operands, emit the one with the highest
// register pressure first.
if (IAM.BaseType == X86ISelAddressMode::RegBase &&
IAM.Base.Reg.Val && IAM.IndexReg.Val) {
if (getRegPressure(IAM.Base.Reg) > getRegPressure(IAM.IndexReg)) {
Result.Base.Reg = SelectExpr(IAM.Base.Reg);
Result.IndexReg = SelectExpr(IAM.IndexReg);
} else {
Result.IndexReg = SelectExpr(IAM.IndexReg);
Result.Base.Reg = SelectExpr(IAM.Base.Reg);
}
} else if (IAM.BaseType == X86ISelAddressMode::RegBase && IAM.Base.Reg.Val) {
Result.Base.Reg = SelectExpr(IAM.Base.Reg);
} else if (IAM.IndexReg.Val) {
Result.IndexReg = SelectExpr(IAM.IndexReg);
}
switch (IAM.BaseType) {
case X86ISelAddressMode::RegBase:
Result.BaseType = X86AddressMode::RegBase;
break;
case X86ISelAddressMode::FrameIndexBase:
Result.BaseType = X86AddressMode::FrameIndexBase;
Result.Base.FrameIndex = IAM.Base.FrameIndex;
break;
default:
assert(0 && "Unknown base type!");
break;
}
Result.Scale = IAM.Scale;
Result.Disp = IAM.Disp;
Result.GV = IAM.GV;
return Result;
}
/// SelectAddress - Pattern match the maximal addressing mode for this node and
/// emit all of the leaf registers.
void ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
X86ISelAddressMode IAM;
MatchAddress(N, IAM);
AM = SelectAddrExprs(IAM);
}
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode, it does not cause any code to be emitted. For that, use
/// SelectAddress.
bool ISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM) {
switch (N.getOpcode()) {
default: break;
case ISD::FrameIndex:
if (AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg == 0) {
AM.BaseType = X86AddressMode::FrameIndexBase;
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
@ -490,7 +565,7 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
// if so.
if (ExprMap.count(N)) break;
if (AM.IndexReg == 0 && AM.Scale == 1)
if (AM.IndexReg.Val == 0 && AM.Scale == 1)
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
unsigned Val = CN->getValue();
if (Val == 1 || Val == 2 || Val == 3) {
@ -502,12 +577,12 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
// constant into the disp field here.
if (ShVal.Val->getOpcode() == ISD::ADD && !ExprMap.count(ShVal) &&
isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
AM.IndexReg = SelectExpr(ShVal.Val->getOperand(0));
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
AM.Disp += AddVal->getValue() << Val;
} else {
AM.IndexReg = SelectExpr(ShVal);
AM.IndexReg = ShVal;
}
return false;
}
@ -519,26 +594,26 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
if (ExprMap.count(N)) break;
// X*[3,5,9] -> X+X*[2,4,8]
if (AM.IndexReg == 0 && AM.BaseType == X86AddressMode::RegBase &&
AM.Base.Reg == 0)
if (AM.IndexReg.Val == 0 && AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.Val == 0)
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
AM.Scale = unsigned(CN->getValue())-1;
SDOperand MulVal = N.Val->getOperand(0);
unsigned Reg;
SDOperand Reg;
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (MulVal.Val->getOpcode() == ISD::ADD && !ExprMap.count(MulVal) &&
isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
Reg = SelectExpr(MulVal.Val->getOperand(0));
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
AM.Disp += AddVal->getValue() * CN->getValue();
} else {
Reg = SelectExpr(N.Val->getOperand(0));
Reg = N.Val->getOperand(0);
}
AM.IndexReg = AM.Base.Reg = Reg;
@ -551,13 +626,13 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
// so.
if (ExprMap.count(N)) break;
X86AddressMode Backup = AM;
if (!SelectAddress(N.Val->getOperand(0), AM) &&
!SelectAddress(N.Val->getOperand(1), AM))
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.Val->getOperand(0), AM) &&
!MatchAddress(N.Val->getOperand(1), AM))
return false;
AM = Backup;
if (!SelectAddress(N.Val->getOperand(1), AM) &&
!SelectAddress(N.Val->getOperand(0), AM))
if (!MatchAddress(N.Val->getOperand(1), AM) &&
!MatchAddress(N.Val->getOperand(0), AM))
return false;
AM = Backup;
break;
@ -565,10 +640,10 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
}
// Is the base register already occupied?
if (AM.BaseType != X86AddressMode::RegBase || AM.Base.Reg) {
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg == 0) {
AM.IndexReg = SelectExpr(N);
if (AM.IndexReg.Val == 0) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
}
@ -578,8 +653,8 @@ bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
}
// Default, generate it as a register.
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = SelectExpr(N);
AM.BaseType = X86ISelAddressMode::RegBase;
AM.Base.Reg = N;
return false;
}
@ -1056,6 +1131,7 @@ bool ISel::isFoldableLoad(SDOperand Op, SDOperand OtherOp) {
void ISel::EmitFoldedLoad(SDOperand Op, X86AddressMode &AM) {
SDOperand Chain = Op.getOperand(0);
SDOperand Address = Op.getOperand(1);
if (getRegPressure(Chain) > getRegPressure(Address)) {
Select(Chain);
SelectAddress(Address, AM);
@ -1503,15 +1579,16 @@ unsigned ISel::SelectExpr(SDOperand N) {
// See if we can codegen this as an LEA to fold operations together.
if (N.getValueType() == MVT::i32) {
X86AddressMode AM;
if (!SelectAddress(Op0, AM) && !SelectAddress(Op1, AM)) {
X86ISelAddressMode AM;
if (!MatchAddress(Op0, AM) && !MatchAddress(Op1, AM)) {
// If this is not just an add, emit the LEA. For a simple add (like
// reg+reg or reg+imm), we just emit an add. It might be a good idea to
// leave this as LEA, then peephole it to 'ADD' after two address elim
// happens.
if (AM.Scale != 1 || AM.BaseType == X86AddressMode::FrameIndexBase ||
AM.GV || (AM.Base.Reg && AM.IndexReg && AM.Disp)) {
addFullAddress(BuildMI(BB, X86::LEA32r, 4, Result), AM);
if (AM.Scale != 1 || AM.BaseType == X86ISelAddressMode::FrameIndexBase||
AM.GV || (AM.Base.Reg.Val && AM.IndexReg.Val && AM.Disp)) {
X86AddressMode XAM = SelectAddrExprs(AM);
addFullAddress(BuildMI(BB, X86::LEA32r, 4, Result), XAM);
return Result;
}
}
@ -1646,9 +1723,9 @@ unsigned ISel::SelectExpr(SDOperand N) {
case 3:
case 5:
case 9:
X86AddressMode AM;
// Remove N from exprmap so SelectAddress doesn't get confused.
ExprMap.erase(N);
X86AddressMode AM;
SelectAddress(N, AM);
// Restore it to the map.
ExprMap[N] = Result;
@ -2436,12 +2513,12 @@ bool ISel::TryToFoldLoadOpStore(SDNode *Node) {
LoweredTokens.insert(TheLoad.getValue(1));
Select(Chain);
Select(TheLoad.getOperand(0));
X86AddressMode AM;
SelectAddress(TheLoad.getOperand(1), AM);
unsigned Reg = SelectExpr(Op1);
addFullAddress(BuildMI(BB, Opc, 4+1),AM).addReg(Reg);
addFullAddress(BuildMI(BB, Opc, 4+1), AM).addReg(Reg);
return true;
}