Enhance ComputeMaskedBits to know that aligned frameindexes

have their low bits set to zero.  This allows us to optimize
out explicit stack alignment code like in stack-align.ll:test4 when
it is redundant.

Doing this causes the code generator to start turning FI+cst into
FI|cst all over the place, which is general goodness (that is the
canonical form) except that various pieces of the code generator
don't handle OR aggressively.  Fix this by introducing a new
SelectionDAG::isBaseWithConstantOffset predicate, and using it
in places that are looking for ADD(X,CST).  The ARM backend in
particular was missing a lot of addressing mode folding opportunities
around OR.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125470 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2011-02-13 22:25:43 +00:00
parent eafbe659f8
commit 0a9481f44f
7 changed files with 105 additions and 75 deletions

View File

@ -2040,6 +2040,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
return;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
return;
}
break;
default:
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@ -2279,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
/// isBaseWithConstantOffset - Return true if the specified operand is an
/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
/// semantics as an ADD. This handles the equivalence:
/// X|Cst == X+Cst iff X&~Cst = 0.
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return false;
if (Op.getOpcode() == ISD::OR &&
!MaskedValueIsZero(Op.getOperand(0),
cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
return false;
return true;
}
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (NoNaNsFPMath)
@ -6327,11 +6355,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
if (V && (V->getSExtValue() == Dist*Bytes))
return true;
}
// Handle X+C
if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
return true;
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
@ -6372,15 +6400,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
} else if (Ptr.getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(Ptr.getOperand(1)) &&
} else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
// Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
// FIXME: Handle FI+CST.
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);

View File

@ -2467,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@ -2493,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
}
}
}
return false;
}

View File

@ -427,16 +427,19 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
// Match simple R + imm12 operands.
// Base only.
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index...
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
} else
Base = N;
@ -494,11 +497,13 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB)
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
// ISD::OR that is equivalent to an ISD::ADD.
!CurDAG->isBaseWithConstantOffset(N))
return false;
// Leave simple R +/- imm12 operands for LDRi12
if (N.getOpcode() == ISD::ADD) {
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
-0x1000+1, 0x1000, RHSC)) // 12 bits.
@ -510,7 +515,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
return false;
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
unsigned ShAmt = 0;
@ -535,7 +540,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
@ -597,7 +602,9 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
}
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
// ISD::OR that is equivalent to an ADD.
!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@ -615,7 +622,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
// Match simple R +/- imm12 operands.
if (N.getOpcode() == ISD::ADD) {
if (N.getOpcode() != ISD::SUB) {
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
-0x1000+1, 0x1000, RHSC)) { // 12 bits.
@ -649,7 +656,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
unsigned ShAmt = 0;
@ -674,7 +681,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
@ -756,7 +763,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
return true;
}
if (N.getOpcode() != ISD::ADD) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@ -781,7 +788,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = - RHSC;
RHSC = -RHSC;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
return true;
@ -815,7 +822,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
SDValue &Base, SDValue &Offset) {
if (N.getOpcode() != ISD::ADD) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@ -843,7 +850,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (RHSC < 0) {
AddSub = ARM_AM::sub;
RHSC = - RHSC;
RHSC = -RHSC;
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
MVT::i32);
@ -899,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
if (N.getOpcode() != ISD::ADD) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
if (!NC || !NC->isNullValue())
return false;
@ -927,7 +933,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
return false; // We want to select tLDRpci instead.
}
if (N.getOpcode() != ISD::ADD)
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
// Thumb does not have [sp, r] address mode.
@ -983,7 +989,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return false; // We want to select tLDRpci instead.
}
if (N.getOpcode() != ISD::ADD) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@ -1053,7 +1059,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
return true;
}
if (N.getOpcode() != ISD::ADD)
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
@ -1108,14 +1114,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
// Match simple R + imm12 operands.
// Base only.
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index...
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
} else if (N.getOpcode() == ARMISD::Wrapper &&
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
@ -1156,21 +1165,23 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N))
return false;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
}
}
@ -1198,7 +1209,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
if (N.getOpcode() != ISD::ADD)
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
return false;
// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.

View File

@ -685,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
/// isLogicallyAddWithConstant - Return true if this node is semantically an
/// add of a value with a constantint.
static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
// Check for (add x, Cst)
if (V->getOpcode() == ISD::ADD)
return isa<ConstantSDNode>(V->getOperand(1));
// Check for (or x, Cst), where Cst & x == 0.
if (V->getOpcode() != ISD::OR ||
!isa<ConstantSDNode>(V->getOperand(1)))
return false;
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
// Check to see if the LHS & C is zero.
return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
}
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
@ -789,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
if (CurDAG->isBaseWithConstantOffset(ShVal)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@ -964,7 +945,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
if (isLogicallyAddWithConstant(N, CurDAG)) {
if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();

View File

@ -4201,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (Ptr.getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(Ptr.getOperand(1)) &&
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);

View File

@ -64,6 +64,6 @@ entry:
%0 = load i16* %retval ; <i16> [#uses=1]
ret i16 %0
; CHECK: mov2:
; CHECK: mov.w 2(r1), 6(r1)
; CHECK: mov.w 0(r1), 4(r1)
; CHECK: mov.w 2(r1), 6(r1)
}

View File

@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i686-apple-darwin8"
@G = external global double
define void @test({ double, double }* byval %z, double* %P) {
define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
@ -21,14 +21,14 @@ entry:
ret void
}
define void @test2() alignstack(16) {
define void @test2() alignstack(16) nounwind {
entry:
; CHECK: andl{{.*}}$-16, %esp
ret void
}
; Use a call to force a spill.
define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) {
define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
entry:
; CHECK: andl{{.*}}$-32, %esp
call void @test2()
@ -38,3 +38,14 @@ entry:
declare double @fabs(double)
; The pointer is already known aligned, so and x,-16 is eliminable.
define i32 @test4() nounwind {
entry:
%buffer = alloca [2048 x i8], align 16
%0 = ptrtoint [2048 x i8]* %buffer to i32
%and = and i32 %0, -16
ret i32 %and
; CHECK: test4:
; CHECK-NOT: and
; CHECK: ret
}