Switch the SCEV expander and LoopStrengthReduce to use TargetTransformInfo
rather than TargetLowering, removing one of the primary instances of the
layering violation of Transforms depending directly on Target.

This is a really big deal because LSR used to be a "special" pass that
could only be tested fully using llc and by looking at the full output
of it. It also couldn't run with any other loop passes because it had to
be created by the backend. No longer is this true. LSR is now just
a normal pass and we should probably lift the creation of LSR out of
lib/CodeGen/Passes.cpp and into the PassManagerBuilder. =] I've not done
this, or updated all of the tests to use opt and a triple, because
I suspect someone more familiar with LSR would do a better job. This
change should be essentially without functional impact for normal
compilations, and only change behavior of targetless compilations.
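
A rough, hypothetical sketch of where this could head (the helper name and
its placement are mine, not part of this commit): once the
TargetTransformInfo group is the only requirement, scheduling LSR looks like
adding any other scalar pass.

  #include "llvm/PassManager.h"
  #include "llvm/Transforms/Scalar.h"

  // Hypothetical helper: LSR no longer needs a TargetLowering threaded
  // through, so an ordinary pass manager can schedule it. Its required
  // TargetTransformInfo analysis group always resolves to *some*
  // implementation (NoTTI when no target-specific pass is registered).
  static void addLoopStrengthReduce(llvm::PassManagerBase &PM) {
    PM.add(llvm::createLoopStrengthReducePass());
  }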

The conversion required changing all of the LSR code to refer to the TTI
interfaces, which fortunately are very similar to TargetLowering's
interfaces. However, it also allowed us to *always* expect to have some
implementation around. I've pushed that simplification through the pass,
and leveraged it to simplify code somewhat. It required some test
updates for one of two reasons: either we used to skip some checks
altogether but now we get the default "no" answer for them, or we used
to have no information about the target and now we do have some.
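
To make the similarity concrete, here is the main hook involved, paraphrased
as two free functions (the names queryOld and queryNew are illustrative only;
the exact call sites are in the LoopStrengthReduce.cpp hunks below):

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/GlobalValue.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  // Old pattern: TargetLowering was optional, took a packed AddrMode, and
  // every call site needed a null check plus its own fallback answer.
  static bool queryOld(const TargetLowering *TLI,
                       const TargetLowering::AddrMode &AM, Type *AccessTy) {
    if (!TLI)
      return false; // no target: give up (or guess, depending on call site)
    return TLI->isLegalAddressingMode(AM, AccessTy);
  }

  // New pattern: TTI is a required analysis, so it is always present; the
  // addressing-mode components are passed individually, and the targetless
  // NoTTI implementation supplies the conservative default itself.
  static bool queryNew(const TargetTransformInfo &TTI, Type *AccessTy,
                       GlobalValue *BaseGV, int64_t BaseOffset,
                       bool HasBaseReg, int64_t Scale) {
    return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset,
                                     HasBaseReg, Scale);
  }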

I've also started the process of removing AddrMode, as the TTI interface
doesn't use it any longer. In some cases this simplifies code, and in
others it adds some complexity, but I think it's not a bad tradeoff even
there. Subsequent patches will try to clean this up even further and use
other (more appropriate) abstractions.
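
For reference, the struct being retired packs roughly the four components
below (an approximation of TargetLowering::AddrMode as it is used in these
hunks, not a verbatim copy of the header); the new TTI hooks take the same
four values as separate parameters, which is what most of the churn in
LoopStrengthReduce.cpp amounts to.

  #include "llvm/IR/GlobalValue.h"
  #include "llvm/Support/DataTypes.h"

  // Approximate shape of the AddrMode being phased out; LSR's Formula still
  // carries one as F.AM for now, which later cleanups can unpack.
  struct AddrMode {
    llvm::GlobalValue *BaseGV; // symbolic base, e.g. the address of a global
    int64_t BaseOffs;          // constant displacement
    bool HasBaseReg;           // whether a plain base register is present
    int64_t Scale;             // multiplier on the scaled register
    AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
  };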

Yet again, almost all of the formatting changes brought to you by
clang-format. =]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171735 91177308-0d34-0410-b5e6-96231b3b80d8
Chandler Carruth, 2013-01-07 14:41:08 +0000
commit e4ba75f43e (parent 35d711aee0)
10 changed files with 164 additions and 186 deletions

View File

@ -22,7 +22,7 @@
#include <set>
namespace llvm {
class TargetLowering;
class TargetTransformInfo;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
@ -129,7 +129,7 @@ namespace llvm {
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
const TargetLowering *TLI = NULL);
const TargetTransformInfo *TTI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the

View File

@ -115,11 +115,9 @@ Pass *createLICMPass();
//===----------------------------------------------------------------------===//
//
// LoopStrengthReduce - This pass is strength reduces GEP instructions that use
// a loop's canonical induction variable as one of their indices. It takes an
// optional parameter used to consult the target machine whether certain
// transformations are profitable.
// a loop's canonical induction variable as one of their indices.
//
Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
Pass *createLoopStrengthReducePass();
Pass *createGlobalMergePass(const TargetLowering *TLI = 0);

View File

@ -16,11 +16,11 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
@ -1600,14 +1600,14 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
const TargetLowering *TLI) {
const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
if (TLI)
if (TTI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@ -1635,8 +1635,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
if (Phi->getType()->isIntegerTy() && TLI
&& TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
if (Phi->getType()->isIntegerTy() && TTI
&& TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =

View File

@ -179,7 +179,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) const {
return false;
// Guess that reg+reg addressing is allowed. This heuristic is taken from
// the implementation of LSR.
return !BaseGV && BaseOffset == 0 && Scale <= 1;
}
bool isTruncateFree(Type *Ty1, Type *Ty2) const {

View File

@ -362,7 +362,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
addPass(createLoopStrengthReducePass(getTargetLowering()));
addPass(createLoopStrengthReducePass());
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}

View File

@ -37,8 +37,8 @@
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
@ -63,6 +63,7 @@
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@ -72,7 +73,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@ -1270,46 +1270,42 @@ void LSRUse::dump() const {
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
//return ;
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
if (AM.BaseGV)
if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
if (AM.Scale != 0 && AM.Scale != -1)
if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
if (!TLI)
return false;
if (BaseOffset != 0) {
// We have one of:
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
// ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
int64_t Offs = AM.BaseOffs;
if (AM.Scale == 0)
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
return TLI->isLegalICmpImmediate(Offs);
if (Scale == 0)
// The cast does the right thing with INT64_MIN.
BaseOffset = -(uint64_t)BaseOffset;
return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@ -1317,92 +1313,87 @@ static bool isLegalUse(const AddrMode &AM,
case LSRUse::Basic:
// Only handle single-register values.
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
// Special case Basic to handle -1 scales.
return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
if (isLegalUse(AM, Kind, AccessTy, TLI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
return isLegalUse(AM, Kind, AccessTy, TLI);
}
return false;
MinOffset = (uint64_t)BaseOffset + MinOffset;
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
(MaxOffset > 0))
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
Scale) &&
isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
}
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.AM.BaseGV,
F.AM.BaseOffs, F.AM.HasBaseReg, F.AM.Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
if (!AM.HasBaseReg && AM.Scale == 1) {
AM.Scale = 0;
AM.HasBaseReg = true;
if (!HasBaseReg && Scale == 1) {
Scale = 0;
HasBaseReg = true;
}
return isLegalUse(AM, Kind, AccessTy, TLI);
return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t BaseOffs = ExtractImmediate(S, SE);
int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale);
}
namespace {
@ -1502,7 +1493,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
const TargetLowering *const TLI;
const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
@ -1638,7 +1629,7 @@ class LSRInstance {
Pass *P);
public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
@ -1688,12 +1679,9 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
EVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
// If target does not support DestTy natively then do not apply
// this transformation.
if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
@ -2015,18 +2003,17 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Without TLI, assume that any stride might be valid, and so any
// use might be shared.
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
AddrMode AM;
AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
AM.Scale = -AM.Scale;
if (TLI->isLegalAddressingMode(AM, AccessTy))
Scale = -Scale;
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
@ -2096,13 +2083,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
Kind, AccessTy, TLI))
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
Kind, AccessTy, TLI))
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
@ -2131,7 +2118,8 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
@ -2396,7 +2384,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
ScalarEvolution &SE, const TargetLowering *TLI) {
ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
@ -2654,7 +2642,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@ -2681,7 +2669,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetLowering *TLI) {
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@ -2690,8 +2678,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
LSRUse::Address, getAccessType(UserInst), TLI))
if (!isAlwaysFoldable(TTI, LSRUse::Address,
getAccessType(UserInst), /*BaseGV=*/ 0,
IncOffset, /*HaseBaseReg=*/ false))
return false;
return true;
@ -2762,7 +2751,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
TLI)) {
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@ -3106,9 +3095,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
@ -3120,9 +3108,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@ -3132,10 +3119,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (TLI && InnerSumSC &&
if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
@ -3144,9 +3131,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
@ -3204,8 +3191,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
continue;
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@ -3229,8 +3215,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@ -3249,8 +3235,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
continue;
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@ -3297,7 +3282,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
@ -3352,13 +3337,13 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Base.AM.Scale = Factor;
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI)) {
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
Base)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LSRUse::Special, LU.AccessTy, TLI) &&
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@ -3391,9 +3376,6 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// This requires TargetLowering to tell us which truncates are free.
if (!TLI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
@ -3405,7 +3387,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@ -3560,8 +3542,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@ -3585,10 +3567,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI)) {
if (!TLI ||
!TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@ -3898,9 +3879,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
@ -4589,13 +4569,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@ -4678,14 +4656,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI) &&
"Illegal formula generated!");
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
const LSRUse &LU = *I;
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end();
J != JE; ++J)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
*J) && "Illegal formula generated!");
};
#endif
@ -4757,13 +4735,9 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *const TLI;
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopStrengthReduce(const TargetLowering *tli = 0);
LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@ -4775,6 +4749,7 @@ private:
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
@ -4784,14 +4759,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
Pass *llvm::createLoopStrengthReducePass() {
return new LoopStrengthReduce();
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
: LoopPass(ID), TLI(tli) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
@ -4810,13 +4784,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@ -4826,8 +4801,10 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.
replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
DeadInsts,
&getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);

View File

@ -5,16 +5,17 @@
; PR13361: LSR + SCEV "hangs" on reasonably sized test with sequence of loops
;
; Without limits on CollectSubexpr, we have thousands of formulae for
; the use that crosses loops. With limits we have five.
; the use that crosses loops. With limits we have six.
; CHECK: LSR on loop %bb221:
; CHECK: After generating reuse formulae:
; CHECK: LSR is examining the following uses:
; CHECK: LSR Use: Kind=Special
; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK-NOT:reg
; CHECK: Filtering for use
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | grep "phi double" | count 1
define void @foobar(i32 %n) nounwind {
entry:

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-reduce -S | FileCheck %s
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
;
; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
; nonzero initial value.

View File

@ -4,12 +4,12 @@
; LSR should properly handle the post-inc offset when folding the
; non-IV operand of an icmp into the IV.
; CHECK: %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
; CHECK: %5 = lshr i64 %4, 1
; CHECK: %6 = mul i64 %5, 2
; CHECK: %3 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
; CHECK: %4 = lshr i64 %3, 1
; CHECK: %5 = mul i64 %4, 2
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %5, %for.body.lr.ph ]
; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2
; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null