Revert "[CodeGenPrepare] Move sign/zero extensions near loads using type promotion."

This reverts commit r224351. It causes assertion failures when building
ICU.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224397 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Reid Kleckner 2014-12-17 00:29:23 +00:00
parent 74b5b195fd
commit 0c7f4e46b6
5 changed files with 48 additions and 571 deletions

View File

@ -264,11 +264,6 @@ public:
return MaskAndBranchFoldingIsLegal;
}
/// \brief Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
/// Return true if the target can combine store(extractelement VectorTy,
/// Idx).
/// \p Cost[out] gives the cost of that transformation when this is true.
@ -1959,9 +1954,6 @@ protected:
/// a mask of a single bit, a compare, and a branch into a single instruction.
bool MaskAndBranchFoldingIsLegal;
/// \see enableExtLdPromotion.
bool EnableExtLdPromotion;
protected:
/// Return true if the value types that can be represented by the specified
/// register class are all legal.

View File

@ -91,16 +91,6 @@ static cl::opt<bool> StressStoreExtract(
"stress-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
static cl::opt<bool> DisableExtLdPromotion(
"disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
"CodeGenPrepare"));
static cl::opt<bool> StressExtLdPromotion(
"stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
@ -109,7 +99,6 @@ struct TypeIsSExt {
TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@ -169,7 +158,7 @@ class TypePromotionTransaction;
bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *&I);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
@ -177,10 +166,6 @@ class TypePromotionTransaction;
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInst);
bool splitBranchCondition(Function &F);
};
}
@ -1737,23 +1722,6 @@ static bool MightBeFoldableInst(Instruction *I) {
}
}
/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
Instruction *PromotedInst = dyn_cast<Instruction>(Val);
if (!PromotedInst)
return false;
int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
// If the ISDOpcode is undefined, it was undefined before the promotion.
if (!ISDOpcode)
return true;
// Otherwise, check if the promoted instruction is legal or not.
return TLI.isOperationLegalOrCustom(
ISDOpcode, TLI.getValueType(PromotedInst->getType()));
}
/// \brief Helper class to perform type promotion.
class TypePromotionHelper {
/// \brief Utility function to check whether or not a sign or zero extension
@ -1783,59 +1751,46 @@ class TypePromotionHelper {
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs);
static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
static Value *
promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
static Value *promoteOperandForOther(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts, bool IsSExt);
/// \see promoteOperandForOther.
static Value *
signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
static Value *signExtendOperandForOther(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs) {
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
Truncs, true);
unsigned &CreatedInsts) {
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
}
/// \see promoteOperandForOther.
static Value *
zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
static Value *zeroExtendOperandForOther(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs) {
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
Truncs, false);
unsigned &CreatedInsts) {
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs);
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@ -1879,8 +1834,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// Check if we can use this operand in the extension.
// If the type is larger than the result type of the extension,
// we cannot.
if (!OpndVal->getType()->isIntegerTy() ||
OpndVal->getType()->getIntegerBitWidth() >
if (OpndVal->getType()->getIntegerBitWidth() >
ConsideredExtType->getIntegerBitWidth())
return false;
@ -1946,9 +1900,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs) {
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
@ -1974,11 +1926,8 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
if (ExtInst && Exts)
Exts->push_back(ExtInst);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
return ExtVal;
}
// At this point we have: ext ty opnd to ty.
// Reassign the uses of ExtInst to the opnd and remove ExtInst.
@ -1989,9 +1938,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
@ -2006,8 +1953,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
ITrunc->removeFromParent();
// Insert it just after the definition.
ITrunc->insertAfter(ExtOpnd);
if (Truncs)
Truncs->push_back(ITrunc);
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
@ -2068,8 +2013,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
: TPT.createZExt(Ext, Opnd, Ext->getType()));
++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
@ -2107,7 +2051,16 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
return isPromotedInstructionLegal(TLI, PromotedOperand);
Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
if (!PromotedInst)
return false;
int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
// If the ISDOpcode is undefined, it was undefined before the promotion.
if (!ISDOpcode)
return true;
// Otherwise, check if the promoted instruction is legal or not.
return TLI.isOperationLegalOrCustom(
ISDOpcode, TLI.getValueType(PromotedInst->getType()));
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@ -2301,8 +2254,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
Value *PromotedOperand =
TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@ -3001,172 +2953,17 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
/// \brief Tell whether every user of \p Inst is the same kind of extension
/// (all sext or all zext), with result types that are either identical or,
/// for zext only, convertible into one another for free according to \p TLI.
/// \p Inst must have at least one use.
static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
  assert(!Inst->use_empty() && "Input must have at least one use");

  // The first user is the reference every other user is compared against.
  const Instruction *RefUser = cast<Instruction>(*Inst->user_begin());
  const bool WantSExt = isa<SExtInst>(RefUser);
  Type *RefTy = RefUser->getType();

  for (const User *Usr : Inst->users()) {
    const Instruction *UserInst = cast<Instruction>(Usr);

    // Every user must be the same kind of extension as the reference.
    const bool SameKind =
        WantSExt ? isa<SExtInst>(UserInst) : isa<ZExtInst>(UserInst);
    if (!SameKind)
      return false;

    Type *UserTy = UserInst->getType();
    // Same input and output types: same instruction after CSE.
    if (UserTy != RefTy) {
      // For sign extensions we are in this situation:
      //   a = Inst
      //   b = sext ty1 a to ty2
      //   c = sext ty1 a to ty3
      // Assuming ty2 is shorter than ty3, this could be rewritten as:
      //   a = Inst
      //   b = sext ty1 a to ty2
      //   c = sext ty2 b to ty3
      // However, that last sext is not free.
      if (WantSExt)
        return false;

      // Zero extensions: going from the narrower type to the larger one may
      // be free, in which case this user does not count as a different use.
      const bool RefIsWider = RefTy->getScalarType()->getIntegerBitWidth() >
                              UserTy->getScalarType()->getIntegerBitWidth();
      Type *NarrowTy = RefIsWider ? UserTy : RefTy;
      Type *LargeTy = RefIsWider ? RefTy : UserTy;
      if (!TLI.isZExtFree(NarrowTy, LargeTy))
        return false;
    }
  }

  // All users are identical or can be derived from one another for free.
  return true;
}
/// \brief Try to form ExtLd by promoting \p Exts until they reach a
/// load instruction.
/// If an ext(load) can be formed, it is returned via \p LI for the load
/// and \p Inst for the extension.
/// Otherwise LI == nullptr and Inst == nullptr.
/// When some promotion happened, \p TPT contains the proper state to
/// revert it.
///
/// \param TPT transaction recording the promotions, so that they can be
/// rolled back.
/// \param LI [out] the load of the matched ext(load), or nullptr.
/// \param Inst [out] the extension of the matched ext(load), or nullptr.
/// \param Exts the extensions to examine, in order.
/// \param CreatedInsts number of instructions already created by the
/// enclosing promotions; it is added to the count of the current promotion
/// when evaluating profitability.
///
/// \return true when promoting was necessary to expose the ext(load)
/// opportunity, false otherwise.
///
/// Example:
/// \code
/// %ld = load i32* %addr
/// %add = add nuw i32 %ld, 4
/// %zext = zext i32 %add to i64
/// \endcode
/// =>
/// \code
/// %ld = load i32* %addr
/// %zext = zext i32 %ld to i64
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Thanks to the promotion, we can match zext(load i32*) to i64.
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInsts = 0) {
// Iterate over all the extensions to see if one forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
Inst = I;
// No promotion happened here.
return false;
}
// Check whether or not we want to do any promotion.
if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
continue;
// Get the action to perform the promotion.
TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
I, InsertedTruncsSet, *TLI, PromotedInsts);
// Check if we can promote.
if (!TPH)
continue;
// Save the current state.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
unsigned NewCreatedInsts = 0;
// Promote. The extensions created by the promotion are collected in NewExts.
Value *PromotedVal =
TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
// We would be able to merge only one extension in a load.
// Therefore, if we have more than 1 new extension we heuristically
// cut this search path, because it means we degrade the code quality.
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
if (!StressExtLdPromotion &&
(TotalCreatedInsts > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
continue;
}
// The promotion is profitable.
// Check if it exposes an ext(load).
// The result of the recursive call is deliberately ignored: a promotion
// already happened at this level, so only whether LI got set matters.
(void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
// the load, otherwise we may degrade the code quality.
(LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
// Promotion happened.
return true;
// If this does not help to expose an ext(load), then rollback.
TPT.rollback(LastKnownGood);
}
// None of the extensions can form an ext(load).
LI = nullptr;
Inst = nullptr;
return false;
}
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
/// \p I[in/out] the extension may be modified during the process if some
/// promotions apply.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
// Try to promote a chain of computation if it allows to form
// an extended load.
TypePromotionTransaction TPT;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 1> Exts;
Exts.push_back(I);
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// Look for a load being extended.
LoadInst *LI = nullptr;
Instruction *OldExt = I;
bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
if (!LI || !I) {
assert(!HasPromoted && !LI && "If we did not match any load instruction "
"the code must remain the same");
I = OldExt;
return false;
}
LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
if (!LI) return false;
// If they're already in the same block, there's nothing to do.
// Make the cheap checks first if we did not promote.
// If we promoted, we need to check if it is indeed profitable.
if (!HasPromoted && LI->getParent() == I->getParent())
if (LI->getParent() == I->getParent())
return false;
EVT VT = TLI->getValueType(I->getType());
@ -3176,11 +2973,8 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
// isn't worthwhile.
if (!LI->hasOneUse() && TLI &&
(TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
!TLI->isTruncateFree(I->getType(), LI->getType())) {
I = OldExt;
TPT.rollback(LastKnownGood);
!TLI->isTruncateFree(I->getType(), LI->getType()))
return false;
}
// Check whether the target supports casts folded into loads.
unsigned LType;
@ -3190,15 +2984,11 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
if (TLI && !TLI->isLoadExtLegal(LType, LoadVT)) {
I = OldExt;
TPT.rollback(LastKnownGood);
if (TLI && !TLI->isLoadExtLegal(LType, LoadVT))
return false;
}
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
++NumExtsMoved;

View File

@ -714,7 +714,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
JumpIsExpensive = false;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;

View File

@ -1689,7 +1689,7 @@ void X86TargetLowering::resetOperationActions() {
// Predictable cmov don't hurt on atom because it's in-order.
PredictableSelectIsExpensive = !Subtarget->isAtom();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
verifyIntrinsicTables();

View File

@ -1,21 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
; rdar://7304838
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; CHECK-LABEL: foo:
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
%t = load i8* %p
@ -28,298 +19,3 @@ true:
false:
ret void
}
; Check that we manage to form a zextload if an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
%t = load i8* %p
%add = add nuw i8 %t, 2
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = zext i8 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to form a sextload if an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
%t = load i8* %p
%add = add nsw i8 %t, 2
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = sext i8 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to form a zextload if an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
%t = load i8* %p
%add = add nuw i8 %t, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = zext i8 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to form a sextload if an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
%t = load i8* %p
%add = add nsw i8 %t, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = sext i8 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
%t = load i8* %p
%tmp = add nuw i8 %t, %b
%add = add nuw i8 %tmp, %c
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = zext i8 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
%t = load i8* %p
%ext = zext i8 %t to i16
%add = add nuw i16 %ext, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = zext i16 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
%t = load i8* %p
%ext = zext i8 %t to i16
%add = add nsw i16 %ext, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
%s = sext i16 %add to i32
store i32 %s, i32* %q
ret void
false:
ret void
}
; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
; a. This creates one sext of %ld2 and one of %zextld
; b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
; introduce one. This is fine with the current heuristic: neutral.
; => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
; a. This creates one sext of %zexta and one of %zextld
; b. The sext of %zexta does not lead to any load, it stays here, even if it
; could have been combined with the zext of %a.
; c. The sext of %zextld leads to %ld and can be combined with it. This is
; done by promoting %zextld. This is fine with the current heuristic:
; neutral.
; => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
; a. This creates one sext of %b and one of %zextld
; b. The sext of %b is a dead-end, nothing to be done.
; c. Same thing as 2.c. happens.
; => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
; a. Same thing as 2.c. happens.
; b. %zextld does not have any other uses. It is dead code.
; => We have created a new zext of %ld and we removed a zext of %zextld and
; a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
%ld = load i8* %addr1
%zextld = zext i8 %ld to i32
%ld2 = load i32* %addr2
%add = add nsw i32 %ld2, %zextld
%sextadd = sext i32 %add to i64
%zexta = zext i8 %a to i32
%addza = add nsw i32 %zexta, %zextld
%sextaddza = sext i32 %addza to i64
%addb = add nsw i32 %b, %zextld
%sextaddb = sext i32 %addb to i64
call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
ret void
}
declare void @dummy(i64, i64, i64)